Diffstat (limited to 'lib')
-rw-r--r--  lib/Makefile | 4
-rw-r--r--  lib/librte_acl/Makefile | 9
-rw-r--r--  lib/librte_acl/acl_run_altivec.h | 4
-rw-r--r--  lib/librte_acl/acl_run_avx2.h | 2
-rw-r--r--  lib/librte_acl/acl_run_neon.c | 4
-rw-r--r--  lib/librte_acl/acl_run_neon.h | 10
-rw-r--r--  lib/librte_acl/acl_run_sse.h | 4
-rw-r--r--  lib/librte_acl/rte_acl_osdep.h | 1
-rw-r--r--  lib/librte_bitratestats/rte_bitrate.c | 2
-rw-r--r--  lib/librte_cmdline/cmdline_parse.c | 85
-rw-r--r--  lib/librte_cmdline/cmdline_parse.h | 50
-rw-r--r--  lib/librte_cmdline/cmdline_parse_etheraddr.c | 1
-rw-r--r--  lib/librte_compat/rte_compat.h | 2
-rw-r--r--  lib/librte_cryptodev/Makefile | 6
-rw-r--r--  lib/librte_cryptodev/rte_crypto.h | 46
-rw-r--r--  lib/librte_cryptodev/rte_crypto_sym.h | 640
-rw-r--r--  lib/librte_cryptodev/rte_cryptodev.c | 655
-rw-r--r--  lib/librte_cryptodev/rte_cryptodev.h | 342
-rw-r--r--  lib/librte_cryptodev/rte_cryptodev_pci.h | 92
-rw-r--r--  lib/librte_cryptodev/rte_cryptodev_pmd.c | 249
-rw-r--r--  lib/librte_cryptodev/rte_cryptodev_pmd.h | 156
-rw-r--r--  lib/librte_cryptodev/rte_cryptodev_vdev.h | 100
-rw-r--r--  lib/librte_cryptodev/rte_cryptodev_version.map | 41
-rw-r--r--  lib/librte_distributor/Makefile | 4
-rw-r--r--  lib/librte_distributor/rte_distributor.c | 9
-rw-r--r--  lib/librte_distributor/rte_distributor_v20.c | 2
-rw-r--r--  lib/librte_eal/bsdapp/contigmem/contigmem.c | 197
-rw-r--r--  lib/librte_eal/bsdapp/eal/Makefile | 3
-rw-r--r--  lib/librte_eal/bsdapp/eal/eal.c | 25
-rw-r--r--  lib/librte_eal/bsdapp/eal/eal_pci.c | 8
-rw-r--r--  lib/librte_eal/bsdapp/eal/eal_thread.c | 1
-rw-r--r--  lib/librte_eal/bsdapp/eal/rte_eal_version.map | 44
-rw-r--r--  lib/librte_eal/common/Makefile | 3
-rw-r--r--  lib/librte_eal/common/arch/arm/rte_cpuflags.c | 4
-rw-r--r--  lib/librte_eal/common/eal_common_bus.c | 83
-rw-r--r--  lib/librte_eal/common/eal_common_dev.c | 202
-rw-r--r--  lib/librte_eal/common/eal_common_devargs.c | 161
-rw-r--r--  lib/librte_eal/common/eal_common_launch.c | 4
-rw-r--r--  lib/librte_eal/common/eal_common_lcore.c | 1
-rw-r--r--  lib/librte_eal/common/eal_common_log.c | 13
-rw-r--r--  lib/librte_eal/common/eal_common_memory.c | 12
-rw-r--r--  lib/librte_eal/common/eal_common_memzone.c | 3
-rw-r--r--  lib/librte_eal/common/eal_common_options.c | 248
-rw-r--r--  lib/librte_eal/common/eal_common_pci.c | 154
-rw-r--r--  lib/librte_eal/common/eal_common_proc.c | 8
-rw-r--r--  lib/librte_eal/common/eal_common_tailqs.c | 1
-rw-r--r--  lib/librte_eal/common/eal_common_timer.c | 1
-rw-r--r--  lib/librte_eal/common/eal_common_vdev.c | 162
-rw-r--r--  lib/librte_eal/common/eal_options.h | 1
-rw-r--r--  lib/librte_eal/common/eal_private.h | 17
-rw-r--r--  lib/librte_eal/common/include/arch/arm/rte_atomic_64.h | 4
-rw-r--r--  lib/librte_eal/common/include/arch/arm/rte_byteorder.h | 2
-rw-r--r--  lib/librte_eal/common/include/arch/arm/rte_cpuflags_64.h | 4
-rw-r--r--  lib/librte_eal/common/include/arch/arm/rte_cycles_64.h | 4
-rw-r--r--  lib/librte_eal/common/include/arch/arm/rte_io.h | 4
-rw-r--r--  lib/librte_eal/common/include/arch/arm/rte_io_64.h | 36
-rw-r--r--  lib/librte_eal/common/include/arch/arm/rte_memcpy_64.h | 4
-rw-r--r--  lib/librte_eal/common/include/arch/arm/rte_pause.h | 50
-rw-r--r--  lib/librte_eal/common/include/arch/arm/rte_pause_32.h | 51
-rw-r--r--  lib/librte_eal/common/include/arch/arm/rte_pause_64.h | 52
-rw-r--r--  lib/librte_eal/common/include/arch/arm/rte_prefetch_64.h | 4
-rw-r--r--  lib/librte_eal/common/include/arch/arm/rte_vect.h | 119
-rw-r--r--  lib/librte_eal/common/include/arch/ppc_64/rte_io.h | 4
-rw-r--r--  lib/librte_eal/common/include/arch/ppc_64/rte_pause.h | 51
-rw-r--r--  lib/librte_eal/common/include/arch/ppc_64/rte_spinlock.h | 1
-rw-r--r--  lib/librte_eal/common/include/arch/x86/rte_cycles.h | 31
-rw-r--r--  lib/librte_eal/common/include/arch/x86/rte_io.h | 4
-rw-r--r--  lib/librte_eal/common/include/arch/x86/rte_memcpy.h | 5
-rw-r--r--  lib/librte_eal/common/include/arch/x86/rte_pause.h | 53
-rw-r--r--  lib/librte_eal/common/include/arch/x86/rte_spinlock.h | 1
-rw-r--r--  lib/librte_eal/common/include/arch/x86/rte_vect.h | 16
-rw-r--r--  lib/librte_eal/common/include/generic/rte_byteorder.h | 108
-rw-r--r--  lib/librte_eal/common/include/generic/rte_cycles.h | 31
-rw-r--r--  lib/librte_eal/common/include/generic/rte_io.h | 38
-rw-r--r--  lib/librte_eal/common/include/generic/rte_pause.h | 52
-rw-r--r--  lib/librte_eal/common/include/generic/rte_rwlock.h | 1
-rw-r--r--  lib/librte_eal/common/include/generic/rte_spinlock.h | 1
-rw-r--r--  lib/librte_eal/common/include/rte_alarm.h | 2
-rw-r--r--  lib/librte_eal/common/include/rte_bus.h | 155
-rw-r--r--  lib/librte_eal/common/include/rte_common.h | 48
-rw-r--r--  lib/librte_eal/common/include/rte_dev.h | 84
-rw-r--r--  lib/librte_eal/common/include/rte_devargs.h | 72
-rw-r--r--  lib/librte_eal/common/include/rte_eal.h | 7
-rw-r--r--  lib/librte_eal/common/include/rte_eal_memconfig.h | 1
-rw-r--r--  lib/librte_eal/common/include/rte_lcore.h | 3
-rw-r--r--  lib/librte_eal/common/include/rte_log.h | 10
-rw-r--r--  lib/librte_eal/common/include/rte_malloc.h | 4
-rw-r--r--  lib/librte_eal/common/include/rte_pci.h | 57
-rw-r--r--  lib/librte_eal/common/include/rte_service.h | 387
-rw-r--r--  lib/librte_eal/common/include/rte_service_component.h | 144
-rw-r--r--  lib/librte_eal/common/include/rte_time.h | 2
-rw-r--r--  lib/librte_eal/common/include/rte_vdev.h | 11
-rw-r--r--  lib/librte_eal/common/include/rte_version.h | 4
-rw-r--r--  lib/librte_eal/common/malloc_elem.c | 15
-rw-r--r--  lib/librte_eal/common/rte_keepalive.c | 1
-rw-r--r--  lib/librte_eal/common/rte_malloc.c | 4
-rw-r--r--  lib/librte_eal/common/rte_service.c | 706
-rw-r--r--  lib/librte_eal/linuxapp/eal/Makefile | 6
-rw-r--r--  lib/librte_eal/linuxapp/eal/eal.c | 25
-rw-r--r--  lib/librte_eal/linuxapp/eal/eal_interrupts.c | 1
-rw-r--r--  lib/librte_eal/linuxapp/eal/eal_memory.c | 176
-rw-r--r--  lib/librte_eal/linuxapp/eal/eal_pci.c | 29
-rw-r--r--  lib/librte_eal/linuxapp/eal/eal_pci_vfio.c | 2
-rw-r--r--  lib/librte_eal/linuxapp/eal/eal_thread.c | 10
-rw-r--r--  lib/librte_eal/linuxapp/eal/eal_xen_memory.c | 2
-rw-r--r--  lib/librte_eal/linuxapp/eal/rte_eal_version.map | 44
-rw-r--r--  lib/librte_eal/linuxapp/igb_uio/igb_uio.c | 33
-rw-r--r--  lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_82575.c | 2
-rw-r--r--  lib/librte_eal/linuxapp/kni/ethtool/igb/igb_main.c | 2
-rw-r--r--  lib/librte_eal/linuxapp/kni/ethtool/igb/kcompat.h | 2
-rw-r--r--  lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_api.c | 2
-rw-r--r--  lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_common.c | 2
-rw-r--r--  lib/librte_eal/linuxapp/kni/ethtool/ixgbe/kcompat.h | 2
-rw-r--r--  lib/librte_efd/rte_efd.c | 22
-rw-r--r--  lib/librte_efd/rte_efd_arm64.h | 76
-rw-r--r--  lib/librte_ether/Makefile | 5
-rw-r--r--  lib/librte_ether/rte_ethdev.c | 261
-rw-r--r--  lib/librte_ether/rte_ethdev.h | 347
-rw-r--r--  lib/librte_ether/rte_ethdev_pci.h | 11
-rw-r--r--  lib/librte_ether/rte_ethdev_vdev.h | 1
-rw-r--r--  lib/librte_ether/rte_ether_version.map | 51
-rw-r--r--  lib/librte_ether/rte_flow.c | 249
-rw-r--r--  lib/librte_ether/rte_flow.h | 176
-rw-r--r--  lib/librte_ether/rte_flow_driver.h | 5
-rw-r--r--  lib/librte_ether/rte_tm.c | 438
-rw-r--r--  lib/librte_ether/rte_tm.h | 1912
-rw-r--r--  lib/librte_ether/rte_tm_driver.h | 366
-rw-r--r--  lib/librte_eventdev/Makefile | 10
-rw-r--r--  lib/librte_eventdev/rte_event_ring.c | 207
-rw-r--r--  lib/librte_eventdev/rte_event_ring.h | 308
-rw-r--r--  lib/librte_eventdev/rte_eventdev.c | 169
-rw-r--r--  lib/librte_eventdev/rte_eventdev.h | 217
-rw-r--r--  lib/librte_eventdev/rte_eventdev_pmd.h | 105
-rw-r--r--  lib/librte_eventdev/rte_eventdev_pmd_pci.h | 162
-rw-r--r--  lib/librte_eventdev/rte_eventdev_pmd_vdev.h | 134
-rw-r--r--  lib/librte_eventdev/rte_eventdev_version.map | 9
-rw-r--r--  lib/librte_gro/Makefile | 51
-rw-r--r--  lib/librte_gro/gro_tcp4.c | 505
-rw-r--r--  lib/librte_gro/gro_tcp4.h | 210
-rw-r--r--  lib/librte_gro/rte_gro.c | 278
-rw-r--r--  lib/librte_gro/rte_gro.h | 222
-rw-r--r--  lib/librte_gro/rte_gro_version.map | 12
-rw-r--r--  lib/librte_hash/Makefile | 2
-rw-r--r--  lib/librte_hash/rte_cmp_arm64.h | 4
-rw-r--r--  lib/librte_hash/rte_cmp_x86.h | 6
-rw-r--r--  lib/librte_hash/rte_crc_arm64.h | 8
-rw-r--r--  lib/librte_hash/rte_cuckoo_hash.c | 27
-rw-r--r--  lib/librte_hash/rte_cuckoo_hash.h | 2
-rw-r--r--  lib/librte_hash/rte_fbk_hash.h | 2
-rw-r--r--  lib/librte_hash/rte_hash_crc.h | 36
-rw-r--r--  lib/librte_hash/rte_thash.h | 13
-rw-r--r--  lib/librte_ip_frag/ip_frag_common.h | 20
-rw-r--r--  lib/librte_ip_frag/ip_frag_internal.c | 10
-rw-r--r--  lib/librte_ip_frag/rte_ip_frag.h | 11
-rw-r--r--  lib/librte_ip_frag/rte_ip_frag_common.c | 13
-rw-r--r--  lib/librte_ip_frag/rte_ipfrag_version.map | 7
-rw-r--r--  lib/librte_ip_frag/rte_ipv4_fragmentation.c | 19
-rw-r--r--  lib/librte_ip_frag/rte_ipv4_reassembly.c | 2
-rw-r--r--  lib/librte_ip_frag/rte_ipv6_reassembly.c | 2
-rw-r--r--  lib/librte_jobstats/rte_jobstats.h | 2
-rw-r--r--  lib/librte_kni/rte_kni.c | 7
-rw-r--r--  lib/librte_lpm/rte_lpm.c | 1
-rw-r--r--  lib/librte_lpm/rte_lpm6.c | 1
-rw-r--r--  lib/librte_lpm/rte_lpm_neon.h | 4
-rw-r--r--  lib/librte_lpm/rte_lpm_sse.h | 3
-rw-r--r--  lib/librte_mbuf/rte_mbuf.c | 6
-rw-r--r--  lib/librte_mbuf/rte_mbuf.h | 14
-rw-r--r--  lib/librte_mbuf/rte_mbuf_ptype.h | 4
-rw-r--r--  lib/librte_mempool/rte_mempool.c | 2
-rw-r--r--  lib/librte_mempool/rte_mempool.h | 20
-rw-r--r--  lib/librte_metrics/rte_metrics.c | 6
-rw-r--r--  lib/librte_metrics/rte_metrics.h | 3
-rw-r--r--  lib/librte_net/net_crc_neon.h | 297
-rw-r--r--  lib/librte_net/net_crc_sse.h | 10
-rw-r--r--  lib/librte_net/rte_net_crc.c | 39
-rw-r--r--  lib/librte_net/rte_net_crc.h | 2
-rw-r--r--  lib/librte_pdump/rte_pdump.c | 1
-rw-r--r--  lib/librte_port/rte_port_ring.c | 4
-rw-r--r--  lib/librte_reorder/rte_reorder.h | 2
-rw-r--r--  lib/librte_ring/rte_ring.c | 29
-rw-r--r--  lib/librte_ring/rte_ring.h | 140
-rw-r--r--  lib/librte_sched/rte_sched.c | 24
-rw-r--r--  lib/librte_table/Makefile | 6
-rw-r--r--  lib/librte_table/rte_lru.h | 108
-rw-r--r--  lib/librte_table/rte_lru_arm64.h | 88
-rw-r--r--  lib/librte_table/rte_lru_x86.h | 130
-rw-r--r--  lib/librte_timer/rte_timer.c | 4
-rw-r--r--  lib/librte_vhost/rte_vhost.h | 16
-rw-r--r--  lib/librte_vhost/rte_vhost_version.map | 7
-rw-r--r--  lib/librte_vhost/socket.c | 46
-rw-r--r--  lib/librte_vhost/vhost.c | 32
-rw-r--r--  lib/librte_vhost/vhost.h | 19
-rw-r--r--  lib/librte_vhost/vhost_user.c | 73
-rw-r--r--  lib/librte_vhost/virtio_net.c | 58
194 files changed, 11704 insertions, 2871 deletions
diff --git a/lib/Makefile b/lib/Makefile
index 07e1fd0c..86caba17 100644
--- a/lib/Makefile
+++ b/lib/Makefile
@@ -52,7 +52,7 @@ DIRS-$(CONFIG_RTE_LIBRTE_CRYPTODEV) += librte_cryptodev
DEPDIRS-librte_cryptodev := librte_eal librte_mempool librte_ring librte_mbuf
DEPDIRS-librte_cryptodev += librte_kvargs
DIRS-$(CONFIG_RTE_LIBRTE_EVENTDEV) += librte_eventdev
-DEPDIRS-librte_eventdev := librte_eal
+DEPDIRS-librte_eventdev := librte_eal librte_ring
DIRS-$(CONFIG_RTE_LIBRTE_VHOST) += librte_vhost
DEPDIRS-librte_vhost := librte_eal librte_mempool librte_mbuf librte_ether
DIRS-$(CONFIG_RTE_LIBRTE_HASH) += librte_hash
@@ -68,6 +68,8 @@ DEPDIRS-librte_net := librte_mbuf librte_eal
DIRS-$(CONFIG_RTE_LIBRTE_IP_FRAG) += librte_ip_frag
DEPDIRS-librte_ip_frag := librte_eal librte_mempool librte_mbuf librte_ether
DEPDIRS-librte_ip_frag += librte_hash
+DIRS-$(CONFIG_RTE_LIBRTE_GRO) += librte_gro
+DEPDIRS-librte_gro := librte_eal librte_mbuf librte_ether librte_net
DIRS-$(CONFIG_RTE_LIBRTE_JOBSTATS) += librte_jobstats
DEPDIRS-librte_jobstats := librte_eal
DIRS-$(CONFIG_RTE_LIBRTE_METRICS) += librte_metrics
diff --git a/lib/librte_acl/Makefile b/lib/librte_acl/Makefile
index e2dacd60..59767920 100644
--- a/lib/librte_acl/Makefile
+++ b/lib/librte_acl/Makefile
@@ -51,15 +51,14 @@ SRCS-$(CONFIG_RTE_LIBRTE_ACL) += acl_run_scalar.c
ifneq ($(filter y,$(CONFIG_RTE_ARCH_ARM) $(CONFIG_RTE_ARCH_ARM64)),)
SRCS-$(CONFIG_RTE_LIBRTE_ACL) += acl_run_neon.c
-CFLAGS_acl_run_neon.o += -flax-vector-conversions -Wno-maybe-uninitialized
+CFLAGS_acl_run_neon.o += -flax-vector-conversions
+ifeq ($(CONFIG_RTE_TOOLCHAIN_GCC),y)
+CFLAGS_acl_run_neon.o += -Wno-maybe-uninitialized
+endif
else ifeq ($(CONFIG_RTE_ARCH_PPC_64),y)
SRCS-$(CONFIG_RTE_LIBRTE_ACL) += acl_run_altivec.c
else
SRCS-$(CONFIG_RTE_LIBRTE_ACL) += acl_run_sse.c
-#check if flag for SSE4.1 is already on, if not set it up manually
- ifeq ($(findstring RTE_MACHINE_CPUFLAG_SSE4_1,$(CFLAGS)),)
- CFLAGS_acl_run_sse.o += -msse4.1
- endif
endif
#
diff --git a/lib/librte_acl/acl_run_altivec.h b/lib/librte_acl/acl_run_altivec.h
index 7d329bcf..62fd6a22 100644
--- a/lib/librte_acl/acl_run_altivec.h
+++ b/lib/librte_acl/acl_run_altivec.h
@@ -104,13 +104,13 @@ resolve_priority_altivec(uint64_t transition, int n,
/*
* Check for any match in 4 transitions
*/
-static inline __attribute__((always_inline)) uint32_t
+static __rte_always_inline uint32_t
check_any_match_x4(uint64_t val[])
{
return (val[0] | val[1] | val[2] | val[3]) & RTE_ACL_NODE_MATCH;
}
-static inline __attribute__((always_inline)) void
+static __rte_always_inline void
acl_match_check_x4(int slot, const struct rte_acl_ctx *ctx, struct parms *parms,
struct acl_flow_data *flows, uint64_t transitions[])
{
diff --git a/lib/librte_acl/acl_run_avx2.h b/lib/librte_acl/acl_run_avx2.h
index b01a46a5..804e45af 100644
--- a/lib/librte_acl/acl_run_avx2.h
+++ b/lib/librte_acl/acl_run_avx2.h
@@ -86,7 +86,7 @@ static const rte_ymm_t ymm_range_base = {
* tr_hi contains high 32 bits for 8 transition.
* next_input contains up to 4 input bytes for 8 flows.
*/
-static inline __attribute__((always_inline)) ymm_t
+static __rte_always_inline ymm_t
transition8(ymm_t next_input, const uint64_t *trans, ymm_t *tr_lo, ymm_t *tr_hi)
{
const int32_t *tr;
diff --git a/lib/librte_acl/acl_run_neon.c b/lib/librte_acl/acl_run_neon.c
index b0144514..0b1c71c0 100644
--- a/lib/librte_acl/acl_run_neon.c
+++ b/lib/librte_acl/acl_run_neon.c
@@ -1,7 +1,7 @@
/*
* BSD LICENSE
*
- * Copyright (C) Cavium networks Ltd. 2015.
+ * Copyright (C) Cavium, Inc. 2015.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
@@ -13,7 +13,7 @@
* notice, this list of conditions and the following disclaimer in
* the documentation and/or other materials provided with the
* distribution.
- * * Neither the name of Cavium networks nor the names of its
+ * * Neither the name of Cavium, Inc nor the names of its
* contributors may be used to endorse or promote products derived
* from this software without specific prior written permission.
*
diff --git a/lib/librte_acl/acl_run_neon.h b/lib/librte_acl/acl_run_neon.h
index d233ff00..37881c45 100644
--- a/lib/librte_acl/acl_run_neon.h
+++ b/lib/librte_acl/acl_run_neon.h
@@ -1,7 +1,7 @@
/*
* BSD LICENSE
*
- * Copyright (C) Cavium networks Ltd. 2015.
+ * Copyright (C) Cavium, Inc. 2015.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
@@ -13,7 +13,7 @@
* notice, this list of conditions and the following disclaimer in
* the documentation and/or other materials provided with the
* distribution.
- * * Neither the name of Cavium networks nor the names of its
+ * * Neither the name of Cavium, Inc nor the names of its
* contributors may be used to endorse or promote products derived
* from this software without specific prior written permission.
*
@@ -99,13 +99,13 @@ resolve_priority_neon(uint64_t transition, int n, const struct rte_acl_ctx *ctx,
/*
* Check for any match in 4 transitions
*/
-static inline __attribute__((always_inline)) uint32_t
+static __rte_always_inline uint32_t
check_any_match_x4(uint64_t val[])
{
return (val[0] | val[1] | val[2] | val[3]) & RTE_ACL_NODE_MATCH;
}
-static inline __attribute__((always_inline)) void
+static __rte_always_inline void
acl_match_check_x4(int slot, const struct rte_acl_ctx *ctx, struct parms *parms,
struct acl_flow_data *flows, uint64_t transitions[])
{
@@ -124,7 +124,7 @@ acl_match_check_x4(int slot, const struct rte_acl_ctx *ctx, struct parms *parms,
/*
* Process 4 transitions (in 2 NEON Q registers) in parallel
*/
-static inline __attribute__((always_inline)) int32x4_t
+static __rte_always_inline int32x4_t
transition4(int32x4_t next_input, const uint64_t *trans, uint64_t transitions[])
{
int32x4x2_t tr_hi_lo;
diff --git a/lib/librte_acl/acl_run_sse.h b/lib/librte_acl/acl_run_sse.h
index ad40a674..72f66e4f 100644
--- a/lib/librte_acl/acl_run_sse.h
+++ b/lib/librte_acl/acl_run_sse.h
@@ -149,7 +149,7 @@ acl_process_matches(xmm_t *indices, int slot, const struct rte_acl_ctx *ctx,
/*
* Check for any match in 4 transitions (contained in 2 SSE registers)
*/
-static inline __attribute__((always_inline)) void
+static __rte_always_inline void
acl_match_check_x4(int slot, const struct rte_acl_ctx *ctx, struct parms *parms,
struct acl_flow_data *flows, xmm_t *indices1, xmm_t *indices2,
xmm_t match_mask)
@@ -176,7 +176,7 @@ acl_match_check_x4(int slot, const struct rte_acl_ctx *ctx, struct parms *parms,
/*
* Process 4 transitions (in 2 XMM registers) in parallel
*/
-static inline __attribute__((always_inline)) xmm_t
+static __rte_always_inline xmm_t
transition4(xmm_t next_input, const uint64_t *trans,
xmm_t *indices1, xmm_t *indices2)
{
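
The change repeated across these ACL files replaces the open-coded GCC attribute with the __rte_always_inline macro (note the 48-line change to rte_common.h in the diffstat above, which introduces it). A minimal sketch of the equivalence, assuming the macro is defined in rte_common.h as below:

	/* Assumed definition from rte_common.h (verify against your tree):
	 * #define __rte_always_inline inline __attribute__((always_inline))
	 *
	 * With that definition the two functions below compile identically;
	 * the macro keeps the compiler-specific attribute in one place. */
	static inline __attribute__((always_inline)) uint32_t
	old_style(uint32_t x) { return x; }

	static __rte_always_inline uint32_t
	new_style(uint32_t x) { return x; }
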
diff --git a/lib/librte_acl/rte_acl_osdep.h b/lib/librte_acl/rte_acl_osdep.h
index 41f7e3d4..9e4af530 100644
--- a/lib/librte_acl/rte_acl_osdep.h
+++ b/lib/librte_acl/rte_acl_osdep.h
@@ -74,7 +74,6 @@
#include <rte_errno.h>
#include <rte_string_fns.h>
#include <rte_cpuflags.h>
-#include <rte_log.h>
#include <rte_debug.h>
#endif /* _RTE_ACL_OSDEP_H_ */
diff --git a/lib/librte_bitratestats/rte_bitrate.c b/lib/librte_bitratestats/rte_bitrate.c
index 193aa690..3ceb3516 100644
--- a/lib/librte_bitratestats/rte_bitrate.c
+++ b/lib/librte_bitratestats/rte_bitrate.c
@@ -112,7 +112,7 @@ rte_stats_bitrate_calc(struct rte_stats_bitrates *bitrate_data,
port_data->peak_ibits = cnt_bits;
delta = cnt_bits;
delta -= port_data->ewma_ibits;
- /* The +-50 fixes integer rounding during divison */
+ /* The +-50 fixes integer rounding during division */
if (delta > 0)
delta = (delta * alpha_percent + 50) / 100;
else
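
The comment corrected above documents the usual round-to-nearest trick for integer division: add half the divisor (here 50, for a divisor of 100) before dividing, and subtract it for negative values so rounding stays symmetric around zero. A self-contained illustration:

	#include <stdio.h>
	#include <stdint.h>

	int main(void)
	{
		int64_t delta = 5, alpha_percent = 30;

		/* Exact value is 5 * 30 / 100 = 1.5. Truncating division
		 * yields 1; adding half the divisor first yields
		 * (150 + 50) / 100 = 2, i.e. round-to-nearest. */
		printf("%lld\n", (long long)((delta * alpha_percent + 50) / 100));
		return 0;
	}
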
diff --git a/lib/librte_cmdline/cmdline_parse.c b/lib/librte_cmdline/cmdline_parse.c
index b8148808..56491eac 100644
--- a/lib/librte_cmdline/cmdline_parse.c
+++ b/lib/librte_cmdline/cmdline_parse.c
@@ -139,6 +139,21 @@ nb_common_chars(const char * s1, const char * s2)
return i;
}
+/** Retrieve either static or dynamic token at a given index. */
+static cmdline_parse_token_hdr_t *
+get_token(cmdline_parse_inst_t *inst, unsigned int index)
+{
+ cmdline_parse_token_hdr_t *token_p;
+
+ /* check presence of static tokens first */
+ if (inst->tokens[0] || !inst->f)
+ return inst->tokens[index];
+ /* generate dynamic token */
+ token_p = NULL;
+ inst->f(&token_p, NULL, &inst->tokens[index]);
+ return token_p;
+}
+
/**
* try to match the buffer with an instruction (only the first
* nb_match_token tokens if != 0). Return 0 if we match all the
@@ -146,27 +161,20 @@ nb_common_chars(const char * s1, const char * s2)
*/
static int
match_inst(cmdline_parse_inst_t *inst, const char *buf,
- unsigned int nb_match_token, void *resbuf, unsigned resbuf_size,
- cmdline_parse_token_hdr_t
- *(*dyn_tokens)[CMDLINE_PARSE_DYNAMIC_TOKENS])
+ unsigned int nb_match_token, void *resbuf, unsigned resbuf_size)
{
- unsigned int token_num=0;
cmdline_parse_token_hdr_t * token_p;
unsigned int i=0;
int n = 0;
struct cmdline_token_hdr token_hdr;
- token_p = inst->tokens[token_num];
- if (!token_p && dyn_tokens && inst->f) {
- if (!(*dyn_tokens)[0])
- inst->f(&(*dyn_tokens)[0], NULL, dyn_tokens);
- token_p = (*dyn_tokens)[0];
- }
- if (token_p)
+ /* check if we match all tokens of inst */
+ while (!nb_match_token || i < nb_match_token) {
+ token_p = get_token(inst, i);
+ if (!token_p)
+ break;
memcpy(&token_hdr, token_p, sizeof(token_hdr));
- /* check if we match all tokens of inst */
- while (token_p && (!nb_match_token || i<nb_match_token)) {
debug_printf("TK\n");
/* skip spaces */
while (isblank2(*buf)) {
@@ -201,21 +209,6 @@ match_inst(cmdline_parse_inst_t *inst, const char *buf,
debug_printf("TK parsed (len=%d)\n", n);
i++;
buf += n;
-
- token_num ++;
- if (!inst->tokens[0]) {
- if (token_num < (CMDLINE_PARSE_DYNAMIC_TOKENS - 1)) {
- if (!(*dyn_tokens)[token_num])
- inst->f(&(*dyn_tokens)[token_num],
- NULL,
- dyn_tokens);
- token_p = (*dyn_tokens)[token_num];
- } else
- token_p = NULL;
- } else
- token_p = inst->tokens[token_num];
- if (token_p)
- memcpy(&token_hdr, token_p, sizeof(token_hdr));
}
/* does not match */
@@ -259,7 +252,6 @@ cmdline_parse(struct cmdline *cl, const char * buf)
char buf[CMDLINE_PARSE_RESULT_BUFSIZE];
long double align; /* strong alignment constraint for buf */
} result, tmp_result;
- cmdline_parse_token_hdr_t *dyn_tokens[CMDLINE_PARSE_DYNAMIC_TOKENS];
void (*f)(void *, struct cmdline *, void *) = NULL;
void *data = NULL;
int comment = 0;
@@ -276,7 +268,6 @@ cmdline_parse(struct cmdline *cl, const char * buf)
return CMDLINE_PARSE_BAD_ARGS;
ctx = cl->ctx;
- memset(&dyn_tokens, 0, sizeof(dyn_tokens));
/*
* - look if the buffer contains at least one line
@@ -322,7 +313,7 @@ cmdline_parse(struct cmdline *cl, const char * buf)
/* fully parsed */
tok = match_inst(inst, buf, 0, tmp_result.buf,
- sizeof(tmp_result.buf), &dyn_tokens);
+ sizeof(tmp_result.buf));
if (tok > 0) /* we matched at least one token */
err = CMDLINE_PARSE_BAD_ARGS;
@@ -380,7 +371,6 @@ cmdline_complete(struct cmdline *cl, const char *buf, int *state,
cmdline_parse_token_hdr_t *token_p;
struct cmdline_token_hdr token_hdr;
char tmpbuf[CMDLINE_BUFFER_SIZE], comp_buf[CMDLINE_BUFFER_SIZE];
- cmdline_parse_token_hdr_t *dyn_tokens[CMDLINE_PARSE_DYNAMIC_TOKENS];
unsigned int partial_tok_len;
int comp_len = -1;
int tmp_len = -1;
@@ -400,7 +390,6 @@ cmdline_complete(struct cmdline *cl, const char *buf, int *state,
debug_printf("%s called\n", __func__);
memset(&token_hdr, 0, sizeof(token_hdr));
- memset(&dyn_tokens, 0, sizeof(dyn_tokens));
/* count the number of complete token to parse */
for (i=0 ; buf[i] ; i++) {
@@ -424,23 +413,11 @@ cmdline_complete(struct cmdline *cl, const char *buf, int *state,
while (inst) {
/* parse the first tokens of the inst */
if (nb_token &&
- match_inst(inst, buf, nb_token, NULL, 0,
- &dyn_tokens))
+ match_inst(inst, buf, nb_token, NULL, 0))
goto next;
debug_printf("instruction match\n");
- if (!inst->tokens[0]) {
- if (nb_token <
- (CMDLINE_PARSE_DYNAMIC_TOKENS - 1)) {
- if (!dyn_tokens[nb_token])
- inst->f(&dyn_tokens[nb_token],
- NULL,
- &dyn_tokens);
- token_p = dyn_tokens[nb_token];
- } else
- token_p = NULL;
- } else
- token_p = inst->tokens[nb_token];
+ token_p = get_token(inst, nb_token);
if (token_p)
memcpy(&token_hdr, token_p, sizeof(token_hdr));
@@ -531,20 +508,10 @@ cmdline_complete(struct cmdline *cl, const char *buf, int *state,
inst = ctx[inst_num];
if (nb_token &&
- match_inst(inst, buf, nb_token, NULL, 0, &dyn_tokens))
+ match_inst(inst, buf, nb_token, NULL, 0))
goto next2;
- if (!inst->tokens[0]) {
- if (nb_token < (CMDLINE_PARSE_DYNAMIC_TOKENS - 1)) {
- if (!dyn_tokens[nb_token])
- inst->f(&dyn_tokens[nb_token],
- NULL,
- &dyn_tokens);
- token_p = dyn_tokens[nb_token];
- } else
- token_p = NULL;
- } else
- token_p = inst->tokens[nb_token];
+ token_p = get_token(inst, nb_token);
if (token_p)
memcpy(&token_hdr, token_p, sizeof(token_hdr));
diff --git a/lib/librte_cmdline/cmdline_parse.h b/lib/librte_cmdline/cmdline_parse.h
index 65b18d4f..13e086f2 100644
--- a/lib/librte_cmdline/cmdline_parse.h
+++ b/lib/librte_cmdline/cmdline_parse.h
@@ -83,9 +83,6 @@ extern "C" {
/* maximum buffer size for parsed result */
#define CMDLINE_PARSE_RESULT_BUFSIZE 8192
-/* maximum number of dynamic tokens */
-#define CMDLINE_PARSE_DYNAMIC_TOKENS 128
-
/**
* Stores a pointer to the ops struct, and the offset: the place to
* write the parsed result in the destination structure.
@@ -137,20 +134,53 @@ struct cmdline;
* When no tokens are defined (tokens[0] == NULL), they are retrieved
* dynamically by calling f() as follows:
*
- * f((struct cmdline_token_hdr **)&token_hdr,
- * NULL,
- * (struct cmdline_token_hdr *[])tokens));
+ * @code
+ *
+ * f((struct cmdline_token_hdr **)&token_p,
+ * NULL,
+ * (struct cmdline_token_hdr **)&inst->tokens[num]);
+ *
+ * @endcode
*
* The address of the resulting token is expected at the location pointed by
* the first argument. Can be set to NULL to end the list.
*
* The cmdline argument (struct cmdline *) is always NULL.
*
- * The last argument points to the NULL-terminated list of dynamic tokens
- * defined so far. Since token_hdr points to an index of that list, the
- * current index can be derived as follows:
+ * The last argument points to the inst->tokens[] entry to retrieve, which
+ * is not necessarily inside allocated memory and should neither be read nor
+ * written. Its sole purpose is to deduce the token entry index of interest
+ * as described in the example below.
+ *
+ * Note about constraints:
+ *
+ * - Only the address of these tokens is dynamic, their storage should be
+ * static like normal tokens.
+ * - Dynamic token lists that need to maintain an internal context (e.g. in
+ * order to determine the next token) must store it statically also. This
+ * context must be reinitialized when the first token is requested, that
+ * is, when &inst->tokens[0] is provided as the third argument.
+ * - Dynamic token lists must be NULL-terminated to generate usable
+ * commands.
+ *
+ * @code
+ *
+ * // Assuming first and third arguments are respectively named "token_p"
+ * // and "token":
+ *
+ * int index = token - inst->tokens;
+ *
+ * if (!index) {
+ * [...] // Clean up internal context if any.
+ * }
+ * [...] // Then set up dyn_token according to index.
+ *
+ * if (no_more_tokens)
+ * *token_p = NULL;
+ * else
+ * *token_p = &dyn_token;
*
- * int index = token_hdr - &(*tokens)[0];
+ * @endcode
*/
struct cmdline_inst {
/* f(parsed_struct, data) */
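
The @code blocks above describe the protocol; what follows is a hedged sketch of a generator callback obeying it (cmd_dyn, cmd_dyn_f and cmd_dyn_result are hypothetical names, not part of librte_cmdline). The second argument distinguishes the two roles of f(): it is NULL for token-generation calls and a valid struct cmdline * when the command actually executes.

	#include <cmdline_parse.h>
	#include <cmdline_parse_string.h>

	struct cmd_dyn_result {
		cmdline_fixed_string_t arg;
	};

	/* Static storage for the generated token, as required above;
	 * a NULL match string accepts any word. */
	static cmdline_parse_token_string_t dyn_token =
		TOKEN_STRING_INITIALIZER(struct cmd_dyn_result, arg, NULL);

	static cmdline_parse_inst_t cmd_dyn;

	static void
	cmd_dyn_f(void *arg0, struct cmdline *cl, void *arg2)
	{
		if (cl == NULL) {
			/* Token generation: arg2 points at tokens[index]. */
			cmdline_parse_token_hdr_t **token_p = arg0;
			cmdline_parse_token_hdr_t **token = arg2;
			int index = token - &cmd_dyn.tokens[0];

			if (index == 0) {
				/* First token requested: reset context here. */
			}
			if (index < 2)
				*token_p = &dyn_token.hdr; /* accept two words;
					both land in the same result field in
					this toy example */
			else
				*token_p = NULL; /* NULL-terminate the list */
			return;
		}
		/* Normal execution: arg0 is the parsed result buffer. */
	}

	static cmdline_parse_inst_t cmd_dyn = {
		.f = cmd_dyn_f,
		.data = NULL,
		.help_str = "two arbitrary words",
		.tokens = { NULL }, /* empty: tokens come from cmd_dyn_f() */
	};
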
diff --git a/lib/librte_cmdline/cmdline_parse_etheraddr.c b/lib/librte_cmdline/cmdline_parse_etheraddr.c
index dbfe4a61..da02d2c9 100644
--- a/lib/librte_cmdline/cmdline_parse_etheraddr.c
+++ b/lib/librte_cmdline/cmdline_parse_etheraddr.c
@@ -65,7 +65,6 @@
#include <inttypes.h>
#include <ctype.h>
#include <string.h>
-#include <errno.h>
#include <sys/types.h>
#include <net/ethernet.h>
diff --git a/lib/librte_compat/rte_compat.h b/lib/librte_compat/rte_compat.h
index 1c3c8d52..41e8032b 100644
--- a/lib/librte_compat/rte_compat.h
+++ b/lib/librte_compat/rte_compat.h
@@ -39,7 +39,7 @@
* When a symol is exported from a library to provide an API, it also provides a
* calling convention (ABI) that is embodied in its name, return type,
* arguments, etc. On occasion that function may need to change to accommodate
- * new functionality, behavior, etc. When that occurs, it is desireable to
+ * new functionality, behavior, etc. When that occurs, it is desirable to
* allow for backwards compatibility for a time with older binaries that are
* dynamically linked to the dpdk. To support that, the __vsym and
* VERSION_SYMBOL macros are created. They, in conjunction with the
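
For context, a hedged sketch of how these macros are typically used (function names and versions are hypothetical; the symbols must also appear under matching version nodes in the library's .map file):

	#include <rte_compat.h>

	/* Old implementation, kept for binaries linked against ABI 2.0. */
	int __vsym
	rte_widget_frob_v20(int x)
	{
		return x + 1;
	}
	VERSION_SYMBOL(rte_widget_frob, _v20, 2.0);

	/* New implementation, bound to the bare name for new links. */
	int
	rte_widget_frob_v1708(int x, int y)
	{
		return x + y;
	}
	BIND_DEFAULT_SYMBOL(rte_widget_frob, _v1708, 17.08);
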
diff --git a/lib/librte_cryptodev/Makefile b/lib/librte_cryptodev/Makefile
index 18f5e8c5..6ac331bc 100644
--- a/lib/librte_cryptodev/Makefile
+++ b/lib/librte_cryptodev/Makefile
@@ -34,20 +34,22 @@ include $(RTE_SDK)/mk/rte.vars.mk
LIB = librte_cryptodev.a
# library version
-LIBABIVER := 2
+LIBABIVER := 3
# build flags
CFLAGS += -O3
CFLAGS += $(WERROR_FLAGS)
# library source files
-SRCS-y += rte_cryptodev.c
+SRCS-y += rte_cryptodev.c rte_cryptodev_pmd.c
# export include files
SYMLINK-y-include += rte_crypto.h
SYMLINK-y-include += rte_crypto_sym.h
SYMLINK-y-include += rte_cryptodev.h
SYMLINK-y-include += rte_cryptodev_pmd.h
+SYMLINK-y-include += rte_cryptodev_vdev.h
+SYMLINK-y-include += rte_cryptodev_pci.h
# versioning export map
EXPORT_MAP := rte_cryptodev_version.map
diff --git a/lib/librte_cryptodev/rte_crypto.h b/lib/librte_cryptodev/rte_crypto.h
index 90195188..10fe0804 100644
--- a/lib/librte_cryptodev/rte_crypto.h
+++ b/lib/librte_cryptodev/rte_crypto.h
@@ -1,7 +1,7 @@
/*-
* BSD LICENSE
*
- * Copyright(c) 2016 Intel Corporation. All rights reserved.
+ * Copyright(c) 2016-2017 Intel Corporation. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
@@ -66,8 +66,6 @@ enum rte_crypto_op_status {
/**< Operation completed successfully */
RTE_CRYPTO_OP_STATUS_NOT_PROCESSED,
/**< Operation has not yet been processed by a crypto device */
- RTE_CRYPTO_OP_STATUS_ENQUEUED,
- /**< Operation is enqueued on device */
RTE_CRYPTO_OP_STATUS_AUTH_FAILED,
/**< Authentication verification failed */
RTE_CRYPTO_OP_STATUS_INVALID_SESSION,
@@ -82,6 +80,16 @@ enum rte_crypto_op_status {
};
/**
+ * Crypto operation session type. This is used to specify whether a crypto
+ * operation has session structure attached for immutable parameters or if all
+ * operation information is included in the operation data structure.
+ */
+enum rte_crypto_op_sess_type {
+ RTE_CRYPTO_OP_WITH_SESSION, /**< Session based crypto operation */
+ RTE_CRYPTO_OP_SESSIONLESS /**< Session-less crypto operation */
+};
+
+/**
* Cryptographic Operation.
*
* This structure contains data relating to performing cryptographic
@@ -92,32 +100,32 @@ enum rte_crypto_op_status {
* rte_cryptodev_enqueue_burst() / rte_cryptodev_dequeue_burst() .
*/
struct rte_crypto_op {
- enum rte_crypto_op_type type;
+ uint8_t type;
/**< operation type */
-
- enum rte_crypto_op_status status;
+ uint8_t status;
/**<
* operation status - this is reset to
* RTE_CRYPTO_OP_STATUS_NOT_PROCESSED on allocation from mempool and
* will be set to RTE_CRYPTO_OP_STATUS_SUCCESS after crypto operation
* is successfully processed by a crypto PMD
*/
+ uint8_t sess_type;
+ /**< operation session type */
+ uint8_t reserved[5];
+ /**< Reserved bytes to fill 64 bits for future additions */
struct rte_mempool *mempool;
/**< crypto operation mempool which operation is allocated from */
phys_addr_t phys_addr;
/**< physical address of crypto operation */
- void *opaque_data;
- /**< Opaque pointer for user data */
-
RTE_STD_C11
union {
- struct rte_crypto_sym_op *sym;
+ struct rte_crypto_sym_op sym[0];
/**< Symmetric operation parameters */
}; /**< operation specific parameters */
-} __rte_cache_aligned;
+};
/**
* Reset the fields of a crypto operation to their default values.
@@ -130,22 +138,15 @@ __rte_crypto_op_reset(struct rte_crypto_op *op, enum rte_crypto_op_type type)
{
op->type = type;
op->status = RTE_CRYPTO_OP_STATUS_NOT_PROCESSED;
+ op->sess_type = RTE_CRYPTO_OP_SESSIONLESS;
switch (type) {
case RTE_CRYPTO_OP_TYPE_SYMMETRIC:
- /** Symmetric operation structure starts after the end of the
- * rte_crypto_op structure.
- */
- op->sym = (struct rte_crypto_sym_op *)(op + 1);
- op->type = type;
-
__rte_crypto_sym_op_reset(op->sym);
break;
default:
break;
}
-
- op->opaque_data = NULL;
}
/**
@@ -265,8 +266,9 @@ rte_crypto_op_alloc(struct rte_mempool *mempool, enum rte_crypto_op_type type)
* @param nb_ops Number of crypto operations to allocate
*
* @returns
- * - On success returns a valid rte_crypto_op structure
- * - On failure returns NULL
+ * - nb_ops if the number of operations requested were allocated.
+ * - 0 if the requested number of ops are not available.
+ * None are allocated in this case.
*/
static inline unsigned
@@ -407,6 +409,8 @@ rte_crypto_op_attach_sym_session(struct rte_crypto_op *op,
if (unlikely(op->type != RTE_CRYPTO_OP_TYPE_SYMMETRIC))
return -1;
+ op->sess_type = RTE_CRYPTO_OP_WITH_SESSION;
+
return __rte_crypto_sym_op_attach_sym_session(op->sym, sess);
}
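
Taken together, these changes shrink the op header (three 8-bit fields plus explicit padding instead of two enums), move the session/sessionless flag from the symmetric op into rte_crypto_op itself, and place the symmetric op inline after the header (the sym[0] flexible member) rather than behind a pointer. A hedged usage sketch; op_pool and sess are assumed to exist:

	#include <rte_crypto.h>
	#include <rte_cryptodev.h>

	static int
	build_op(struct rte_mempool *op_pool,
		 struct rte_cryptodev_sym_session *sess)
	{
		struct rte_crypto_op *op =
			rte_crypto_op_alloc(op_pool, RTE_CRYPTO_OP_TYPE_SYMMETRIC);

		if (op == NULL)
			return -1;
		/* After reset: op->sess_type == RTE_CRYPTO_OP_SESSIONLESS,
		 * and op->sym already refers to storage inside the op. */

		/* Attaching a session now also sets
		 * op->sess_type = RTE_CRYPTO_OP_WITH_SESSION. */
		if (rte_crypto_op_attach_sym_session(op, sess) < 0) {
			rte_crypto_op_free(op);
			return -1;
		}
		op->sym->m_src = NULL; /* set the source mbuf here */
		return 0;
	}
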
diff --git a/lib/librte_cryptodev/rte_crypto_sym.h b/lib/librte_cryptodev/rte_crypto_sym.h
index 3a408448..0ceaa917 100644
--- a/lib/librte_cryptodev/rte_crypto_sym.h
+++ b/lib/librte_cryptodev/rte_crypto_sym.h
@@ -1,7 +1,7 @@
/*-
* BSD LICENSE
*
- * Copyright(c) 2016 Intel Corporation. All rights reserved.
+ * Copyright(c) 2016-2017 Intel Corporation. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
@@ -68,28 +68,12 @@ enum rte_crypto_cipher_algorithm {
RTE_CRYPTO_CIPHER_AES_CBC,
/**< AES algorithm in CBC mode */
- RTE_CRYPTO_CIPHER_AES_CCM,
- /**< AES algorithm in CCM mode. When this cipher algorithm is used the
- * *RTE_CRYPTO_AUTH_AES_CCM* element of the
- * *rte_crypto_hash_algorithm* enum MUST be used to set up the related
- * *rte_crypto_auth_xform* structure in the session context or in
- * the op_params of the crypto operation structure in the case of a
- * session-less crypto operation
- */
RTE_CRYPTO_CIPHER_AES_CTR,
/**< AES algorithm in Counter mode */
RTE_CRYPTO_CIPHER_AES_ECB,
/**< AES algorithm in ECB mode */
RTE_CRYPTO_CIPHER_AES_F8,
/**< AES algorithm in F8 mode */
- RTE_CRYPTO_CIPHER_AES_GCM,
- /**< AES algorithm in GCM mode. When this cipher algorithm is used the
- * *RTE_CRYPTO_AUTH_AES_GCM* or *RTE_CRYPTO_AUTH_AES_GMAC* element
- * of the *rte_crypto_auth_algorithm* enum MUST be used to set up
- * the related *rte_crypto_auth_setup_data* structure in the session
- * context or in the op_params of the crypto operation structure
- * in the case of a session-less crypto operation.
- */
RTE_CRYPTO_CIPHER_AES_XTS,
/**< AES algorithm in XTS mode */
@@ -159,7 +143,7 @@ struct rte_crypto_cipher_xform {
struct {
uint8_t *data; /**< pointer to key data */
- size_t length; /**< key length in bytes */
+ uint16_t length;/**< key length in bytes */
} key;
/**< Cipher key
*
@@ -190,6 +174,55 @@ struct rte_crypto_cipher_xform {
* - Each key can be either 128 bits (16 bytes) or 256 bits (32 bytes).
* - Both keys must have the same size.
**/
+ struct {
+ uint16_t offset;
+ /**< Starting point for Initialisation Vector or Counter,
+ * specified as number of bytes from start of crypto
+ * operation (rte_crypto_op).
+ *
+ * - For block ciphers in CBC or F8 mode, or for KASUMI
+ * in F8 mode, or for SNOW 3G in UEA2 mode, this is the
+ * Initialisation Vector (IV) value.
+ *
+ * - For block ciphers in CTR mode, this is the counter.
+ *
+ * - For GCM mode, this is either the IV (if the length
+ * is 96 bits) or J0 (for other sizes), where J0 is as
+ * defined by NIST SP800-38D. Regardless of the IV
+ * length, a full 16 bytes needs to be allocated.
+ *
+ * - For CCM mode, the first byte is reserved, and the
+ * nonce should be written starting at &iv[1] (to allow
+ * space for the implementation to write in the flags
+ * in the first byte). Note that a full 16 bytes should
+ * be allocated, even though the length field will
+ * have a value less than this.
+ *
+ * - For AES-XTS, this is the 128bit tweak, i, from
+ * IEEE Std 1619-2007.
+ *
+ * For optimum performance, the data pointed to SHOULD
+ * be 8-byte aligned.
+ */
+ uint16_t length;
+ /**< Length of valid IV data.
+ *
+ * - For block ciphers in CBC or F8 mode, or for KASUMI
+ * in F8 mode, or for SNOW 3G in UEA2 mode, this is the
+ * length of the IV (which must be the same as the
+ * block length of the cipher).
+ *
+ * - For block ciphers in CTR mode, this is the length
+ * of the counter (which must be the same as the block
+ * length of the cipher).
+ *
+ * - For GCM mode, this is either 12 (for 96-bit IVs)
+ * or 16, in which case data points to J0.
+ *
+ * - For CCM mode, this is the length of the nonce,
+ * which can be in the range 7 to 13 inclusive.
+ */
+ } iv; /**< Initialisation vector parameters */
};
/** Symmetric Authentication / Hash Algorithms */
@@ -199,33 +232,10 @@ enum rte_crypto_auth_algorithm {
RTE_CRYPTO_AUTH_AES_CBC_MAC,
/**< AES-CBC-MAC algorithm. Only 128-bit keys are supported. */
- RTE_CRYPTO_AUTH_AES_CCM,
- /**< AES algorithm in CCM mode. This is an authenticated cipher. When
- * this hash algorithm is used, the *RTE_CRYPTO_CIPHER_AES_CCM*
- * element of the *rte_crypto_cipher_algorithm* enum MUST be used to
- * set up the related rte_crypto_cipher_setup_data structure in the
- * session context or the corresponding parameter in the crypto
- * operation data structures op_params parameter MUST be set for a
- * session-less crypto operation.
- */
RTE_CRYPTO_AUTH_AES_CMAC,
/**< AES CMAC algorithm. */
- RTE_CRYPTO_AUTH_AES_GCM,
- /**< AES algorithm in GCM mode. When this hash algorithm
- * is used, the RTE_CRYPTO_CIPHER_AES_GCM element of the
- * rte_crypto_cipher_algorithm enum MUST be used to set up the related
- * rte_crypto_cipher_setup_data structure in the session context, or
- * the corresponding parameter in the crypto operation data structures
- * op_params parameter MUST be set for a session-less crypto operation.
- */
RTE_CRYPTO_AUTH_AES_GMAC,
- /**< AES GMAC algorithm. When this hash algorithm
- * is used, the RTE_CRYPTO_CIPHER_AES_GCM element of the
- * rte_crypto_cipher_algorithm enum MUST be used to set up the related
- * rte_crypto_cipher_setup_data structure in the session context, or
- * the corresponding parameter in the crypto operation data structures
- * op_params parameter MUST be set for a session-less crypto operation.
- */
+ /**< AES GMAC algorithm. */
RTE_CRYPTO_AUTH_AES_XCBC_MAC,
/**< AES XCBC algorithm. */
@@ -296,7 +306,7 @@ struct rte_crypto_auth_xform {
struct {
uint8_t *data; /**< pointer to key data */
- size_t length; /**< key length in bytes */
+ uint16_t length;/**< key length in bytes */
} key;
/**< Authentication key data.
* The authentication key length MUST be less than or equal to the
@@ -305,7 +315,35 @@ struct rte_crypto_auth_xform {
* (for example RFC 2104, FIPS 198a).
*/
- uint32_t digest_length;
+ struct {
+ uint16_t offset;
+ /**< Starting point for Initialisation Vector or Counter,
+ * specified as number of bytes from start of crypto
+ * operation (rte_crypto_op).
+ *
+ * - For SNOW 3G in UIA2 mode, for ZUC in EIA3 mode and
+ * for AES-GMAC, this is the authentication
+ * Initialisation Vector (IV) value.
+ *
+ * - For KASUMI in F9 mode and other authentication
+ * algorithms, this field is not used.
+ *
+ * For optimum performance, the data pointed to SHOULD
+ * be 8-byte aligned.
+ */
+ uint16_t length;
+ /**< Length of valid IV data.
+ *
+ * - For SNOW3G in UIA2 mode, for ZUC in EIA3 mode and
+ * for AES-GMAC, this is the length of the IV.
+ *
+ * - For KASUMI in F9 mode and other authentication
+ * algorithms, this field is not used.
+ *
+ */
+ } iv; /**< Initialisation vector parameters */
+
+ uint16_t digest_length;
/**< Length of the digest to be returned. If the verify option is set,
* this specifies the length of the digest to be compared for the
* session.
@@ -315,42 +353,89 @@ struct rte_crypto_auth_xform {
* If the value is less than the maximum length allowed by the hash,
* the result shall be truncated.
*/
+};
- uint32_t add_auth_data_length;
- /**< The length of the additional authenticated data (AAD) in bytes.
- * The maximum permitted value is 65535 (2^16 - 1) bytes, unless
- * otherwise specified below.
- *
- * This field must be specified when the hash algorithm is one of the
- * following:
- *
- * - For SNOW 3G (@ref RTE_CRYPTO_AUTH_SNOW3G_UIA2), this is the
- * length of the IV (which should be 16).
- *
- * - For GCM (@ref RTE_CRYPTO_AUTH_AES_GCM). In this case, this is
- * the length of the Additional Authenticated Data (called A, in NIST
- * SP800-38D).
- *
- * - For CCM (@ref RTE_CRYPTO_AUTH_AES_CCM). In this case, this is
- * the length of the associated data (called A, in NIST SP800-38C).
- * Note that this does NOT include the length of any padding, or the
- * 18 bytes reserved at the start of the above field to store the
- * block B0 and the encoded length. The maximum permitted value in
- * this case is 222 bytes.
- *
- * @note
- * For AES-GMAC (@ref RTE_CRYPTO_AUTH_AES_GMAC) mode of operation
- * this field is not used and should be set to 0. Instead the length
- * of the AAD data is specified in additional authentication data
- * length field of the rte_crypto_sym_op_data structure
- */
+
+/** Symmetric AEAD Algorithms */
+enum rte_crypto_aead_algorithm {
+ RTE_CRYPTO_AEAD_AES_CCM = 1,
+ /**< AES algorithm in CCM mode. */
+ RTE_CRYPTO_AEAD_AES_GCM,
+ /**< AES algorithm in GCM mode. */
+ RTE_CRYPTO_AEAD_LIST_END
+};
+
+/** AEAD algorithm name strings */
+extern const char *
+rte_crypto_aead_algorithm_strings[];
+
+/** Symmetric AEAD Operations */
+enum rte_crypto_aead_operation {
+ RTE_CRYPTO_AEAD_OP_ENCRYPT,
+ /**< Encrypt and generate digest */
+ RTE_CRYPTO_AEAD_OP_DECRYPT
+ /**< Verify digest and decrypt */
+};
+
+/** Authentication operation name strings */
+extern const char *
+rte_crypto_aead_operation_strings[];
+
+struct rte_crypto_aead_xform {
+ enum rte_crypto_aead_operation op;
+ /**< AEAD operation type */
+ enum rte_crypto_aead_algorithm algo;
+ /**< AEAD algorithm selection */
+
+ struct {
+ uint8_t *data; /**< pointer to key data */
+ uint16_t length;/**< key length in bytes */
+ } key;
+
+ struct {
+ uint16_t offset;
+ /**< Starting point for Initialisation Vector or Counter,
+ * specified as number of bytes from start of crypto
+ * operation (rte_crypto_op).
+ *
+ * - For GCM mode, this is either the IV (if the length
+ * is 96 bits) or J0 (for other sizes), where J0 is as
+ * defined by NIST SP800-38D. Regardless of the IV
+ * length, a full 16 bytes needs to be allocated.
+ *
+ * - For CCM mode, the first byte is reserved, and the
+ * nonce should be written starting at &iv[1] (to allow
+ * space for the implementation to write in the flags
+ * in the first byte). Note that a full 16 bytes should
+ * be allocated, even though the length field will
+ * have a value less than this.
+ *
+ * For optimum performance, the data pointed to SHOULD
+ * be 8-byte aligned.
+ */
+ uint16_t length;
+ /**< Length of valid IV data.
+ *
+ * - For GCM mode, this is either 12 (for 96-bit IVs)
+ * or 16, in which case data points to J0.
+ *
+ * - For CCM mode, this is the length of the nonce,
+ * which can be in the range 7 to 13 inclusive.
+ */
+ } iv; /**< Initialisation vector parameters */
+
+ uint16_t digest_length;
+
+ uint16_t aad_length;
+ /**< The length of the additional authenticated data (AAD) in bytes. */
};
/** Crypto transformation types */
enum rte_crypto_sym_xform_type {
RTE_CRYPTO_SYM_XFORM_NOT_SPECIFIED = 0, /**< No xform specified */
RTE_CRYPTO_SYM_XFORM_AUTH, /**< Authentication xform */
- RTE_CRYPTO_SYM_XFORM_CIPHER /**< Cipher xform */
+ RTE_CRYPTO_SYM_XFORM_CIPHER, /**< Cipher xform */
+ RTE_CRYPTO_SYM_XFORM_AEAD /**< AEAD xform */
};
/**
@@ -373,20 +458,11 @@ struct rte_crypto_sym_xform {
/**< Authentication / hash xform */
struct rte_crypto_cipher_xform cipher;
/**< Cipher xform */
+ struct rte_crypto_aead_xform aead;
+ /**< AEAD xform */
};
};
-/**
- * Crypto operation session type. This is used to specify whether a crypto
- * operation has session structure attached for immutable parameters or if all
- * operation information is included in the operation data structure.
- */
-enum rte_crypto_sym_op_sess_type {
- RTE_CRYPTO_SYM_OP_WITH_SESSION, /**< Session based crypto operation */
- RTE_CRYPTO_SYM_OP_SESSIONLESS /**< Session-less crypto operation */
-};
-
-
struct rte_cryptodev_sym_session;
/**
@@ -423,8 +499,6 @@ struct rte_crypto_sym_op {
struct rte_mbuf *m_src; /**< source mbuf */
struct rte_mbuf *m_dst; /**< destination mbuf */
- enum rte_crypto_sym_op_sess_type sess_type;
-
RTE_STD_C11
union {
struct rte_cryptodev_sym_session *session;
@@ -433,227 +507,182 @@ struct rte_crypto_sym_op {
/**< Session-less API crypto operation parameters */
};
- struct {
- struct {
- uint32_t offset;
- /**< Starting point for cipher processing, specified
- * as number of bytes from start of data in the source
- * buffer. The result of the cipher operation will be
- * written back into the output buffer starting at
- * this location.
- *
- * @note
- * For SNOW 3G @ RTE_CRYPTO_CIPHER_SNOW3G_UEA2,
- * KASUMI @ RTE_CRYPTO_CIPHER_KASUMI_F8
- * and ZUC @ RTE_CRYPTO_CIPHER_ZUC_EEA3,
- * this field should be in bits.
- */
-
- uint32_t length;
- /**< The message length, in bytes, of the source buffer
- * on which the cryptographic operation will be
- * computed. This must be a multiple of the block size
- * if a block cipher is being used. This is also the
- * same as the result length.
- *
- * @note
- * In the case of CCM @ref RTE_CRYPTO_AUTH_AES_CCM,
- * this value should not include the length of the
- * padding or the length of the MAC; the driver will
- * compute the actual number of bytes over which the
- * encryption will occur, which will include these
- * values.
- *
- * @note
- * For AES-GMAC @ref RTE_CRYPTO_AUTH_AES_GMAC, this
- * field should be set to 0.
- *
- * @note
- * For SNOW 3G @ RTE_CRYPTO_AUTH_SNOW3G_UEA2,
- * KASUMI @ RTE_CRYPTO_CIPHER_KASUMI_F8
- * and ZUC @ RTE_CRYPTO_CIPHER_ZUC_EEA3,
- * this field should be in bits.
- */
- } data; /**< Data offsets and length for ciphering */
-
- struct {
- uint8_t *data;
- /**< Initialisation Vector or Counter.
- *
- * - For block ciphers in CBC or F8 mode, or for KASUMI
- * in F8 mode, or for SNOW 3G in UEA2 mode, this is the
- * Initialisation Vector (IV) value.
- *
- * - For block ciphers in CTR mode, this is the counter.
- *
- * - For GCM mode, this is either the IV (if the length
- * is 96 bits) or J0 (for other sizes), where J0 is as
- * defined by NIST SP800-38D. Regardless of the IV
- * length, a full 16 bytes needs to be allocated.
- *
- * - For CCM mode, the first byte is reserved, and the
- * nonce should be written starting at &iv[1] (to allow
- * space for the implementation to write in the flags
- * in the first byte). Note that a full 16 bytes should
- * be allocated, even though the length field will
- * have a value less than this.
- *
- * - For AES-XTS, this is the 128bit tweak, i, from
- * IEEE Std 1619-2007.
- *
- * For optimum performance, the data pointed to SHOULD
- * be 8-byte aligned.
- */
- phys_addr_t phys_addr;
- uint16_t length;
- /**< Length of valid IV data.
- *
- * - For block ciphers in CBC or F8 mode, or for KASUMI
- * in F8 mode, or for SNOW 3G in UEA2 mode, this is the
- * length of the IV (which must be the same as the
- * block length of the cipher).
- *
- * - For block ciphers in CTR mode, this is the length
- * of the counter (which must be the same as the block
- * length of the cipher).
- *
- * - For GCM mode, this is either 12 (for 96-bit IVs)
- * or 16, in which case data points to J0.
- *
- * - For CCM mode, this is the length of the nonce,
- * which can be in the range 7 to 13 inclusive.
- */
- } iv; /**< Initialisation vector parameters */
- } cipher;
-
- struct {
- struct {
- uint32_t offset;
- /**< Starting point for hash processing, specified as
- * number of bytes from start of packet in source
- * buffer.
- *
- * @note
- * For CCM and GCM modes of operation, this field is
- * ignored. The field @ref aad field
- * should be set instead.
- *
- * @note For AES-GMAC (@ref RTE_CRYPTO_AUTH_AES_GMAC)
- * mode of operation, this field is set to 0. aad data
- * pointer of rte_crypto_sym_op_data structure is
- * used instead
- *
- * @note
- * For SNOW 3G @ RTE_CRYPTO_AUTH_SNOW3G_UIA2,
- * KASUMI @ RTE_CRYPTO_AUTH_KASUMI_F9
- * and ZUC @ RTE_CRYPTO_AUTH_ZUC_EIA3,
- * this field should be in bits.
- */
-
- uint32_t length;
- /**< The message length, in bytes, of the source
- * buffer that the hash will be computed on.
- *
- * @note
- * For CCM and GCM modes of operation, this field is
- * ignored. The field @ref aad field should be set
- * instead.
- *
- * @note
- * For AES-GMAC @ref RTE_CRYPTO_AUTH_AES_GMAC mode
- * of operation, this field is set to 0.
- * Auth.aad.length is used instead.
- *
- * @note
- * For SNOW 3G @ RTE_CRYPTO_AUTH_SNOW3G_UIA2,
- * KASUMI @ RTE_CRYPTO_AUTH_KASUMI_F9
- * and ZUC @ RTE_CRYPTO_AUTH_ZUC_EIA3,
- * this field should be in bits.
- */
- } data; /**< Data offsets and length for authentication */
-
+ RTE_STD_C11
+ union {
struct {
- uint8_t *data;
- /**< This points to the location where the digest result
- * should be inserted (in the case of digest generation)
- * or where the purported digest exists (in the case of
- * digest verification).
- *
- * At session creation time, the client specified the
- * digest result length with the digest_length member
- * of the @ref rte_crypto_auth_xform structure. For
- * physical crypto devices the caller must allocate at
- * least digest_length of physically contiguous memory
- * at this location.
- *
- * For digest generation, the digest result will
- * overwrite any data at this location.
- *
- * @note
- * For GCM (@ref RTE_CRYPTO_AUTH_AES_GCM), for
- * "digest result" read "authentication tag T".
- */
- phys_addr_t phys_addr;
- /**< Physical address of digest */
- uint16_t length;
- /**< Length of digest. This must be the same value as
- * @ref rte_crypto_auth_xform.digest_length.
- */
- } digest; /**< Digest parameters */
+ struct {
+ uint32_t offset;
+ /**< Starting point for AEAD processing, specified as
+ * number of bytes from start of packet in source
+ * buffer.
+ */
+ uint32_t length;
+ /**< The message length, in bytes, of the source buffer
+ * on which the cryptographic operation will be
+ * computed. This must be a multiple of the block size
+ */
+ } data; /**< Data offsets and length for AEAD */
+ struct {
+ uint8_t *data;
+ /**< This points to the location where the digest result
+ * should be inserted (in the case of digest generation)
+ * or where the purported digest exists (in the case of
+ * digest verification).
+ *
+ * At session creation time, the client specified the
+ * digest result length with the digest_length member
+ * of the @ref rte_crypto_auth_xform structure. For
+ * physical crypto devices the caller must allocate at
+ * least digest_length of physically contiguous memory
+ * at this location.
+ *
+ * For digest generation, the digest result will
+ * overwrite any data at this location.
+ *
+ * @note
+ * For GCM (@ref RTE_CRYPTO_AEAD_AES_GCM), for
+ * "digest result" read "authentication tag T".
+ */
+ phys_addr_t phys_addr;
+ /**< Physical address of digest */
+ } digest; /**< Digest parameters */
+ struct {
+ uint8_t *data;
+ /**< Pointer to Additional Authenticated Data (AAD)
+ * needed for authenticated cipher mechanisms (CCM and
+ * GCM)
+ *
+ * Specifically for CCM (@ref RTE_CRYPTO_AEAD_AES_CCM),
+ * the caller should setup this field as follows:
+ *
+ * - the nonce should be written starting at an offset
+ * of one byte into the array, leaving room for the
+ * implementation to write in the flags to the first
+ * byte.
+ *
+ * - the additional authentication data itself should
+ * be written starting at an offset of 18 bytes into
+ * the array, leaving room for the length encoding in
+ * the first two bytes of the second block.
+ *
+ * - the array should be big enough to hold the above
+ * fields, plus any padding to round this up to the
+ * nearest multiple of the block size (16 bytes).
+ * Padding will be added by the implementation.
+ *
+ * Finally, for GCM (@ref RTE_CRYPTO_AEAD_AES_GCM), the
+ * caller should setup this field as follows:
+ *
+ * - the AAD is written in starting at byte 0
+ * - the array must be big enough to hold the AAD, plus
+ * any space to round this up to the nearest multiple
+ * of the block size (16 bytes).
+ *
+ */
+ phys_addr_t phys_addr; /**< physical address */
+ } aad;
+ /**< Additional authentication parameters */
+ } aead;
struct {
- uint8_t *data;
- /**< Pointer to Additional Authenticated Data (AAD)
- * needed for authenticated cipher mechanisms (CCM and
- * GCM), and to the IV for SNOW 3G authentication
- * (@ref RTE_CRYPTO_AUTH_SNOW3G_UIA2). For other
- * authentication mechanisms this pointer is ignored.
- *
- * The length of the data pointed to by this field is
- * set up for the session in the @ref
- * rte_crypto_auth_xform structure as part of the @ref
- * rte_cryptodev_sym_session_create function call.
- * This length must not exceed 65535 (2^16-1) bytes.
- *
- * Specifically for CCM (@ref RTE_CRYPTO_AUTH_AES_CCM),
- * the caller should setup this field as follows:
- *
- * - the nonce should be written starting at an offset
- * of one byte into the array, leaving room for the
- * implementation to write in the flags to the first
- * byte.
- *
- * - the additional authentication data itself should
- * be written starting at an offset of 18 bytes into
- * the array, leaving room for the length encoding in
- * the first two bytes of the second block.
- *
- * - the array should be big enough to hold the above
- * fields, plus any padding to round this up to the
- * nearest multiple of the block size (16 bytes).
- * Padding will be added by the implementation.
- *
- * Finally, for GCM (@ref RTE_CRYPTO_AUTH_AES_GCM), the
- * caller should setup this field as follows:
- *
- * - the AAD is written in starting at byte 0
- * - the array must be big enough to hold the AAD, plus
- * any space to round this up to the nearest multiple
- * of the block size (16 bytes).
- *
- * @note
- * For AES-GMAC (@ref RTE_CRYPTO_AUTH_AES_GMAC) mode of
- * operation, this field is used to pass plaintext.
- */
- phys_addr_t phys_addr; /**< physical address */
- uint16_t length;
- /**< Length of additional authenticated data (AAD)
- * in bytes
- */
- } aad;
- /**< Additional authentication parameters */
- } auth;
-} __rte_cache_aligned;
+ struct {
+ struct {
+ uint32_t offset;
+ /**< Starting point for cipher processing,
+ * specified as number of bytes from start
+ * of data in the source buffer.
+ * The result of the cipher operation will be
+ * written back into the output buffer
+ * starting at this location.
+ *
+ * @note
+ * For SNOW 3G @ RTE_CRYPTO_CIPHER_SNOW3G_UEA2,
+ * KASUMI @ RTE_CRYPTO_CIPHER_KASUMI_F8
+ * and ZUC @ RTE_CRYPTO_CIPHER_ZUC_EEA3,
+ * this field should be in bits.
+ */
+ uint32_t length;
+ /**< The message length, in bytes, of the
+ * source buffer on which the cryptographic
+ * operation will be computed.
+ * This must be a multiple of the block size
+ * if a block cipher is being used. This is
+ * also the same as the result length.
+ *
+ * @note
+ * For SNOW 3G @ RTE_CRYPTO_AUTH_SNOW3G_UEA2,
+ * KASUMI @ RTE_CRYPTO_CIPHER_KASUMI_F8
+ * and ZUC @ RTE_CRYPTO_CIPHER_ZUC_EEA3,
+ * this field should be in bits.
+ */
+ } data; /**< Data offsets and length for ciphering */
+ } cipher;
+
+ struct {
+ struct {
+ uint32_t offset;
+ /**< Starting point for hash processing,
+ * specified as number of bytes from start of
+ * packet in source buffer.
+ *
+ * @note
+ * For SNOW 3G @ RTE_CRYPTO_AUTH_SNOW3G_UIA2,
+ * KASUMI @ RTE_CRYPTO_AUTH_KASUMI_F9
+ * and ZUC @ RTE_CRYPTO_AUTH_ZUC_EIA3,
+ * this field should be in bits.
+ *
+ * @note
+ * For KASUMI @ RTE_CRYPTO_AUTH_KASUMI_F9,
+ * this offset should be such that
+ * data to authenticate starts at COUNT.
+ */
+ uint32_t length;
+ /**< The message length, in bytes, of the source
+ * buffer that the hash will be computed on.
+ *
+ * @note
+ * For SNOW 3G @ RTE_CRYPTO_AUTH_SNOW3G_UIA2,
+ * KASUMI @ RTE_CRYPTO_AUTH_KASUMI_F9
+ * and ZUC @ RTE_CRYPTO_AUTH_ZUC_EIA3,
+ * this field should be in bits.
+ *
+ * @note
+ * For KASUMI @ RTE_CRYPTO_AUTH_KASUMI_F9,
+ * the length should include the COUNT,
+ * FRESH, message, direction bit and padding
+ * (to be multiple of 8 bits).
+ */
+ } data;
+ /**< Data offsets and length for authentication */
+
+ struct {
+ uint8_t *data;
+ /**< This points to the location where
+ * the digest result should be inserted
+ * (in the case of digest generation)
+ * or where the purported digest exists
+ * (in the case of digest verification).
+ *
+ * At session creation time, the client
+ * specified the digest result length with
+ * the digest_length member of the
+ * @ref rte_crypto_auth_xform structure.
+ * For physical crypto devices the caller
+ * must allocate at least digest_length of
+ * physically contiguous memory at this
+ * location.
+ *
+ * For digest generation, the digest result
+ * will overwrite any data at this location.
+ *
+ */
+ phys_addr_t phys_addr;
+ /**< Physical address of digest */
+ } digest; /**< Digest parameters */
+ } auth;
+ };
+ };
+};
/**
@@ -665,8 +694,6 @@ static inline void
__rte_crypto_sym_op_reset(struct rte_crypto_sym_op *op)
{
memset(op, 0, sizeof(*op));
-
- op->sess_type = RTE_CRYPTO_SYM_OP_SESSIONLESS;
}
@@ -708,7 +735,6 @@ __rte_crypto_sym_op_attach_sym_session(struct rte_crypto_sym_op *sym_op,
struct rte_cryptodev_sym_session *sess)
{
sym_op->session = sess;
- sym_op->sess_type = RTE_CRYPTO_SYM_OP_WITH_SESSION;
return 0;
}
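
Two API shifts land in this file: per-op cipher and auth IVs become session-time (offset, length) pairs measured from the start of the rte_crypto_op, and CCM/GCM move out of the cipher/auth enums into a dedicated AEAD transform. A hedged sketch of an AES-GCM setup under the new definitions (IV_OFFSET and the key buffer are assumptions; the op mempool must reserve private space after each op to hold the IV):

	#include <string.h>
	#include <rte_crypto.h>

	/* IV lives in the op's private area, right after the fixed part. */
	#define IV_OFFSET (sizeof(struct rte_crypto_op) + \
			   sizeof(struct rte_crypto_sym_op))

	static uint8_t key[16]; /* AES-128 key, filled elsewhere */

	static struct rte_crypto_sym_xform gcm_xform = {
		.type = RTE_CRYPTO_SYM_XFORM_AEAD,
		.aead = {
			.op = RTE_CRYPTO_AEAD_OP_ENCRYPT,
			.algo = RTE_CRYPTO_AEAD_AES_GCM,
			.key = { .data = key, .length = sizeof(key) },
			.iv = { .offset = IV_OFFSET, .length = 12 }, /* 96-bit IV */
			.digest_length = 16,
			.aad_length = 0,
		},
	};

	/* At enqueue time, the IV bytes are written at that offset: */
	static void
	set_iv(struct rte_crypto_op *op, const uint8_t iv[12])
	{
		memcpy((uint8_t *)op + IV_OFFSET, iv, 12);
	}
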
diff --git a/lib/librte_cryptodev/rte_cryptodev.c b/lib/librte_cryptodev/rte_cryptodev.c
index b65cd9ce..327d7e84 100644
--- a/lib/librte_cryptodev/rte_cryptodev.c
+++ b/lib/librte_cryptodev/rte_cryptodev.c
@@ -47,7 +47,6 @@
#include <rte_debug.h>
#include <rte_dev.h>
#include <rte_interrupts.h>
-#include <rte_pci.h>
#include <rte_memory.h>
#include <rte_memcpy.h>
#include <rte_memzone.h>
@@ -70,6 +69,8 @@
#include "rte_cryptodev.h"
#include "rte_cryptodev_pmd.h"
+static uint8_t nb_drivers;
+
struct rte_cryptodev rte_crypto_devices[RTE_CRYPTO_MAX_DEVS];
struct rte_cryptodev *rte_cryptodevs = &rte_crypto_devices[0];
@@ -101,18 +102,6 @@ struct rte_cryptodev_callback {
uint32_t active; /**< Callback is executing */
};
-#define RTE_CRYPTODEV_VDEV_NAME ("name")
-#define RTE_CRYPTODEV_VDEV_MAX_NB_QP_ARG ("max_nb_queue_pairs")
-#define RTE_CRYPTODEV_VDEV_MAX_NB_SESS_ARG ("max_nb_sessions")
-#define RTE_CRYPTODEV_VDEV_SOCKET_ID ("socket_id")
-
-static const char *cryptodev_vdev_valid_params[] = {
- RTE_CRYPTODEV_VDEV_NAME,
- RTE_CRYPTODEV_VDEV_MAX_NB_QP_ARG,
- RTE_CRYPTODEV_VDEV_MAX_NB_SESS_ARG,
- RTE_CRYPTODEV_VDEV_SOCKET_ID
-};
-
/**
* The crypto cipher algorithm strings identifiers.
* It could be used in application command line.
@@ -124,11 +113,9 @@ rte_crypto_cipher_algorithm_strings[] = {
[RTE_CRYPTO_CIPHER_3DES_CTR] = "3des-ctr",
[RTE_CRYPTO_CIPHER_AES_CBC] = "aes-cbc",
- [RTE_CRYPTO_CIPHER_AES_CCM] = "aes-ccm",
[RTE_CRYPTO_CIPHER_AES_CTR] = "aes-ctr",
[RTE_CRYPTO_CIPHER_AES_DOCSISBPI] = "aes-docsisbpi",
[RTE_CRYPTO_CIPHER_AES_ECB] = "aes-ecb",
- [RTE_CRYPTO_CIPHER_AES_GCM] = "aes-gcm",
[RTE_CRYPTO_CIPHER_AES_F8] = "aes-f8",
[RTE_CRYPTO_CIPHER_AES_XTS] = "aes-xts",
@@ -161,9 +148,7 @@ rte_crypto_cipher_operation_strings[] = {
const char *
rte_crypto_auth_algorithm_strings[] = {
[RTE_CRYPTO_AUTH_AES_CBC_MAC] = "aes-cbc-mac",
- [RTE_CRYPTO_AUTH_AES_CCM] = "aes-ccm",
[RTE_CRYPTO_AUTH_AES_CMAC] = "aes-cmac",
- [RTE_CRYPTO_AUTH_AES_GCM] = "aes-gcm",
[RTE_CRYPTO_AUTH_AES_GMAC] = "aes-gmac",
[RTE_CRYPTO_AUTH_AES_XCBC_MAC] = "aes-xcbc-mac",
@@ -189,6 +174,26 @@ rte_crypto_auth_algorithm_strings[] = {
[RTE_CRYPTO_AUTH_ZUC_EIA3] = "zuc-eia3"
};
+/**
+ * The crypto AEAD algorithm strings identifiers.
+ * It could be used in application command line.
+ */
+const char *
+rte_crypto_aead_algorithm_strings[] = {
+ [RTE_CRYPTO_AEAD_AES_CCM] = "aes-ccm",
+ [RTE_CRYPTO_AEAD_AES_GCM] = "aes-gcm",
+};
+
+/**
+ * The crypto AEAD operation strings identifiers.
+ * It could be used in application command line.
+ */
+const char *
+rte_crypto_aead_operation_strings[] = {
+ [RTE_CRYPTO_AEAD_OP_ENCRYPT] = "encrypt",
+ [RTE_CRYPTO_AEAD_OP_DECRYPT] = "decrypt"
+};
+
int
rte_cryptodev_get_cipher_algo_enum(enum rte_crypto_cipher_algorithm *algo_enum,
const char *algo_string)
@@ -223,6 +228,23 @@ rte_cryptodev_get_auth_algo_enum(enum rte_crypto_auth_algorithm *algo_enum,
return -1;
}
+int
+rte_cryptodev_get_aead_algo_enum(enum rte_crypto_aead_algorithm *algo_enum,
+ const char *algo_string)
+{
+ unsigned int i;
+
+ for (i = 1; i < RTE_DIM(rte_crypto_aead_algorithm_strings); i++) {
+ if (strcmp(algo_string, rte_crypto_aead_algorithm_strings[i]) == 0) {
+ *algo_enum = (enum rte_crypto_aead_algorithm) i;
+ return 0;
+ }
+ }
+
+ /* Invalid string */
+ return -1;
+}
+
/**
* The crypto auth operation strings identifiers.
* It could be used in application command line.
@@ -233,111 +255,6 @@ rte_crypto_auth_operation_strings[] = {
[RTE_CRYPTO_AUTH_OP_GENERATE] = "generate"
};
-static uint8_t
-number_of_sockets(void)
-{
- int sockets = 0;
- int i;
- const struct rte_memseg *ms = rte_eal_get_physmem_layout();
-
- for (i = 0; ((i < RTE_MAX_MEMSEG) && (ms[i].addr != NULL)); i++) {
- if (sockets < ms[i].socket_id)
- sockets = ms[i].socket_id;
- }
-
- /* Number of sockets = maximum socket_id + 1 */
- return ++sockets;
-}
-
-/** Parse integer from integer argument */
-static int
-parse_integer_arg(const char *key __rte_unused,
- const char *value, void *extra_args)
-{
- int *i = extra_args;
-
- *i = atoi(value);
- if (*i < 0) {
- CDEV_LOG_ERR("Argument has to be positive.");
- return -1;
- }
-
- return 0;
-}
-
-/** Parse name */
-static int
-parse_name_arg(const char *key __rte_unused,
- const char *value, void *extra_args)
-{
- struct rte_crypto_vdev_init_params *params = extra_args;
-
- if (strlen(value) >= RTE_CRYPTODEV_NAME_MAX_LEN - 1) {
- CDEV_LOG_ERR("Invalid name %s, should be less than "
- "%u bytes", value,
- RTE_CRYPTODEV_NAME_MAX_LEN - 1);
- return -1;
- }
-
- strncpy(params->name, value, RTE_CRYPTODEV_NAME_MAX_LEN);
-
- return 0;
-}
-
-int
-rte_cryptodev_parse_vdev_init_params(struct rte_crypto_vdev_init_params *params,
- const char *input_args)
-{
- struct rte_kvargs *kvlist = NULL;
- int ret = 0;
-
- if (params == NULL)
- return -EINVAL;
-
- if (input_args) {
- kvlist = rte_kvargs_parse(input_args,
- cryptodev_vdev_valid_params);
- if (kvlist == NULL)
- return -1;
-
- ret = rte_kvargs_process(kvlist,
- RTE_CRYPTODEV_VDEV_MAX_NB_QP_ARG,
- &parse_integer_arg,
- &params->max_nb_queue_pairs);
- if (ret < 0)
- goto free_kvlist;
-
- ret = rte_kvargs_process(kvlist,
- RTE_CRYPTODEV_VDEV_MAX_NB_SESS_ARG,
- &parse_integer_arg,
- &params->max_nb_sessions);
- if (ret < 0)
- goto free_kvlist;
-
- ret = rte_kvargs_process(kvlist, RTE_CRYPTODEV_VDEV_SOCKET_ID,
- &parse_integer_arg,
- &params->socket_id);
- if (ret < 0)
- goto free_kvlist;
-
- ret = rte_kvargs_process(kvlist, RTE_CRYPTODEV_VDEV_NAME,
- &parse_name_arg,
- params);
- if (ret < 0)
- goto free_kvlist;
-
- if (params->socket_id >= number_of_sockets()) {
- CDEV_LOG_ERR("Invalid socket id specified to create "
- "the virtual crypto device on");
- goto free_kvlist;
- }
- }
-
-free_kvlist:
- rte_kvargs_free(kvlist);
- return ret;
-}
-
const struct rte_cryptodev_symmetric_capability *
rte_cryptodev_sym_capability_get(uint8_t dev_id,
const struct rte_cryptodev_sym_capability_idx *idx)
@@ -363,6 +280,10 @@ rte_cryptodev_sym_capability_get(uint8_t dev_id,
if (idx->type == RTE_CRYPTO_SYM_XFORM_CIPHER &&
capability->sym.cipher.algo == idx->algo.cipher)
return &capability->sym;
+
+ if (idx->type == RTE_CRYPTO_SYM_XFORM_AEAD &&
+ capability->sym.aead.algo == idx->algo.aead)
+ return &capability->sym;
}
return NULL;
@@ -390,7 +311,7 @@ rte_cryptodev_sym_capability_check_cipher(
int
rte_cryptodev_sym_capability_check_auth(
const struct rte_cryptodev_symmetric_capability *capability,
- uint16_t key_size, uint16_t digest_size, uint16_t aad_size)
+ uint16_t key_size, uint16_t digest_size, uint16_t iv_size)
{
if (param_range_check(key_size, capability->auth.key_size))
return -1;
@@ -398,12 +319,32 @@ rte_cryptodev_sym_capability_check_auth(
if (param_range_check(digest_size, capability->auth.digest_size))
return -1;
- if (param_range_check(aad_size, capability->auth.aad_size))
+ if (param_range_check(iv_size, capability->auth.iv_size))
return -1;
return 0;
}
+int
+rte_cryptodev_sym_capability_check_aead(
+ const struct rte_cryptodev_symmetric_capability *capability,
+ uint16_t key_size, uint16_t digest_size, uint16_t aad_size,
+ uint16_t iv_size)
+{
+ if (param_range_check(key_size, capability->aead.key_size))
+ return -1;
+
+ if (param_range_check(digest_size, capability->aead.digest_size))
+ return -1;
+
+ if (param_range_check(aad_size, capability->aead.aad_size))
+ return -1;
+
+ if (param_range_check(iv_size, capability->aead.iv_size))
+ return -1;
+
+ return 0;
+}
const char *
rte_cryptodev_get_feature_name(uint64_t flag)
@@ -509,12 +450,12 @@ rte_cryptodev_count(void)
}
uint8_t
-rte_cryptodev_count_devtype(enum rte_cryptodev_type type)
+rte_cryptodev_device_count_by_driver(uint8_t driver_id)
{
uint8_t i, dev_count = 0;
for (i = 0; i < rte_cryptodev_globals->max_devs; i++)
- if (rte_cryptodev_globals->devs[i].dev_type == type &&
+ if (rte_cryptodev_globals->devs[i].driver_id == driver_id &&
rte_cryptodev_globals->devs[i].attached ==
RTE_CRYPTODEV_ATTACHED)
dev_count++;
@@ -523,7 +464,7 @@ rte_cryptodev_count_devtype(enum rte_cryptodev_type type)
}
uint8_t
-rte_cryptodev_devices_get(const char *dev_name, uint8_t *devices,
+rte_cryptodev_devices_get(const char *driver_name, uint8_t *devices,
uint8_t nb_devices)
{
uint8_t i, count = 0;
@@ -533,15 +474,11 @@ rte_cryptodev_devices_get(const char *dev_name, uint8_t *devices,
for (i = 0; i < max_devs && count < nb_devices; i++) {
if (devs[i].attached == RTE_CRYPTODEV_ATTACHED) {
- const struct rte_cryptodev_driver *drv = devs[i].driver;
int cmp;
- if (drv)
- cmp = strncmp(drv->pci_drv.driver.name,
- dev_name, strlen(dev_name));
- else
- cmp = strncmp(devs[i].data->name,
- dev_name, strlen(dev_name));
+ cmp = strncmp(devs[i].device->driver->name,
+ driver_name,
+ strlen(driver_name));
if (cmp == 0)
devices[count++] = devs[i].data->dev_id;
@@ -662,144 +599,15 @@ rte_cryptodev_pmd_release_device(struct rte_cryptodev *cryptodev)
if (cryptodev == NULL)
return -EINVAL;
- ret = rte_cryptodev_close(cryptodev->data->dev_id);
- if (ret < 0)
- return ret;
-
- cryptodev->attached = RTE_CRYPTODEV_DETACHED;
- cryptodev_globals.nb_devs--;
- return 0;
-}
-
-struct rte_cryptodev *
-rte_cryptodev_pmd_virtual_dev_init(const char *name, size_t dev_private_size,
- int socket_id)
-{
- struct rte_cryptodev *cryptodev;
-
- /* allocate device structure */
- cryptodev = rte_cryptodev_pmd_allocate(name, socket_id);
- if (cryptodev == NULL)
- return NULL;
-
- /* allocate private device structure */
- if (rte_eal_process_type() == RTE_PROC_PRIMARY) {
- cryptodev->data->dev_private =
- rte_zmalloc_socket("cryptodev device private",
- dev_private_size,
- RTE_CACHE_LINE_SIZE,
- socket_id);
-
- if (cryptodev->data->dev_private == NULL)
- rte_panic("Cannot allocate memzone for private device"
- " data");
- }
-
- /* initialise user call-back tail queue */
- TAILQ_INIT(&(cryptodev->link_intr_cbs));
-
- return cryptodev;
-}
-
-int
-rte_cryptodev_pci_probe(struct rte_pci_driver *pci_drv,
- struct rte_pci_device *pci_dev)
-{
- struct rte_cryptodev_driver *cryptodrv;
- struct rte_cryptodev *cryptodev;
-
- char cryptodev_name[RTE_CRYPTODEV_NAME_MAX_LEN];
-
- int retval;
-
- cryptodrv = (struct rte_cryptodev_driver *)pci_drv;
- if (cryptodrv == NULL)
- return -ENODEV;
-
- rte_pci_device_name(&pci_dev->addr, cryptodev_name,
- sizeof(cryptodev_name));
-
- cryptodev = rte_cryptodev_pmd_allocate(cryptodev_name, rte_socket_id());
- if (cryptodev == NULL)
- return -ENOMEM;
-
- if (rte_eal_process_type() == RTE_PROC_PRIMARY) {
- cryptodev->data->dev_private =
- rte_zmalloc_socket(
- "cryptodev private structure",
- cryptodrv->dev_private_size,
- RTE_CACHE_LINE_SIZE,
- rte_socket_id());
-
- if (cryptodev->data->dev_private == NULL)
- rte_panic("Cannot allocate memzone for private "
- "device data");
+ /* Close device only if device operations have been set */
+ if (cryptodev->dev_ops) {
+ ret = rte_cryptodev_close(cryptodev->data->dev_id);
+ if (ret < 0)
+ return ret;
}
- cryptodev->device = &pci_dev->device;
- cryptodev->driver = cryptodrv;
-
- /* init user callbacks */
- TAILQ_INIT(&(cryptodev->link_intr_cbs));
-
- /* Invoke PMD device initialization function */
- retval = (*cryptodrv->cryptodev_init)(cryptodrv, cryptodev);
- if (retval == 0)
- return 0;
-
- CDEV_LOG_ERR("driver %s: crypto_dev_init(vendor_id=0x%x device_id=0x%x)"
- " failed", pci_drv->driver.name,
- (unsigned) pci_dev->id.vendor_id,
- (unsigned) pci_dev->id.device_id);
-
- if (rte_eal_process_type() == RTE_PROC_PRIMARY)
- rte_free(cryptodev->data->dev_private);
-
cryptodev->attached = RTE_CRYPTODEV_DETACHED;
cryptodev_globals.nb_devs--;
-
- return -ENXIO;
-}
-
-int
-rte_cryptodev_pci_remove(struct rte_pci_device *pci_dev)
-{
- const struct rte_cryptodev_driver *cryptodrv;
- struct rte_cryptodev *cryptodev;
- char cryptodev_name[RTE_CRYPTODEV_NAME_MAX_LEN];
- int ret;
-
- if (pci_dev == NULL)
- return -EINVAL;
-
- rte_pci_device_name(&pci_dev->addr, cryptodev_name,
- sizeof(cryptodev_name));
-
- cryptodev = rte_cryptodev_pmd_get_named_dev(cryptodev_name);
- if (cryptodev == NULL)
- return -ENODEV;
-
- cryptodrv = (const struct rte_cryptodev_driver *)pci_dev->driver;
- if (cryptodrv == NULL)
- return -ENODEV;
-
- /* Invoke PMD device uninit function */
- if (*cryptodrv->cryptodev_uninit) {
- ret = (*cryptodrv->cryptodev_uninit)(cryptodrv, cryptodev);
- if (ret)
- return ret;
- }
-
- /* free crypto device */
- rte_cryptodev_pmd_release_device(cryptodev);
-
- if (rte_eal_process_type() == RTE_PROC_PRIMARY)
- rte_free(cryptodev->data->dev_private);
-
- cryptodev->device = NULL;
- cryptodev->driver = NULL;
- cryptodev->data = NULL;
-
return 0;
}
@@ -934,10 +742,6 @@ rte_cryptodev_queue_pair_stop(uint8_t dev_id, uint16_t queue_pair_id)
}
-static int
-rte_cryptodev_sym_session_pool_create(struct rte_cryptodev *dev,
- unsigned nb_objs, unsigned obj_cache_size, int socket_id);
-
int
rte_cryptodev_configure(uint8_t dev_id, struct rte_cryptodev_config *config)
{
@@ -968,14 +772,6 @@ rte_cryptodev_configure(uint8_t dev_id, struct rte_cryptodev_config *config)
return diag;
}
- /* Setup Session mempool for device */
- diag = rte_cryptodev_sym_session_pool_create(dev,
- config->session_mp.nb_objs,
- config->session_mp.cache_size,
- config->socket_id);
- if (diag != 0)
- return diag;
-
return (*dev->dev_ops->dev_configure)(dev, config);
}
@@ -1032,8 +828,8 @@ rte_cryptodev_stop(uint8_t dev_id)
return;
}
- dev->data->dev_started = 0;
(*dev->dev_ops->dev_stop)(dev);
+ dev->data->dev_started = 0;
}
int
@@ -1078,7 +874,9 @@ rte_cryptodev_close(uint8_t dev_id)
int
rte_cryptodev_queue_pair_setup(uint8_t dev_id, uint16_t queue_pair_id,
- const struct rte_cryptodev_qp_conf *qp_conf, int socket_id)
+ const struct rte_cryptodev_qp_conf *qp_conf, int socket_id,
+ struct rte_mempool *session_pool)
{
struct rte_cryptodev *dev;
@@ -1102,7 +900,7 @@ rte_cryptodev_queue_pair_setup(uint8_t dev_id, uint16_t queue_pair_id,
RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->queue_pair_setup, -ENOTSUP);
return (*dev->dev_ops->queue_pair_setup)(dev, queue_pair_id, qp_conf,
- socket_id);
+ socket_id, session_pool);
}
@@ -1163,9 +961,7 @@ rte_cryptodev_info_get(uint8_t dev_id, struct rte_cryptodev_info *dev_info)
RTE_FUNC_PTR_OR_RET(*dev->dev_ops->dev_infos_get);
(*dev->dev_ops->dev_infos_get)(dev, dev_info);
- dev_info->pci_dev = RTE_DEV_TO_PCI(dev->device);
- if (dev->driver)
- dev_info->driver_name = dev->driver->pci_drv.driver.name;
+ dev_info->driver_name = dev->device->driver->name;
}
@@ -1281,142 +1077,74 @@ rte_cryptodev_pmd_callback_process(struct rte_cryptodev *dev,
}
-static void
-rte_cryptodev_sym_session_init(struct rte_mempool *mp,
- void *opaque_arg,
- void *_sess,
- __rte_unused unsigned i)
-{
- struct rte_cryptodev_sym_session *sess = _sess;
- struct rte_cryptodev *dev = opaque_arg;
-
- memset(sess, 0, mp->elt_size);
-
- sess->dev_id = dev->data->dev_id;
- sess->dev_type = dev->dev_type;
- sess->mp = mp;
-
- if (dev->dev_ops->session_initialize)
- (*dev->dev_ops->session_initialize)(mp, sess);
-}
-
-static int
-rte_cryptodev_sym_session_pool_create(struct rte_cryptodev *dev,
- unsigned nb_objs, unsigned obj_cache_size, int socket_id)
+int
+rte_cryptodev_sym_session_init(uint8_t dev_id,
+ struct rte_cryptodev_sym_session *sess,
+ struct rte_crypto_sym_xform *xforms,
+ struct rte_mempool *mp)
{
- char mp_name[RTE_CRYPTODEV_NAME_MAX_LEN];
- unsigned priv_sess_size;
+ struct rte_cryptodev *dev;
+ uint8_t index;
+ int ret;
- unsigned n = snprintf(mp_name, sizeof(mp_name), "cdev_%d_sess_mp",
- dev->data->dev_id);
- if (n > sizeof(mp_name)) {
- CDEV_LOG_ERR("Unable to create unique name for session mempool");
- return -ENOMEM;
- }
+ dev = rte_cryptodev_pmd_get_dev(dev_id);
- RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->session_get_size, -ENOTSUP);
- priv_sess_size = (*dev->dev_ops->session_get_size)(dev);
- if (priv_sess_size == 0) {
- CDEV_LOG_ERR("%s returned and invalid private session size ",
- dev->data->name);
- return -ENOMEM;
- }
+ if (sess == NULL || xforms == NULL || dev == NULL)
+ return -EINVAL;
- unsigned elt_size = sizeof(struct rte_cryptodev_sym_session) +
- priv_sess_size;
+ index = dev->driver_id;
- dev->data->session_pool = rte_mempool_lookup(mp_name);
- if (dev->data->session_pool != NULL) {
- if ((dev->data->session_pool->elt_size != elt_size) ||
- (dev->data->session_pool->cache_size <
- obj_cache_size) ||
- (dev->data->session_pool->size < nb_objs)) {
-
- CDEV_LOG_ERR("%s mempool already exists with different"
- " initialization parameters", mp_name);
- dev->data->session_pool = NULL;
- return -ENOMEM;
- }
- } else {
- dev->data->session_pool = rte_mempool_create(
- mp_name, /* mempool name */
- nb_objs, /* number of elements*/
- elt_size, /* element size*/
- obj_cache_size, /* Cache size*/
- 0, /* private data size */
- NULL, /* obj initialization constructor */
- NULL, /* obj initialization constructor arg */
- rte_cryptodev_sym_session_init,
- /**< obj constructor*/
- dev, /* obj constructor arg */
- socket_id, /* socket id */
- 0); /* flags */
-
- if (dev->data->session_pool == NULL) {
- CDEV_LOG_ERR("%s mempool allocation failed", mp_name);
- return -ENOMEM;
+ if (sess->sess_private_data[index] == NULL) {
+ ret = dev->dev_ops->session_configure(dev, xforms, sess, mp);
+ if (ret < 0) {
+ CDEV_LOG_ERR(
+ "dev_id %d failed to configure session details",
+ dev_id);
+ return ret;
}
}
- CDEV_LOG_DEBUG("%s mempool created!", mp_name);
return 0;
}
struct rte_cryptodev_sym_session *
-rte_cryptodev_sym_session_create(uint8_t dev_id,
- struct rte_crypto_sym_xform *xform)
+rte_cryptodev_sym_session_create(struct rte_mempool *mp)
{
- struct rte_cryptodev *dev;
struct rte_cryptodev_sym_session *sess;
- void *_sess;
-
- if (!rte_cryptodev_pmd_is_valid_dev(dev_id)) {
- CDEV_LOG_ERR("Invalid dev_id=%d", dev_id);
- return NULL;
- }
-
- dev = &rte_crypto_devices[dev_id];
/* Allocate a session structure from the session pool */
- if (rte_mempool_get(dev->data->session_pool, &_sess)) {
- CDEV_LOG_ERR("Couldn't get object from session mempool");
+ if (rte_mempool_get(mp, (void *)&sess)) {
+ CDEV_LOG_ERR("couldn't get object from session mempool");
return NULL;
}
- sess = _sess;
-
- RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->session_configure, NULL);
- if (dev->dev_ops->session_configure(dev, xform, sess->_private) ==
- NULL) {
- CDEV_LOG_ERR("dev_id %d failed to configure session details",
- dev_id);
-
- /* Return session to mempool */
- rte_mempool_put(sess->mp, _sess);
- return NULL;
- }
+ /* Clear device session pointer */
+ memset(sess, 0, (sizeof(void *) * nb_drivers));
return sess;
}
int
-rte_cryptodev_queue_pair_attach_sym_session(uint16_t qp_id,
+rte_cryptodev_queue_pair_attach_sym_session(uint8_t dev_id, uint16_t qp_id,
struct rte_cryptodev_sym_session *sess)
{
struct rte_cryptodev *dev;
- if (!rte_cryptodev_pmd_is_valid_dev(sess->dev_id)) {
- CDEV_LOG_ERR("Invalid dev_id=%d", sess->dev_id);
+ if (!rte_cryptodev_pmd_is_valid_dev(dev_id)) {
+ CDEV_LOG_ERR("Invalid dev_id=%d", dev_id);
return -EINVAL;
}
- dev = &rte_crypto_devices[sess->dev_id];
+ dev = &rte_crypto_devices[dev_id];
	/* The API is optional, not returning an error if the driver does not support it */
RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->qp_attach_session, 0);
- if (dev->dev_ops->qp_attach_session(dev, qp_id, sess->_private)) {
+
+ void *sess_priv = get_session_private_data(sess, dev->driver_id);
+
+ if (dev->dev_ops->qp_attach_session(dev, qp_id, sess_priv)) {
CDEV_LOG_ERR("dev_id %d failed to attach qp: %d with session",
- sess->dev_id, qp_id);
+ dev_id, qp_id);
return -EPERM;
}
@@ -1424,53 +1152,109 @@ rte_cryptodev_queue_pair_attach_sym_session(uint16_t qp_id,
}
int
-rte_cryptodev_queue_pair_detach_sym_session(uint16_t qp_id,
+rte_cryptodev_queue_pair_detach_sym_session(uint8_t dev_id, uint16_t qp_id,
struct rte_cryptodev_sym_session *sess)
{
struct rte_cryptodev *dev;
- if (!rte_cryptodev_pmd_is_valid_dev(sess->dev_id)) {
- CDEV_LOG_ERR("Invalid dev_id=%d", sess->dev_id);
+ if (!rte_cryptodev_pmd_is_valid_dev(dev_id)) {
+ CDEV_LOG_ERR("Invalid dev_id=%d", dev_id);
return -EINVAL;
}
- dev = &rte_crypto_devices[sess->dev_id];
+ dev = &rte_crypto_devices[dev_id];
	/* The API is optional, not returning an error if the driver does not support it */
RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->qp_detach_session, 0);
- if (dev->dev_ops->qp_detach_session(dev, qp_id, sess->_private)) {
+
+ void *sess_priv = get_session_private_data(sess, dev->driver_id);
+
+ if (dev->dev_ops->qp_detach_session(dev, qp_id, sess_priv)) {
CDEV_LOG_ERR("dev_id %d failed to detach qp: %d from session",
- sess->dev_id, qp_id);
+ dev_id, qp_id);
return -EPERM;
}
return 0;
}
-struct rte_cryptodev_sym_session *
-rte_cryptodev_sym_session_free(uint8_t dev_id,
+
+int
+rte_cryptodev_sym_session_clear(uint8_t dev_id,
struct rte_cryptodev_sym_session *sess)
{
struct rte_cryptodev *dev;
- if (!rte_cryptodev_pmd_is_valid_dev(dev_id)) {
- CDEV_LOG_ERR("Invalid dev_id=%d", dev_id);
- return sess;
- }
+ dev = rte_cryptodev_pmd_get_dev(dev_id);
- dev = &rte_crypto_devices[dev_id];
+ if (dev == NULL || sess == NULL)
+ return -EINVAL;
- /* Check the session belongs to this device type */
- if (sess->dev_type != dev->dev_type)
- return sess;
+ dev->dev_ops->session_clear(dev, sess);
- /* Let device implementation clear session material */
- RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->session_clear, sess);
- dev->dev_ops->session_clear(dev, (void *)sess->_private);
+ return 0;
+}
+
+int
+rte_cryptodev_sym_session_free(struct rte_cryptodev_sym_session *sess)
+{
+ uint8_t i;
+ void *sess_priv;
+ struct rte_mempool *sess_mp;
+
+ if (sess == NULL)
+ return -EINVAL;
+
+ /* Check that all device private data has been freed */
+ for (i = 0; i < nb_drivers; i++) {
+ sess_priv = get_session_private_data(sess, i);
+ if (sess_priv != NULL)
+ return -EBUSY;
+ }
/* Return session to mempool */
- rte_mempool_put(sess->mp, (void *)sess);
+ sess_mp = rte_mempool_from_obj(sess);
+ rte_mempool_put(sess_mp, sess);
+
+ return 0;
+}
+
+unsigned int
+rte_cryptodev_get_header_session_size(void)
+{
+ /*
+ * Header contains pointers to the private data
+ * of all registered drivers
+ */
+ return (sizeof(void *) * nb_drivers);
+}
+
+unsigned int
+rte_cryptodev_get_private_session_size(uint8_t dev_id)
+{
+ struct rte_cryptodev *dev;
+ unsigned int header_size = sizeof(void *) * nb_drivers;
+ unsigned int priv_sess_size;
+
+ if (!rte_cryptodev_pmd_is_valid_dev(dev_id))
+ return 0;
+
+ dev = rte_cryptodev_pmd_get_dev(dev_id);
+
+ if (*dev->dev_ops->session_get_size == NULL)
+ return 0;
+
+ priv_sess_size = (*dev->dev_ops->session_get_size)(dev);
+
+ /*
+ * If size is less than session header size,
+ * return the latter, as this guarantees that
+ * sessionless operations will work
+ */
+ if (priv_sess_size < header_size)
+ return header_size;
+
+ return priv_sess_size;
- return NULL;
}
/** Initialise rte_crypto_op mempool element */
@@ -1572,3 +1356,58 @@ rte_cryptodev_pmd_create_dev_name(char *name, const char *dev_name_prefix)
return -1;
}
+
+TAILQ_HEAD(cryptodev_driver_list, cryptodev_driver);
+
+static struct cryptodev_driver_list cryptodev_driver_list =
+ TAILQ_HEAD_INITIALIZER(cryptodev_driver_list);
+
+struct cryptodev_driver {
+ TAILQ_ENTRY(cryptodev_driver) next; /**< Next in list. */
+ const struct rte_driver *driver;
+ uint8_t id;
+};
+
+int
+rte_cryptodev_driver_id_get(const char *name)
+{
+ struct cryptodev_driver *driver;
+ const char *driver_name;
+
+ if (name == NULL) {
+ RTE_LOG(DEBUG, CRYPTODEV, "name pointer NULL");
+ return -1;
+ }
+
+ TAILQ_FOREACH(driver, &cryptodev_driver_list, next) {
+ driver_name = driver->driver->name;
+ if (strncmp(driver_name, name, strlen(driver_name)) == 0)
+ return driver->id;
+ }
+ return -1;
+}
+
+const char *
+rte_cryptodev_driver_name_get(uint8_t driver_id)
+{
+ struct cryptodev_driver *driver;
+
+ TAILQ_FOREACH(driver, &cryptodev_driver_list, next)
+ if (driver->id == driver_id)
+ return driver->driver->name;
+ return NULL;
+}
+
+uint8_t
+rte_cryptodev_allocate_driver(const struct rte_driver *drv)
+{
+ struct cryptodev_driver *driver;
+
+ driver = malloc(sizeof(*driver));
+ driver->driver = drv;
+ driver->id = nb_drivers;
+
+ TAILQ_INSERT_TAIL(&cryptodev_driver_list, driver, next);
+
+ return nb_drivers++;
+}
diff --git a/lib/librte_cryptodev/rte_cryptodev.h b/lib/librte_cryptodev/rte_cryptodev.h
index 88aeb873..7ec9c4bc 100644
--- a/lib/librte_cryptodev/rte_cryptodev.h
+++ b/lib/librte_cryptodev/rte_cryptodev.h
@@ -49,44 +49,7 @@ extern "C" {
#include "rte_crypto.h"
#include "rte_dev.h"
#include <rte_common.h>
-
-#define CRYPTODEV_NAME_NULL_PMD crypto_null
-/**< Null crypto PMD device name */
-#define CRYPTODEV_NAME_AESNI_MB_PMD crypto_aesni_mb
-/**< AES-NI Multi buffer PMD device name */
-#define CRYPTODEV_NAME_AESNI_GCM_PMD crypto_aesni_gcm
-/**< AES-NI GCM PMD device name */
-#define CRYPTODEV_NAME_OPENSSL_PMD crypto_openssl
-/**< Open SSL Crypto PMD device name */
-#define CRYPTODEV_NAME_QAT_SYM_PMD crypto_qat
-/**< Intel QAT Symmetric Crypto PMD device name */
-#define CRYPTODEV_NAME_SNOW3G_PMD crypto_snow3g
-/**< SNOW 3G PMD device name */
-#define CRYPTODEV_NAME_KASUMI_PMD crypto_kasumi
-/**< KASUMI PMD device name */
-#define CRYPTODEV_NAME_ZUC_PMD crypto_zuc
-/**< KASUMI PMD device name */
-#define CRYPTODEV_NAME_ARMV8_PMD crypto_armv8
-/**< ARMv8 Crypto PMD device name */
-#define CRYPTODEV_NAME_SCHEDULER_PMD crypto_scheduler
-/**< Scheduler Crypto PMD device name */
-#define CRYPTODEV_NAME_DPAA2_SEC_PMD cryptodev_dpaa2_sec_pmd
-/**< NXP DPAA2 - SEC PMD device name */
-
-/** Crypto device type */
-enum rte_cryptodev_type {
- RTE_CRYPTODEV_NULL_PMD = 1, /**< Null crypto PMD */
- RTE_CRYPTODEV_AESNI_GCM_PMD, /**< AES-NI GCM PMD */
- RTE_CRYPTODEV_AESNI_MB_PMD, /**< AES-NI multi buffer PMD */
- RTE_CRYPTODEV_QAT_SYM_PMD, /**< QAT PMD Symmetric Crypto */
- RTE_CRYPTODEV_SNOW3G_PMD, /**< SNOW 3G PMD */
- RTE_CRYPTODEV_KASUMI_PMD, /**< KASUMI PMD */
- RTE_CRYPTODEV_ZUC_PMD, /**< ZUC PMD */
- RTE_CRYPTODEV_OPENSSL_PMD, /**< OpenSSL PMD */
- RTE_CRYPTODEV_ARMV8_PMD, /**< ARMv8 crypto PMD */
- RTE_CRYPTODEV_SCHEDULER_PMD, /**< Crypto Scheduler PMD */
- RTE_CRYPTODEV_DPAA2_SEC_PMD, /**< NXP DPAA2 - SEC PMD */
-};
+#include <rte_vdev.h>
extern const char **rte_cyptodev_names;
@@ -118,6 +81,38 @@ extern const char **rte_cyptodev_names;
#define CDEV_PMD_TRACE(...) (void)0
#endif
+
+
+/**
+ * A macro that returns a pointer to a given offset from the start
+ * of the crypto operation structure (rte_crypto_op).
+ *
+ * The returned pointer is cast to type t.
+ *
+ * @param c
+ * The crypto operation.
+ * @param o
+ * The offset from the start of the crypto operation.
+ * @param t
+ * The type to cast the result into.
+ */
+#define rte_crypto_op_ctod_offset(c, t, o) \
+ ((t)((char *)(c) + (o)))
+
+/**
+ * A macro that returns the physical address of a given offset
+ * from the start of the crypto operation (rte_crypto_op).
+ *
+ * @param c
+ * The crypto operation.
+ * @param o
+ * The offset from the start of the crypto operation
+ * to calculate address from.
+ */
+#define rte_crypto_op_ctophys_offset(c, o) \
+ (phys_addr_t)((c)->phys_addr + (o))
+
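Usage sketch (illustrative, not from this patch): an application that reserves room for the IV right after the symmetric op in each op-pool element can reach it through these macros. IV_OFFSET and the helper are assumptions of this sketch, not part of the API.

    #include <rte_cryptodev.h>
    #include <rte_memcpy.h>

    /* Assumes the op pool element has IV space after the sym op. */
    #define IV_OFFSET (sizeof(struct rte_crypto_op) + \
            sizeof(struct rte_crypto_sym_op))

    static void
    write_iv(struct rte_crypto_op *op, const uint8_t *iv, uint16_t iv_len)
    {
        uint8_t *iv_ptr = rte_crypto_op_ctod_offset(op, uint8_t *,
                IV_OFFSET);
        phys_addr_t iv_phys = rte_crypto_op_ctophys_offset(op, IV_OFFSET);

        rte_memcpy(iv_ptr, iv, iv_len);
        (void)iv_phys; /* hardware PMDs consume the physical address */
    }
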
/**
* Crypto parameters range description
*/
@@ -137,7 +132,7 @@ struct rte_crypto_param_range {
*/
struct rte_cryptodev_symmetric_capability {
enum rte_crypto_sym_xform_type xform_type;
- /**< Transform type : Authentication / Cipher */
+ /**< Transform type : Authentication / Cipher / AEAD */
RTE_STD_C11
union {
struct {
@@ -151,6 +146,8 @@ struct rte_cryptodev_symmetric_capability {
/**< digest size range */
struct rte_crypto_param_range aad_size;
/**< Additional authentication data size range */
+ struct rte_crypto_param_range iv_size;
+ /**< Initialisation vector data size range */
} auth;
/**< Symmetric Authentication transform capabilities */
struct {
@@ -164,6 +161,20 @@ struct rte_cryptodev_symmetric_capability {
/**< Initialisation vector data size range */
} cipher;
/**< Symmetric Cipher transform capabilities */
+ struct {
+ enum rte_crypto_aead_algorithm algo;
+ /**< AEAD algorithm */
+ uint16_t block_size;
+ /**< algorithm block size */
+ struct rte_crypto_param_range key_size;
+ /**< AEAD key size range */
+ struct rte_crypto_param_range digest_size;
+ /**< digest size range */
+ struct rte_crypto_param_range aad_size;
+ /**< Additional authentication data size range */
+ struct rte_crypto_param_range iv_size;
+ /**< Initialisation vector data size range */
+ } aead;
};
};
@@ -185,6 +196,7 @@ struct rte_cryptodev_sym_capability_idx {
union {
enum rte_crypto_cipher_algorithm cipher;
enum rte_crypto_auth_algorithm auth;
+ enum rte_crypto_aead_algorithm aead;
} algo;
};
@@ -226,7 +238,7 @@ rte_cryptodev_sym_capability_check_cipher(
* @param capability Description of the symmetric crypto capability.
* @param key_size Auth key size.
* @param digest_size Auth digest size.
- * @param aad_size Auth aad size.
+ * @param iv_size Auth initial vector size.
*
* @return
* - Return 0 if the parameters are in range of the capability.
@@ -235,7 +247,27 @@ rte_cryptodev_sym_capability_check_cipher(
int
rte_cryptodev_sym_capability_check_auth(
const struct rte_cryptodev_symmetric_capability *capability,
- uint16_t key_size, uint16_t digest_size, uint16_t aad_size);
+ uint16_t key_size, uint16_t digest_size, uint16_t iv_size);
+
+/**
+ * Check if key, digest, AAD and initial vector sizes are supported
+ * in crypto AEAD capability
+ *
+ * @param capability Description of the symmetric crypto capability.
+ * @param key_size AEAD key size.
+ * @param digest_size AEAD digest size.
+ * @param aad_size AEAD AAD size.
+ * @param iv_size AEAD IV size.
+ *
+ * @return
+ * - Return 0 if the parameters are in range of the capability.
+ * - Return -1 if the parameters are out of range of the capability.
+ */
+int
+rte_cryptodev_sym_capability_check_aead(
+ const struct rte_cryptodev_symmetric_capability *capability,
+ uint16_t key_size, uint16_t digest_size, uint16_t aad_size,
+ uint16_t iv_size);
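
Usage sketch (illustrative, not from this patch): querying a device's AEAD capability and validating AES-GCM parameter sizes before creating a session. The sizes are only examples.

    #include <rte_cryptodev.h>

    static int
    check_gcm_support(uint8_t dev_id)
    {
        const struct rte_cryptodev_symmetric_capability *cap;
        struct rte_cryptodev_sym_capability_idx idx = {
            .type = RTE_CRYPTO_SYM_XFORM_AEAD,
            .algo.aead = RTE_CRYPTO_AEAD_AES_GCM,
        };

        cap = rte_cryptodev_sym_capability_get(dev_id, &idx);
        if (cap == NULL)
            return -1;

        /* 16B key, 16B digest, 16B AAD, 12B IV */
        return rte_cryptodev_sym_capability_check_aead(cap, 16, 16, 16, 12);
    }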
/**
* Provide the cipher algorithm enum, given an algorithm string
@@ -267,6 +299,21 @@ int
rte_cryptodev_get_auth_algo_enum(enum rte_crypto_auth_algorithm *algo_enum,
const char *algo_string);
+/**
+ * Provide the AEAD algorithm enum, given an algorithm string
+ *
+ * @param algo_enum A pointer to the AEAD algorithm
+ * enum to be filled
+ * @param algo_string AEAD algorithm string
+ *
+ * @return
+ * - Return -1 if string is not valid
+ * - Return 0 if the string is valid
+ */
+int
+rte_cryptodev_get_aead_algo_enum(enum rte_crypto_aead_algorithm *algo_enum,
+ const char *algo_string);
+
/** Macro used at end of crypto PMD list */
#define RTE_CRYPTODEV_END_OF_CAPABILITIES_LIST() \
{ RTE_CRYPTO_OP_TYPE_UNDEFINED }
@@ -321,7 +368,7 @@ rte_cryptodev_get_feature_name(uint64_t flag);
/** Crypto device information */
struct rte_cryptodev_info {
const char *driver_name; /**< Driver name. */
- enum rte_cryptodev_type dev_type; /**< Device type */
+ uint8_t driver_id; /**< Driver identifier */
struct rte_pci_device *pci_dev; /**< PCI information. */
uint64_t feature_flags; /**< Feature flags */
@@ -385,37 +432,10 @@ struct rte_cryptodev_stats {
#define RTE_CRYPTODEV_NAME_MAX_LEN (64)
/**< Max length of name of crypto PMD */
-#define RTE_CRYPTODEV_VDEV_DEFAULT_MAX_NB_QUEUE_PAIRS 8
-#define RTE_CRYPTODEV_VDEV_DEFAULT_MAX_NB_SESSIONS 2048
-
-/**
- * @internal
- * Initialisation parameters for virtual crypto devices
- */
-struct rte_crypto_vdev_init_params {
- unsigned max_nb_queue_pairs;
- unsigned max_nb_sessions;
- uint8_t socket_id;
- char name[RTE_CRYPTODEV_NAME_MAX_LEN];
-};
/**
- * Parse virtual device initialisation parameters input arguments
- * @internal
- *
- * @params params Initialisation parameters with defaults set.
- * @params input_args Command line arguments
+ * @deprecated
*
- * @return
- * 0 on successful parse
- * <0 on failure to parse
- */
-int
-rte_cryptodev_parse_vdev_init_params(
- struct rte_crypto_vdev_init_params *params,
- const char *input_args);
-
-/**
* Create a virtual crypto device
*
* @param name Cryptodev PMD name of device to be created.
@@ -426,6 +446,7 @@ rte_cryptodev_parse_vdev_init_params(
* which will be between 0 and rte_cryptodev_count().
* - In the case of a failure, returns -1.
*/
+__rte_deprecated
extern int
rte_cryptodev_create_vdev(const char *name, const char *args);
@@ -454,18 +475,19 @@ rte_cryptodev_count(void);
/**
 * Get the number of crypto devices that use a given driver.
*
- * @param type type of device.
+ * @param driver_id driver identifier.
*
* @return
* Returns number of crypto device.
*/
extern uint8_t
-rte_cryptodev_count_devtype(enum rte_cryptodev_type type);
+rte_cryptodev_device_count_by_driver(uint8_t driver_id);
/**
- * Get number and identifiers of attached crypto device.
+ * Get number and identifiers of attached crypto devices that
+ * use the same crypto driver.
*
- * @param dev_name device name.
+ * @param driver_name driver name.
* @param devices output devices identifiers.
* @param nb_devices maximal number of devices.
*
@@ -473,7 +495,7 @@ rte_cryptodev_count_devtype(enum rte_cryptodev_type type);
* Returns number of attached crypto device.
*/
uint8_t
-rte_cryptodev_devices_get(const char *dev_name, uint8_t *devices,
+rte_cryptodev_devices_get(const char *driver_name, uint8_t *devices,
uint8_t nb_devices);
/*
* Return the NUMA socket to which a device is connected
@@ -493,11 +515,6 @@ struct rte_cryptodev_config {
int socket_id; /**< Socket to allocate resources on */
uint16_t nb_queue_pairs;
/**< Number of queue pairs to configure on device */
-
- struct {
- uint32_t nb_objs; /**< Number of objects in mempool */
- uint32_t cache_size; /**< l-core object cache size */
- } session_mp; /**< Session mempool configuration */
};
/**
@@ -574,6 +591,8 @@ rte_cryptodev_close(uint8_t dev_id);
* *SOCKET_ID_ANY* if there is no NUMA constraint
* for the DMA memory allocated for the receive
* queue pair.
+ * @param session_pool Pointer to device session mempool, used
+ * for session-less operations.
*
* @return
* - 0: Success, queue pair correctly set up.
@@ -581,7 +600,8 @@ rte_cryptodev_close(uint8_t dev_id);
*/
extern int
rte_cryptodev_queue_pair_setup(uint8_t dev_id, uint16_t queue_pair_id,
- const struct rte_cryptodev_qp_conf *qp_conf, int socket_id);
+ const struct rte_cryptodev_qp_conf *qp_conf, int socket_id,
+ struct rte_mempool *session_pool);
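
Usage sketch (illustrative, not from this patch): configuring a device and one queue pair under the new signature, where the session mempool is now created by the application and passed in. Names and sizes are examples only.

    #include <rte_cryptodev.h>

    static int
    setup_dev(uint8_t dev_id, struct rte_mempool *sess_mp, int socket_id)
    {
        struct rte_cryptodev_config conf = {
            .socket_id = socket_id,
            .nb_queue_pairs = 1,
        };
        struct rte_cryptodev_qp_conf qp_conf = {
            .nb_descriptors = 2048, /* illustrative ring depth */
        };

        if (rte_cryptodev_configure(dev_id, &conf) < 0)
            return -1;

        return rte_cryptodev_queue_pair_setup(dev_id, 0, &qp_conf,
                socket_id, sess_mp);
    }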
/**
* Start a specified queue pair of a device. It is used
@@ -721,8 +741,6 @@ struct rte_cryptodev {
enqueue_pkt_burst_t enqueue_burst;
/**< Pointer to PMD transmit function. */
- const struct rte_cryptodev_driver *driver;
- /**< Driver for this device */
struct rte_cryptodev_data *data;
/**< Pointer to device data */
struct rte_cryptodev_ops *dev_ops;
@@ -732,8 +750,8 @@ struct rte_cryptodev {
struct rte_device *device;
/**< Backing device */
- enum rte_cryptodev_type dev_type;
- /**< Crypto device type */
+ uint8_t driver_id;
+ /**< Crypto driver identifier*/
struct rte_cryptodev_cb_list link_intr_cbs;
/**< User application callback for interrupts if present */
@@ -866,66 +884,100 @@ rte_cryptodev_enqueue_burst(uint8_t dev_id, uint16_t qp_id,
/** Cryptodev symmetric crypto session */
struct rte_cryptodev_sym_session {
- RTE_STD_C11
- struct {
- uint8_t dev_id;
- /**< Device Id */
- enum rte_cryptodev_type dev_type;
- /** Crypto Device type session created on */
- struct rte_mempool *mp;
- /**< Mempool session allocated from */
- } __rte_aligned(8);
- /**< Public symmetric session details */
-
- __extension__ char _private[0];
+ __extension__ void *sess_private_data[0];
/**< Private session material */
};
/**
- * Initialise a session for symmetric cryptographic operations.
+ * Create symmetric crypto session header (generic with no private data)
*
- * This function is used by the client to initialize immutable
- * parameters of symmetric cryptographic operation.
- * To perform the operation the rte_cryptodev_enqueue_burst function is
- * used. Each mbuf should contain a reference to the session
- * pointer returned from this function contained within it's crypto_op if a
- * session-based operation is being provisioned. Memory to contain the session
- * information is allocated from within mempool managed by the cryptodev.
+ * @param mempool Symmetric session mempool to allocate session
+ * objects from
+ * @return
+ * - On success return pointer to sym-session
+ * - On failure returns NULL
+ */
+struct rte_cryptodev_sym_session *
+rte_cryptodev_sym_session_create(struct rte_mempool *mempool);
+
+/**
+ * Frees symmetric crypto session header, after checking that all
+ * the device private data has been freed, returning it
+ * to its original mempool.
*
- * The rte_cryptodev_session_free must be called to free allocated
- * memory when the session is no longer required.
+ * @param sess Session header to be freed.
*
- * @param dev_id The device identifier.
- * @param xform Crypto transform chain.
+ * @return
+ * - 0 if successful.
+ * - -EINVAL if session is NULL.
+ * - -EBUSY if not all device private data has been freed.
+ */
+int
+rte_cryptodev_sym_session_free(struct rte_cryptodev_sym_session *sess);
+
+/**
+ * Fill out private data for the device id, based on its driver identifier.
+ *
+ * @param dev_id ID of device that we want the session to be used on
+ * @param sess Session where the private data will be attached to
+ * @param xforms Symmetric crypto transform operations to apply on flow
+ * processed with this session
+ * @param mempool Mempool where the private data is allocated.
+ *
+ * @return
+ * - On success, zero.
+ * - -EINVAL if input parameters are invalid.
+ * - -ENOTSUP if crypto device does not support the crypto transform.
+ * - -ENOMEM if the private session could not be allocated.
+ */
+int
+rte_cryptodev_sym_session_init(uint8_t dev_id,
+ struct rte_cryptodev_sym_session *sess,
+ struct rte_crypto_sym_xform *xforms,
+ struct rte_mempool *mempool);
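
Usage sketch (illustrative, not from this patch): the new two-step workflow, where a device-independent session header is allocated from an application mempool and then initialised with per-driver private data. One mempool serves both header and private data here for simplicity; separate pools are equally possible.

    #include <rte_cryptodev.h>

    static struct rte_cryptodev_sym_session *
    create_session(uint8_t dev_id, struct rte_mempool *sess_mp,
            struct rte_crypto_sym_xform *xform)
    {
        struct rte_cryptodev_sym_session *sess;

        sess = rte_cryptodev_sym_session_create(sess_mp);
        if (sess == NULL)
            return NULL;

        if (rte_cryptodev_sym_session_init(dev_id, sess, xform,
                sess_mp) < 0) {
            /* No private data was attached, so free cannot -EBUSY. */
            rte_cryptodev_sym_session_free(sess);
            return NULL;
        }

        return sess;
    }

Teardown mirrors this: rte_cryptodev_sym_session_clear() per device, then rte_cryptodev_sym_session_free().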
+/**
+ * Frees private data for the device id, based on its driver identifier,
+ * returning it to its mempool.
+ *
+ * @param dev_id ID of device that uses the session.
+ * @param sess Session containing the reference to the private data
*
* @return
- * Pointer to the created session or NULL
+ * - 0 if successful.
+ * - -EINVAL if device is invalid or session is NULL.
*/
-extern struct rte_cryptodev_sym_session *
-rte_cryptodev_sym_session_create(uint8_t dev_id,
- struct rte_crypto_sym_xform *xform);
+int
+rte_cryptodev_sym_session_clear(uint8_t dev_id,
+ struct rte_cryptodev_sym_session *sess);
/**
- * Free the memory associated with a previously allocated session.
+ * Get the size of the header session, for all registered drivers.
+ *
+ * @return
+ * Size of the header session.
+ */
+unsigned int
+rte_cryptodev_get_header_session_size(void);
+
+/**
+ * Get the size of the private session data for a device.
*
* @param dev_id The device identifier.
- * @param session Session pointer previously allocated by
- * *rte_cryptodev_sym_session_create*.
*
* @return
- * NULL on successful freeing of session.
- * Session pointer on failure to free session.
+ * - Size of the private data, if successful
+ * - 0 if device is invalid or does not have private session
*/
-extern struct rte_cryptodev_sym_session *
-rte_cryptodev_sym_session_free(uint8_t dev_id,
- struct rte_cryptodev_sym_session *session);
+unsigned int
+rte_cryptodev_get_private_session_size(uint8_t dev_id);
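
Usage sketch (illustrative, not from this patch): sizing a single session mempool so each element can hold either the header or the private data of the device in use; header plus private size is a conservative upper bound, and the element count and cache size are examples only.

    #include <rte_cryptodev.h>
    #include <rte_mempool.h>

    static struct rte_mempool *
    create_session_pool(uint8_t dev_id, int socket_id)
    {
        unsigned int elt_size = rte_cryptodev_get_header_session_size() +
                rte_cryptodev_get_private_session_size(dev_id);

        return rte_mempool_create("sess_mp", 2048, elt_size,
                64, 0, NULL, NULL, NULL, NULL, socket_id, 0);
    }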
/**
* Attach queue pair with sym session.
*
- * @param qp_id Queue pair to which session will be attached.
+ * @param dev_id Device to which the session will be attached.
+ * @param qp_id Queue pair to which the session will be attached.
* @param session Session pointer previously allocated by
* *rte_cryptodev_sym_session_create*.
*
@@ -934,13 +986,14 @@ rte_cryptodev_sym_session_free(uint8_t dev_id,
* - On failure, a negative value.
*/
int
-rte_cryptodev_queue_pair_attach_sym_session(uint16_t qp_id,
+rte_cryptodev_queue_pair_attach_sym_session(uint8_t dev_id, uint16_t qp_id,
struct rte_cryptodev_sym_session *session);
/**
* Detach queue pair with sym session.
*
- * @param qp_id Queue pair to which session is attached.
+ * @param dev_id Device to which the session is attached.
+ * @param qp_id Queue pair to which the session is attached.
* @param session Session pointer previously allocated by
* *rte_cryptodev_sym_session_create*.
*
@@ -949,9 +1002,48 @@ rte_cryptodev_queue_pair_attach_sym_session(uint16_t qp_id,
* - On failure, a negative value.
*/
int
-rte_cryptodev_queue_pair_detach_sym_session(uint16_t qp_id,
+rte_cryptodev_queue_pair_detach_sym_session(uint8_t dev_id, uint16_t qp_id,
struct rte_cryptodev_sym_session *session);
+/**
+ * Provide driver identifier.
+ *
+ * @param name
+ * The pointer to a driver name.
+ * @return
+ * The driver type identifier or -1 if no driver found
+ */
+int rte_cryptodev_driver_id_get(const char *name);
+
+/**
+ * Provide driver name.
+ *
+ * @param driver_id
+ * The driver identifier.
+ * @return
+ *   The driver name or NULL if no driver found
+ */
+const char *rte_cryptodev_driver_name_get(uint8_t driver_id);
+
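Usage sketch (illustrative, not from this patch): round-tripping between a driver name and the new numeric identifier; "crypto_aesni_mb" is only an example name.

    #include <stdio.h>
    #include <rte_cryptodev.h>

    static void
    show_driver(const char *name) /* e.g. "crypto_aesni_mb" */
    {
        int id = rte_cryptodev_driver_id_get(name);

        if (id >= 0)
            printf("driver %s has id %d\n",
                    rte_cryptodev_driver_name_get((uint8_t)id), id);
    }
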
+/**
+ * @internal
+ * Allocate Cryptodev driver.
+ *
+ * @param driver
+ * Pointer to rte_driver.
+ * @return
+ * The driver type identifier
+ */
+uint8_t rte_cryptodev_allocate_driver(const struct rte_driver *driver);
+
+
+#define RTE_PMD_REGISTER_CRYPTO_DRIVER(drv, driver_id)\
+RTE_INIT(init_ ##driver_id);\
+static void init_ ##driver_id(void)\
+{\
+ driver_id = rte_cryptodev_allocate_driver(&(drv).driver);\
+}
+
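Usage sketch (illustrative, not from this patch): how a PMD is expected to invoke the registration macro at constructor time. The driver object and id variable are hypothetical, and a virtual PMD would additionally register itself with RTE_PMD_REGISTER_VDEV.

    #include <rte_vdev.h>

    static uint8_t my_crypto_driver_id;               /* hypothetical */
    static struct rte_vdev_driver my_crypto_pmd_drv;  /* probe/remove set elsewhere */

    RTE_PMD_REGISTER_CRYPTO_DRIVER(my_crypto_pmd_drv, my_crypto_driver_id);
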
#ifdef __cplusplus
}
diff --git a/lib/librte_cryptodev/rte_cryptodev_pci.h b/lib/librte_cryptodev/rte_cryptodev_pci.h
new file mode 100644
index 00000000..67eda96a
--- /dev/null
+++ b/lib/librte_cryptodev/rte_cryptodev_pci.h
@@ -0,0 +1,92 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright(c) 2017 Intel Corporation. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of the copyright holder nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef _RTE_CRYPTODEV_PCI_H_
+#define _RTE_CRYPTODEV_PCI_H_
+
+#include <rte_pci.h>
+#include "rte_cryptodev.h"
+
+/**
+ * Initialisation function of a crypto driver invoked for each matching
+ * crypto PCI device detected during the PCI probing phase.
+ *
+ * @param dev The dev pointer is the address of the *rte_cryptodev*
+ * structure associated with the matching device and which
+ * has been [automatically] allocated in the
+ * *rte_crypto_devices* array.
+ *
+ * @return
+ * - 0: Success, the device is properly initialised by the driver.
+ * In particular, the driver MUST have set up the *dev_ops* pointer
+ * of the *dev* structure.
+ * - <0: Error code of the device initialisation failure.
+ */
+typedef int (*cryptodev_pci_init_t)(struct rte_cryptodev *dev);
+
+/**
+ * Finalisation function of a driver invoked for each matching
+ * PCI device detected during the PCI closing phase.
+ *
+ * @param dev The dev pointer is the address of the *rte_cryptodev*
+ * structure associated with the matching device and which
+ * has been [automatically] allocated in the
+ * *rte_crypto_devices* array.
+ *
+ * @return
+ *   - 0: Success, the device is properly finalised by the driver.
+ *        In particular, the driver MUST free the *dev_ops* pointer
+ *        of the *dev* structure.
+ *   - <0: Error code of the device finalisation failure.
+ */
+typedef int (*cryptodev_pci_uninit_t)(struct rte_cryptodev *dev);
+
+/**
+ * @internal
+ * Wrapper for use by pci drivers as a .probe function to attach to a crypto
+ * interface.
+ */
+int
+rte_cryptodev_pci_generic_probe(struct rte_pci_device *pci_dev,
+ size_t private_data_size,
+ cryptodev_pci_init_t dev_init);
+
+/**
+ * @internal
+ * Wrapper for use by pci drivers as a .remove function to detach a crypto
+ * interface.
+ */
+int
+rte_cryptodev_pci_generic_remove(struct rte_pci_device *pci_dev,
+ cryptodev_pci_uninit_t dev_uninit);
+
+#endif /* _RTE_CRYPTODEV_PCI_H_ */
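
Usage sketch (illustrative, not from this header): a PCI crypto PMD can route its rte_pci_driver probe/remove callbacks through these generic wrappers. All my_* symbols are hypothetical.

    #include <rte_common.h>
    #include <rte_pci.h>
    #include "rte_cryptodev_pci.h"

    struct my_priv { uint64_t state; }; /* hypothetical private data */

    static int my_dev_init(struct rte_cryptodev *dev)
    { (void)dev; return 0; }
    static int my_dev_uninit(struct rte_cryptodev *dev)
    { (void)dev; return 0; }

    static int
    my_pci_probe(struct rte_pci_driver *drv __rte_unused,
            struct rte_pci_device *pci_dev)
    {
        return rte_cryptodev_pci_generic_probe(pci_dev,
                sizeof(struct my_priv), my_dev_init);
    }

    static int
    my_pci_remove(struct rte_pci_device *pci_dev)
    {
        return rte_cryptodev_pci_generic_remove(pci_dev, my_dev_uninit);
    }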
diff --git a/lib/librte_cryptodev/rte_cryptodev_pmd.c b/lib/librte_cryptodev/rte_cryptodev_pmd.c
new file mode 100644
index 00000000..a57faadc
--- /dev/null
+++ b/lib/librte_cryptodev/rte_cryptodev_pmd.c
@@ -0,0 +1,249 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright(c) 2017 Intel Corporation. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of the copyright holder nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <rte_malloc.h>
+
+#include "rte_cryptodev_vdev.h"
+#include "rte_cryptodev_pci.h"
+#include "rte_cryptodev_pmd.h"
+
+/**
+ * Parse name from argument
+ */
+static int
+rte_cryptodev_vdev_parse_name_arg(const char *key __rte_unused,
+ const char *value, void *extra_args)
+{
+ struct rte_crypto_vdev_init_params *params = extra_args;
+
+ if (strlen(value) >= RTE_CRYPTODEV_NAME_MAX_LEN - 1) {
+ CDEV_LOG_ERR("Invalid name %s, should be less than "
+ "%u bytes", value,
+ RTE_CRYPTODEV_NAME_MAX_LEN - 1);
+ return -1;
+ }
+
+ strncpy(params->name, value, RTE_CRYPTODEV_NAME_MAX_LEN);
+
+ return 0;
+}
+
+/**
+ * Parse integer from argument
+ */
+static int
+rte_cryptodev_vdev_parse_integer_arg(const char *key __rte_unused,
+ const char *value, void *extra_args)
+{
+ int *i = extra_args;
+
+ *i = atoi(value);
+ if (*i < 0) {
+ CDEV_LOG_ERR("Argument has to be positive.");
+ return -1;
+ }
+
+ return 0;
+}
+
+struct rte_cryptodev *
+rte_cryptodev_vdev_pmd_init(const char *name, size_t dev_private_size,
+ int socket_id, struct rte_vdev_device *vdev)
+{
+ struct rte_cryptodev *cryptodev;
+
+ /* allocate device structure */
+ cryptodev = rte_cryptodev_pmd_allocate(name, socket_id);
+ if (cryptodev == NULL)
+ return NULL;
+
+ /* allocate private device structure */
+ if (rte_eal_process_type() == RTE_PROC_PRIMARY) {
+ cryptodev->data->dev_private =
+ rte_zmalloc_socket("cryptodev device private",
+ dev_private_size,
+ RTE_CACHE_LINE_SIZE,
+ socket_id);
+
+ if (cryptodev->data->dev_private == NULL)
+ rte_panic("Cannot allocate memzone for private device"
+ " data");
+ }
+
+ cryptodev->device = &vdev->device;
+
+ /* initialise user call-back tail queue */
+ TAILQ_INIT(&(cryptodev->link_intr_cbs));
+
+ return cryptodev;
+}
+
+int
+rte_cryptodev_vdev_parse_init_params(struct rte_crypto_vdev_init_params *params,
+ const char *input_args)
+{
+ struct rte_kvargs *kvlist = NULL;
+ int ret = 0;
+
+ if (params == NULL)
+ return -EINVAL;
+
+ if (input_args) {
+ kvlist = rte_kvargs_parse(input_args,
+ cryptodev_vdev_valid_params);
+ if (kvlist == NULL)
+ return -1;
+
+ ret = rte_kvargs_process(kvlist,
+ RTE_CRYPTODEV_VDEV_MAX_NB_QP_ARG,
+ &rte_cryptodev_vdev_parse_integer_arg,
+ &params->max_nb_queue_pairs);
+ if (ret < 0)
+ goto free_kvlist;
+
+ ret = rte_kvargs_process(kvlist,
+ RTE_CRYPTODEV_VDEV_MAX_NB_SESS_ARG,
+ &rte_cryptodev_vdev_parse_integer_arg,
+ &params->max_nb_sessions);
+ if (ret < 0)
+ goto free_kvlist;
+
+ ret = rte_kvargs_process(kvlist, RTE_CRYPTODEV_VDEV_SOCKET_ID,
+ &rte_cryptodev_vdev_parse_integer_arg,
+ &params->socket_id);
+ if (ret < 0)
+ goto free_kvlist;
+
+ ret = rte_kvargs_process(kvlist, RTE_CRYPTODEV_VDEV_NAME,
+ &rte_cryptodev_vdev_parse_name_arg,
+ params);
+ if (ret < 0)
+ goto free_kvlist;
+ }
+
+free_kvlist:
+ rte_kvargs_free(kvlist);
+ return ret;
+}
+
+int
+rte_cryptodev_pci_generic_probe(struct rte_pci_device *pci_dev,
+ size_t private_data_size,
+ cryptodev_pci_init_t dev_init)
+{
+ struct rte_cryptodev *cryptodev;
+
+ char cryptodev_name[RTE_CRYPTODEV_NAME_MAX_LEN];
+
+ int retval;
+
+ rte_pci_device_name(&pci_dev->addr, cryptodev_name,
+ sizeof(cryptodev_name));
+
+ cryptodev = rte_cryptodev_pmd_allocate(cryptodev_name, rte_socket_id());
+ if (cryptodev == NULL)
+ return -ENOMEM;
+
+ if (rte_eal_process_type() == RTE_PROC_PRIMARY) {
+ cryptodev->data->dev_private =
+ rte_zmalloc_socket(
+ "cryptodev private structure",
+ private_data_size,
+ RTE_CACHE_LINE_SIZE,
+ rte_socket_id());
+
+ if (cryptodev->data->dev_private == NULL)
+ rte_panic("Cannot allocate memzone for private "
+ "device data");
+ }
+
+ cryptodev->device = &pci_dev->device;
+
+ /* init user callbacks */
+ TAILQ_INIT(&(cryptodev->link_intr_cbs));
+
+ /* Invoke PMD device initialization function */
+ RTE_FUNC_PTR_OR_ERR_RET(*dev_init, -EINVAL);
+ retval = dev_init(cryptodev);
+ if (retval == 0)
+ return 0;
+
+ CDEV_LOG_ERR("driver %s: crypto_dev_init(vendor_id=0x%x device_id=0x%x)"
+ " failed", pci_dev->device.driver->name,
+ (unsigned int) pci_dev->id.vendor_id,
+ (unsigned int) pci_dev->id.device_id);
+
+ if (rte_eal_process_type() == RTE_PROC_PRIMARY)
+ rte_free(cryptodev->data->dev_private);
+
+ /* free crypto device */
+ rte_cryptodev_pmd_release_device(cryptodev);
+
+ return -ENXIO;
+}
+
+int
+rte_cryptodev_pci_generic_remove(struct rte_pci_device *pci_dev,
+ cryptodev_pci_uninit_t dev_uninit)
+{
+ struct rte_cryptodev *cryptodev;
+ char cryptodev_name[RTE_CRYPTODEV_NAME_MAX_LEN];
+ int ret;
+
+ if (pci_dev == NULL)
+ return -EINVAL;
+
+ rte_pci_device_name(&pci_dev->addr, cryptodev_name,
+ sizeof(cryptodev_name));
+
+ cryptodev = rte_cryptodev_pmd_get_named_dev(cryptodev_name);
+ if (cryptodev == NULL)
+ return -ENODEV;
+
+ /* Invoke PMD device uninit function */
+ if (dev_uninit) {
+ ret = dev_uninit(cryptodev);
+ if (ret)
+ return ret;
+ }
+
+ /* free crypto device */
+ rte_cryptodev_pmd_release_device(cryptodev);
+
+ if (rte_eal_process_type() == RTE_PROC_PRIMARY)
+ rte_free(cryptodev->data->dev_private);
+
+ cryptodev->device = NULL;
+ cryptodev->data = NULL;
+
+ return 0;
+}
diff --git a/lib/librte_cryptodev/rte_cryptodev_pmd.h b/lib/librte_cryptodev/rte_cryptodev_pmd.h
index 17ef37c7..c983eb21 100644
--- a/lib/librte_cryptodev/rte_cryptodev_pmd.h
+++ b/lib/librte_cryptodev/rte_cryptodev_pmd.h
@@ -47,7 +47,6 @@ extern "C" {
#include <string.h>
#include <rte_dev.h>
-#include <rte_pci.h>
#include <rte_malloc.h>
#include <rte_mbuf.h>
#include <rte_mempool.h>
@@ -57,80 +56,6 @@ extern "C" {
#include "rte_crypto.h"
#include "rte_cryptodev.h"
-struct rte_cryptodev_session {
- RTE_STD_C11
- struct {
- uint8_t dev_id;
- enum rte_cryptodev_type type;
- struct rte_mempool *mp;
- } __rte_aligned(8);
-
- __extension__ char _private[0];
-};
-
-struct rte_cryptodev_driver;
-
-/**
- * Initialisation function of a crypto driver invoked for each matching
- * crypto PCI device detected during the PCI probing phase.
- *
- * @param drv The pointer to the [matching] crypto driver structure
- * supplied by the PMD when it registered itself.
- * @param dev The dev pointer is the address of the *rte_cryptodev*
- * structure associated with the matching device and which
- * has been [automatically] allocated in the
- * *rte_crypto_devices* array.
- *
- * @return
- * - 0: Success, the device is properly initialised by the driver.
- * In particular, the driver MUST have set up the *dev_ops* pointer
- * of the *dev* structure.
- * - <0: Error code of the device initialisation failure.
- */
-typedef int (*cryptodev_init_t)(struct rte_cryptodev_driver *drv,
- struct rte_cryptodev *dev);
-
-/**
- * Finalisation function of a driver invoked for each matching
- * PCI device detected during the PCI closing phase.
- *
- * @param drv The pointer to the [matching] driver structure supplied
- * by the PMD when it registered itself.
- * @param dev The dev pointer is the address of the *rte_cryptodev*
- * structure associated with the matching device and which
- * has been [automatically] allocated in the
- * *rte_crypto_devices* array.
- *
- * * @return
- * - 0: Success, the device is properly finalised by the driver.
- * In particular, the driver MUST free the *dev_ops* pointer
- * of the *dev* structure.
- * - <0: Error code of the device initialisation failure.
- */
-typedef int (*cryptodev_uninit_t)(const struct rte_cryptodev_driver *drv,
- struct rte_cryptodev *dev);
-
-/**
- * The structure associated with a PMD driver.
- *
- * Each driver acts as a PCI driver and is represented by a generic
- * *crypto_driver* structure that holds:
- *
- * - An *rte_pci_driver* structure (which must be the first field).
- *
- * - The *cryptodev_init* function invoked for each matching PCI device.
- *
- * - The size of the private data to allocate for each matching device.
- */
-struct rte_cryptodev_driver {
- struct rte_pci_driver pci_drv; /**< The PMD is also a PCI driver. */
- unsigned dev_private_size; /**< Size of device private data. */
-
- cryptodev_init_t cryptodev_init; /**< Device init function. */
- cryptodev_uninit_t cryptodev_uninit; /**< Device uninit function. */
-};
-
-
/** Global structure used for maintaining state of allocated crypto devices */
struct rte_cryptodev_global {
struct rte_cryptodev *devs; /**< Device information array */
@@ -282,12 +207,13 @@ typedef int (*cryptodev_queue_pair_stop_t)(struct rte_cryptodev *dev,
* @param qp_id Queue Pair Index
* @param qp_conf Queue configuration structure
* @param socket_id Socket Index
+ * @param session_pool Pointer to device session mempool
*
* @return Returns 0 on success.
*/
typedef int (*cryptodev_queue_pair_setup_t)(struct rte_cryptodev *dev,
uint16_t qp_id, const struct rte_cryptodev_qp_conf *qp_conf,
- int socket_id);
+ int socket_id, struct rte_mempool *session_pool);
/**
* Release memory resources allocated by given queue pair.
@@ -341,39 +267,32 @@ typedef unsigned (*cryptodev_sym_get_session_private_size_t)(
struct rte_cryptodev *dev);
/**
- * Initialize a Crypto session on a device.
+ * Configure a Crypto session on a device.
*
* @param dev Crypto device pointer
* @param xform Single or chain of crypto xforms
* @param priv_sess Pointer to cryptodev's private session structure
+ * @param mp Mempool where the private session is allocated
*
* @return
- * - Returns private session structure on success.
- * - Returns NULL on failure.
+ * - Returns 0 if private session structure have been created successfully.
+ * - Returns -EINVAL if input parameters are invalid.
+ * - Returns -ENOTSUP if crypto device does not support the crypto transform.
+ * - Returns -ENOMEM if the private session could not be allocated.
*/
-typedef void (*cryptodev_sym_initialize_session_t)(struct rte_mempool *mempool,
- void *session_private);
+typedef int (*cryptodev_sym_configure_session_t)(struct rte_cryptodev *dev,
+ struct rte_crypto_sym_xform *xform,
+ struct rte_cryptodev_sym_session *session,
+ struct rte_mempool *mp);
/**
- * Configure a Crypto session on a device.
+ * Free driver private session data.
*
* @param dev Crypto device pointer
- * @param xform Single or chain of crypto xforms
- * @param priv_sess Pointer to cryptodev's private session structure
- *
- * @return
- * - Returns private session structure on success.
- * - Returns NULL on failure.
- */
-typedef void * (*cryptodev_sym_configure_session_t)(struct rte_cryptodev *dev,
- struct rte_crypto_sym_xform *xform, void *session_private);
-
-/**
- * Free Crypto session.
- * @param session Cryptodev session structure to free
+ * @param sess Cryptodev session structure
*/
typedef void (*cryptodev_sym_free_session_t)(struct rte_cryptodev *dev,
- void *session_private);
+ struct rte_cryptodev_sym_session *sess);
/**
* Optional API for drivers to attach sessions with queue pair.
@@ -428,8 +347,6 @@ struct rte_cryptodev_ops {
cryptodev_sym_get_session_private_size_t session_get_size;
/**< Return private session. */
- cryptodev_sym_initialize_session_t session_initialize;
- /**< Initialization function for private session data */
cryptodev_sym_configure_session_t session_configure;
/**< Configure a Crypto session. */
cryptodev_sym_free_session_t session_clear;
@@ -456,23 +373,6 @@ struct rte_cryptodev *
rte_cryptodev_pmd_allocate(const char *name, int socket_id);
/**
- * Creates a new virtual crypto device and returns the pointer
- * to that device.
- *
- * @param name PMD type name
- * @param dev_private_size Size of crypto PMDs private data
- * @param socket_id Socket to allocate resources on.
- *
- * @return
- * - Cryptodev pointer if device is successfully created.
- * - NULL if device cannot be created.
- */
-struct rte_cryptodev *
-rte_cryptodev_pmd_virtual_dev_init(const char *name, size_t dev_private_size,
- int socket_id);
-
-
-/**
* Function for internal use by dummy drivers primarily, e.g. ring-based
* driver.
* Release the specified cryptodev device.
@@ -499,25 +399,25 @@ void rte_cryptodev_pmd_callback_process(struct rte_cryptodev *dev,
enum rte_cryptodev_event_type event);
/**
- * Wrapper for use by pci drivers as a .probe function to attach to a crypto
- * interface.
- */
-int rte_cryptodev_pci_probe(struct rte_pci_driver *pci_drv,
- struct rte_pci_device *pci_dev);
-
-/**
- * Wrapper for use by pci drivers as a .remove function to detach a crypto
- * interface.
- */
-int rte_cryptodev_pci_remove(struct rte_pci_device *pci_dev);
-
-/**
* @internal
* Create unique device name
*/
int
rte_cryptodev_pmd_create_dev_name(char *name, const char *dev_name_prefix);
+static inline void *
+get_session_private_data(const struct rte_cryptodev_sym_session *sess,
+		uint8_t driver_id)
+{
+ return sess->sess_private_data[driver_id];
+}
+
+static inline void
+set_session_private_data(struct rte_cryptodev_sym_session *sess,
+ uint8_t driver_id, void *private_data)
+{
+ sess->sess_private_data[driver_id] = private_data;
+}
+
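The two inline accessors above are the intended way for a PMD to reach its slot in the shared session. A minimal sketch of how they could back the reworked session_configure/session_clear callbacks; the driver name, the private struct and the use of the device's driver_id field are illustrative assumptions, not part of this patch:

#include <errno.h>
#include <string.h>
#include <rte_mempool.h>
#include "rte_cryptodev.h"
#include "rte_cryptodev_pmd.h"

/* Hypothetical per-driver session state. */
struct sample_pmd_session {
	uint8_t key[64];
};

static int
sample_pmd_sym_configure_session(struct rte_cryptodev *dev,
		struct rte_crypto_sym_xform *xform,
		struct rte_cryptodev_sym_session *sess,
		struct rte_mempool *mp)
{
	struct sample_pmd_session *priv;

	if (xform == NULL)
		return -EINVAL;
	/* Private session data now comes from the caller-supplied mempool. */
	if (rte_mempool_get(mp, (void **)&priv) != 0)
		return -ENOMEM;
	/* ... translate the xform chain into priv here ... */
	set_session_private_data(sess, dev->driver_id, priv);
	return 0;
}

static void
sample_pmd_sym_clear_session(struct rte_cryptodev *dev,
		struct rte_cryptodev_sym_session *sess)
{
	void *priv = get_session_private_data(sess, dev->driver_id);

	if (priv != NULL) {
		memset(priv, 0, sizeof(struct sample_pmd_session));
		rte_mempool_put(rte_mempool_from_obj(priv), priv);
		set_session_private_data(sess, dev->driver_id, NULL);
	}
}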
#ifdef __cplusplus
}
#endif
diff --git a/lib/librte_cryptodev/rte_cryptodev_vdev.h b/lib/librte_cryptodev/rte_cryptodev_vdev.h
new file mode 100644
index 00000000..94ab9d33
--- /dev/null
+++ b/lib/librte_cryptodev/rte_cryptodev_vdev.h
@@ -0,0 +1,100 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright(c) 2017 Intel Corporation. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of the copyright holder nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef _RTE_CRYPTODEV_VDEV_H_
+#define _RTE_CRYPTODEV_VDEV_H_
+
+#include <rte_vdev.h>
+#include <inttypes.h>
+
+#include "rte_cryptodev.h"
+
+#define RTE_CRYPTODEV_VDEV_DEFAULT_MAX_NB_QUEUE_PAIRS 8
+#define RTE_CRYPTODEV_VDEV_DEFAULT_MAX_NB_SESSIONS 2048
+
+#define RTE_CRYPTODEV_VDEV_NAME ("name")
+#define RTE_CRYPTODEV_VDEV_MAX_NB_QP_ARG ("max_nb_queue_pairs")
+#define RTE_CRYPTODEV_VDEV_MAX_NB_SESS_ARG ("max_nb_sessions")
+#define RTE_CRYPTODEV_VDEV_SOCKET_ID ("socket_id")
+
+static const char * const cryptodev_vdev_valid_params[] = {
+ RTE_CRYPTODEV_VDEV_NAME,
+ RTE_CRYPTODEV_VDEV_MAX_NB_QP_ARG,
+ RTE_CRYPTODEV_VDEV_MAX_NB_SESS_ARG,
+ RTE_CRYPTODEV_VDEV_SOCKET_ID
+};
+
+/**
+ * @internal
+ * Initialisation parameters for virtual crypto devices
+ */
+struct rte_crypto_vdev_init_params {
+ unsigned int max_nb_queue_pairs;
+ unsigned int max_nb_sessions;
+ uint8_t socket_id;
+ char name[RTE_CRYPTODEV_NAME_MAX_LEN];
+};
+
+/**
+ * @internal
+ * Creates a new virtual crypto device and returns the pointer
+ * to that device.
+ *
+ * @param name PMD type name
+ * @param dev_private_size Size of crypto PMDs private data
+ * @param socket_id Socket to allocate resources on.
+ * @param vdev Pointer to virtual device structure.
+ *
+ * @return
+ * - Cryptodev pointer if device is successfully created.
+ * - NULL if device cannot be created.
+ */
+struct rte_cryptodev *
+rte_cryptodev_vdev_pmd_init(const char *name, size_t dev_private_size,
+ int socket_id, struct rte_vdev_device *vdev);
+
+/**
+ * @internal
+ * Parse virtual device initialisation parameters input arguments
+ *
+ * @param params	Initialisation parameters with defaults set.
+ * @param input_args	Command line arguments
+ *
+ * @return
+ * 0 on successful parse
+ * <0 on failure to parse
+ */
+int
+rte_cryptodev_vdev_parse_init_params(struct rte_crypto_vdev_init_params *params,
+ const char *input_args);
+
+#endif /* _RTE_CRYPTODEV_VDEV_H_ */
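Taken together, the two declarations above replace the old rte_cryptodev_pmd_virtual_dev_init() flow. A sketch of a vdev probe built on them; the driver name and private struct are hypothetical:

#include <errno.h>
#include <rte_lcore.h>
#include <rte_vdev.h>
#include "rte_cryptodev_vdev.h"

/* Hypothetical driver-private data. */
struct sample_vdev_private {
	unsigned int max_nb_queue_pairs;
};

static int
sample_crypto_vdev_probe(struct rte_vdev_device *vdev)
{
	struct rte_crypto_vdev_init_params params = {
		.max_nb_queue_pairs =
			RTE_CRYPTODEV_VDEV_DEFAULT_MAX_NB_QUEUE_PAIRS,
		.max_nb_sessions = RTE_CRYPTODEV_VDEV_DEFAULT_MAX_NB_SESSIONS,
		.socket_id = rte_socket_id(),
	};
	struct rte_cryptodev *dev;

	/* Override the defaults with any user-supplied devargs. */
	if (rte_cryptodev_vdev_parse_init_params(&params,
			rte_vdev_device_args(vdev)) < 0)
		return -EINVAL;

	dev = rte_cryptodev_vdev_pmd_init(rte_vdev_device_name(vdev),
			sizeof(struct sample_vdev_private),
			params.socket_id, vdev);
	return dev == NULL ? -ENODEV : 0;
}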
diff --git a/lib/librte_cryptodev/rte_cryptodev_version.map b/lib/librte_cryptodev/rte_cryptodev_version.map
index 9ac510ec..e9ba88ac 100644
--- a/lib/librte_cryptodev/rte_cryptodev_version.map
+++ b/lib/librte_cryptodev/rte_cryptodev_version.map
@@ -6,7 +6,6 @@ DPDK_16.04 {
rte_cryptodev_callback_unregister;
rte_cryptodev_close;
rte_cryptodev_count;
- rte_cryptodev_count_devtype;
rte_cryptodev_configure;
rte_cryptodev_create_vdev;
rte_cryptodev_get_dev_id;
@@ -15,7 +14,6 @@ DPDK_16.04 {
rte_cryptodev_pmd_allocate;
rte_cryptodev_pmd_callback_process;
rte_cryptodev_pmd_release_device;
- rte_cryptodev_pmd_virtual_dev_init;
rte_cryptodev_sym_session_create;
rte_cryptodev_sym_session_free;
rte_cryptodev_socket_id;
@@ -32,21 +30,6 @@ DPDK_16.04 {
local: *;
};
-DPDK_16.07 {
- global:
-
- rte_cryptodev_parse_vdev_init_params;
-
-} DPDK_16.04;
-
-DPDK_16.11 {
- global:
-
- rte_cryptodev_pci_probe;
- rte_cryptodev_pci_remove;
-
-} DPDK_16.07;
-
DPDK_17.02 {
global:
@@ -63,7 +46,7 @@ DPDK_17.02 {
rte_crypto_cipher_algorithm_strings;
rte_crypto_cipher_operation_strings;
-} DPDK_16.11;
+} DPDK_16.04;
DPDK_17.05 {
global:
@@ -74,3 +57,25 @@ DPDK_17.05 {
rte_cryptodev_queue_pair_detach_sym_session;
} DPDK_17.02;
+
+DPDK_17.08 {
+ global:
+
+ rte_cryptodev_allocate_driver;
+ rte_cryptodev_device_count_by_driver;
+ rte_cryptodev_driver_id_get;
+ rte_cryptodev_driver_name_get;
+ rte_cryptodev_get_aead_algo_enum;
+ rte_cryptodev_get_header_session_size;
+ rte_cryptodev_get_private_session_size;
+ rte_cryptodev_pci_generic_probe;
+ rte_cryptodev_pci_generic_remove;
+ rte_cryptodev_sym_capability_check_aead;
+ rte_cryptodev_sym_session_init;
+ rte_cryptodev_sym_session_clear;
+ rte_cryptodev_vdev_parse_init_params;
+ rte_cryptodev_vdev_pmd_init;
+ rte_crypto_aead_algorithm_strings;
+ rte_crypto_aead_operation_strings;
+
+} DPDK_17.05;
diff --git a/lib/librte_distributor/Makefile b/lib/librte_distributor/Makefile
index 3ffb911c..b417ee7b 100644
--- a/lib/librte_distributor/Makefile
+++ b/lib/librte_distributor/Makefile
@@ -46,10 +46,6 @@ SRCS-$(CONFIG_RTE_LIBRTE_DISTRIBUTOR) := rte_distributor_v20.c
SRCS-$(CONFIG_RTE_LIBRTE_DISTRIBUTOR) += rte_distributor.c
ifeq ($(CONFIG_RTE_ARCH_X86),y)
SRCS-$(CONFIG_RTE_LIBRTE_DISTRIBUTOR) += rte_distributor_match_sse.c
-# distributor SIMD algo needs SSE4.2 support
-ifeq ($(findstring RTE_MACHINE_CPUFLAG_SSE4_2,$(CFLAGS)),)
-CFLAGS_rte_distributor_match_sse.o += -msse4.2
-endif
else
SRCS-$(CONFIG_RTE_LIBRTE_DISTRIBUTOR) += rte_distributor_match_generic.c
endif
diff --git a/lib/librte_distributor/rte_distributor.c b/lib/librte_distributor/rte_distributor.c
index e4dfa7f0..20ba9ffb 100644
--- a/lib/librte_distributor/rte_distributor.c
+++ b/lib/librte_distributor/rte_distributor.c
@@ -41,7 +41,8 @@
#include <rte_errno.h>
#include <rte_string_fns.h>
#include <rte_eal_memconfig.h>
-#include <rte_compat.h>
+#include <rte_pause.h>
+
#include "rte_distributor_private.h"
#include "rte_distributor.h"
#include "rte_distributor_v20.h"
@@ -656,12 +657,10 @@ rte_distributor_create_v1705(const char *name,
d->num_workers = num_workers;
d->alg_type = alg_type;
+ d->dist_match_fn = RTE_DIST_MATCH_SCALAR;
#if defined(RTE_ARCH_X86)
- if (rte_cpu_get_flag_enabled(RTE_CPUFLAG_SSE4_2))
- d->dist_match_fn = RTE_DIST_MATCH_VECTOR;
- else
+ d->dist_match_fn = RTE_DIST_MATCH_VECTOR;
#endif
- d->dist_match_fn = RTE_DIST_MATCH_SCALAR;
/*
	 * Set up the backlog tags so they're pointing at the second cache
diff --git a/lib/librte_distributor/rte_distributor_v20.c b/lib/librte_distributor/rte_distributor_v20.c
index bb6c5d70..b09abecd 100644
--- a/lib/librte_distributor/rte_distributor_v20.c
+++ b/lib/librte_distributor/rte_distributor_v20.c
@@ -41,6 +41,8 @@
#include <rte_compat.h>
#include <rte_string_fns.h>
#include <rte_eal_memconfig.h>
+#include <rte_pause.h>
+
#include "rte_distributor_v20.h"
#include "rte_distributor_private.h"
diff --git a/lib/librte_eal/bsdapp/contigmem/contigmem.c b/lib/librte_eal/bsdapp/contigmem/contigmem.c
index da971deb..e8fb9087 100644
--- a/lib/librte_eal/bsdapp/contigmem/contigmem.c
+++ b/lib/librte_eal/bsdapp/contigmem/contigmem.c
@@ -50,24 +50,37 @@ __FBSDID("$FreeBSD$");
#include <vm/vm.h>
#include <vm/pmap.h>
+#include <vm/vm_param.h>
#include <vm/vm_object.h>
#include <vm/vm_page.h>
#include <vm/vm_pager.h>
+#include <vm/vm_phys.h>
+
+struct contigmem_buffer {
+ void *addr;
+ int refcnt;
+ struct mtx mtx;
+};
+
+struct contigmem_vm_handle {
+ int buffer_index;
+};
static int contigmem_load(void);
static int contigmem_unload(void);
static int contigmem_physaddr(SYSCTL_HANDLER_ARGS);
-static d_mmap_t contigmem_mmap;
static d_mmap_single_t contigmem_mmap_single;
static d_open_t contigmem_open;
+static d_close_t contigmem_close;
static int contigmem_num_buffers = RTE_CONTIGMEM_DEFAULT_NUM_BUFS;
static int64_t contigmem_buffer_size = RTE_CONTIGMEM_DEFAULT_BUF_SIZE;
static eventhandler_tag contigmem_eh_tag;
-static void *contigmem_buffers[RTE_CONTIGMEM_MAX_NUM_BUFS];
+static struct contigmem_buffer contigmem_buffers[RTE_CONTIGMEM_MAX_NUM_BUFS];
static struct cdev *contigmem_cdev = NULL;
+static int contigmem_refcnt;
TUNABLE_INT("hw.contigmem.num_buffers", &contigmem_num_buffers);
TUNABLE_QUAD("hw.contigmem.buffer_size", &contigmem_buffer_size);
@@ -78,6 +91,8 @@ SYSCTL_INT(_hw_contigmem, OID_AUTO, num_buffers, CTLFLAG_RD,
&contigmem_num_buffers, 0, "Number of contigmem buffers allocated");
SYSCTL_QUAD(_hw_contigmem, OID_AUTO, buffer_size, CTLFLAG_RD,
&contigmem_buffer_size, 0, "Size of each contiguous buffer");
+SYSCTL_INT(_hw_contigmem, OID_AUTO, num_references, CTLFLAG_RD,
+ &contigmem_refcnt, 0, "Number of references to contigmem");
static SYSCTL_NODE(_hw_contigmem, OID_AUTO, physaddr, CTLFLAG_RD, 0,
"physaddr");
@@ -114,42 +129,49 @@ MODULE_VERSION(contigmem, 1);
static struct cdevsw contigmem_ops = {
.d_name = "contigmem",
.d_version = D_VERSION,
- .d_mmap = contigmem_mmap,
+ .d_flags = D_TRACKCLOSE,
.d_mmap_single = contigmem_mmap_single,
.d_open = contigmem_open,
+ .d_close = contigmem_close,
};
static int
contigmem_load()
{
char index_string[8], description[32];
- int i;
+ int i, error = 0;
+ void *addr;
if (contigmem_num_buffers > RTE_CONTIGMEM_MAX_NUM_BUFS) {
printf("%d buffers requested is greater than %d allowed\n",
contigmem_num_buffers, RTE_CONTIGMEM_MAX_NUM_BUFS);
- return EINVAL;
+ error = EINVAL;
+ goto error;
}
if (contigmem_buffer_size < PAGE_SIZE ||
(contigmem_buffer_size & (contigmem_buffer_size - 1)) != 0) {
printf("buffer size 0x%lx is not greater than PAGE_SIZE and "
"power of two\n", contigmem_buffer_size);
- return EINVAL;
+ error = EINVAL;
+ goto error;
}
for (i = 0; i < contigmem_num_buffers; i++) {
- contigmem_buffers[i] =
- contigmalloc(contigmem_buffer_size, M_CONTIGMEM, M_ZERO, 0,
- BUS_SPACE_MAXADDR, contigmem_buffer_size, 0);
-
- if (contigmem_buffers[i] == NULL) {
+ addr = contigmalloc(contigmem_buffer_size, M_CONTIGMEM, M_ZERO,
+ 0, BUS_SPACE_MAXADDR, contigmem_buffer_size, 0);
+ if (addr == NULL) {
printf("contigmalloc failed for buffer %d\n", i);
- return ENOMEM;
+ error = ENOMEM;
+ goto error;
}
- printf("%2u: virt=%p phys=%p\n", i, contigmem_buffers[i],
- (void *)pmap_kextract((vm_offset_t)contigmem_buffers[i]));
+ printf("%2u: virt=%p phys=%p\n", i, addr,
+ (void *)pmap_kextract((vm_offset_t)addr));
+
+ mtx_init(&contigmem_buffers[i].mtx, "contigmem", NULL, MTX_DEF);
+ contigmem_buffers[i].addr = addr;
+ contigmem_buffers[i].refcnt = 0;
snprintf(index_string, sizeof(index_string), "%d", i);
snprintf(description, sizeof(description),
@@ -165,6 +187,17 @@ contigmem_load()
GID_WHEEL, 0600, "contigmem");
return 0;
+
+error:
+ for (i = 0; i < contigmem_num_buffers; i++) {
+ if (contigmem_buffers[i].addr != NULL)
+ contigfree(contigmem_buffers[i].addr,
+ contigmem_buffer_size, M_CONTIGMEM);
+ if (mtx_initialized(&contigmem_buffers[i].mtx))
+ mtx_destroy(&contigmem_buffers[i].mtx);
+ }
+
+ return error;
}
static int
@@ -172,16 +205,22 @@ contigmem_unload()
{
int i;
+ if (contigmem_refcnt > 0)
+ return EBUSY;
+
if (contigmem_cdev != NULL)
destroy_dev(contigmem_cdev);
if (contigmem_eh_tag != NULL)
EVENTHANDLER_DEREGISTER(process_exit, contigmem_eh_tag);
- for (i = 0; i < RTE_CONTIGMEM_MAX_NUM_BUFS; i++)
- if (contigmem_buffers[i] != NULL)
- contigfree(contigmem_buffers[i], contigmem_buffer_size,
- M_CONTIGMEM);
+ for (i = 0; i < RTE_CONTIGMEM_MAX_NUM_BUFS; i++) {
+ if (contigmem_buffers[i].addr != NULL)
+ contigfree(contigmem_buffers[i].addr,
+ contigmem_buffer_size, M_CONTIGMEM);
+ if (mtx_initialized(&contigmem_buffers[i].mtx))
+ mtx_destroy(&contigmem_buffers[i].mtx);
+ }
return 0;
}
@@ -192,7 +231,7 @@ contigmem_physaddr(SYSCTL_HANDLER_ARGS)
uint64_t physaddr;
int index = (int)(uintptr_t)arg1;
- physaddr = (uint64_t)vtophys(contigmem_buffers[index]);
+ physaddr = (uint64_t)vtophys(contigmem_buffers[index].addr);
return sysctl_handle_64(oidp, &physaddr, 0, req);
}
@@ -200,22 +239,121 @@ static int
contigmem_open(struct cdev *cdev, int fflags, int devtype,
struct thread *td)
{
+
+ atomic_add_int(&contigmem_refcnt, 1);
+
return 0;
}
static int
-contigmem_mmap(struct cdev *cdev, vm_ooffset_t offset, vm_paddr_t *paddr,
- int prot, vm_memattr_t *memattr)
+contigmem_close(struct cdev *cdev, int fflags, int devtype,
+ struct thread *td)
{
- *paddr = offset;
+ atomic_subtract_int(&contigmem_refcnt, 1);
+
return 0;
}
static int
+contigmem_cdev_pager_ctor(void *handle, vm_ooffset_t size, vm_prot_t prot,
+ vm_ooffset_t foff, struct ucred *cred, u_short *color)
+{
+ struct contigmem_vm_handle *vmh = handle;
+ struct contigmem_buffer *buf;
+
+ buf = &contigmem_buffers[vmh->buffer_index];
+
+ atomic_add_int(&contigmem_refcnt, 1);
+
+ mtx_lock(&buf->mtx);
+ if (buf->refcnt == 0)
+ memset(buf->addr, 0, contigmem_buffer_size);
+ buf->refcnt++;
+ mtx_unlock(&buf->mtx);
+
+ return 0;
+}
+
+static void
+contigmem_cdev_pager_dtor(void *handle)
+{
+ struct contigmem_vm_handle *vmh = handle;
+ struct contigmem_buffer *buf;
+
+ buf = &contigmem_buffers[vmh->buffer_index];
+
+ mtx_lock(&buf->mtx);
+ buf->refcnt--;
+ mtx_unlock(&buf->mtx);
+
+ free(vmh, M_CONTIGMEM);
+
+ atomic_subtract_int(&contigmem_refcnt, 1);
+}
+
+static int
+contigmem_cdev_pager_fault(vm_object_t object, vm_ooffset_t offset, int prot,
+ vm_page_t *mres)
+{
+ vm_paddr_t paddr;
+ vm_page_t m_paddr, page;
+ vm_memattr_t memattr, memattr1;
+
+ memattr = object->memattr;
+
+ VM_OBJECT_WUNLOCK(object);
+
+ paddr = offset;
+
+ m_paddr = vm_phys_paddr_to_vm_page(paddr);
+ if (m_paddr != NULL) {
+ memattr1 = pmap_page_get_memattr(m_paddr);
+ if (memattr1 != memattr)
+ memattr = memattr1;
+ }
+
+ if (((*mres)->flags & PG_FICTITIOUS) != 0) {
+ /*
+ * If the passed in result page is a fake page, update it with
+ * the new physical address.
+ */
+ page = *mres;
+ VM_OBJECT_WLOCK(object);
+ vm_page_updatefake(page, paddr, memattr);
+ } else {
+ vm_page_t mret;
+ /*
+ * Replace the passed in reqpage page with our own fake page and
+ * free up the original page.
+ */
+ page = vm_page_getfake(paddr, memattr);
+ VM_OBJECT_WLOCK(object);
+ mret = vm_page_replace(page, object, (*mres)->pindex);
+ KASSERT(mret == *mres,
+ ("invalid page replacement, old=%p, ret=%p", *mres, mret));
+ vm_page_lock(mret);
+ vm_page_free(mret);
+ vm_page_unlock(mret);
+ *mres = page;
+ }
+
+ page->valid = VM_PAGE_BITS_ALL;
+
+ return VM_PAGER_OK;
+}
+
+static struct cdev_pager_ops contigmem_cdev_pager_ops = {
+ .cdev_pg_ctor = contigmem_cdev_pager_ctor,
+ .cdev_pg_dtor = contigmem_cdev_pager_dtor,
+ .cdev_pg_fault = contigmem_cdev_pager_fault,
+};
+
+static int
contigmem_mmap_single(struct cdev *cdev, vm_ooffset_t *offset, vm_size_t size,
struct vm_object **obj, int nprot)
{
+ struct contigmem_vm_handle *vmh;
uint64_t buffer_index;
/*
@@ -227,10 +365,17 @@ contigmem_mmap_single(struct cdev *cdev, vm_ooffset_t *offset, vm_size_t size,
if (buffer_index >= contigmem_num_buffers)
return EINVAL;
- memset(contigmem_buffers[buffer_index], 0, contigmem_buffer_size);
- *offset = (vm_ooffset_t)vtophys(contigmem_buffers[buffer_index]);
- *obj = vm_pager_allocate(OBJT_DEVICE, cdev, size, nprot, *offset,
- curthread->td_ucred);
+ if (size > contigmem_buffer_size)
+ return EINVAL;
+
+ vmh = malloc(sizeof(*vmh), M_CONTIGMEM, M_NOWAIT | M_ZERO);
+ if (vmh == NULL)
+ return ENOMEM;
+ vmh->buffer_index = buffer_index;
+
+ *offset = (vm_ooffset_t)vtophys(contigmem_buffers[buffer_index].addr);
+ *obj = cdev_pager_allocate(vmh, OBJT_DEVICE, &contigmem_cdev_pager_ops,
+ size, nprot, *offset, curthread->td_ucred);
return 0;
}
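For context, the userspace side that this pager serves looks roughly as follows; a sketch assuming the conventional encoding in which the mmap offset selects the buffer (buffer_index * buffer_size):

#include <fcntl.h>
#include <stdint.h>
#include <sys/mman.h>
#include <unistd.h>

/* Map contigmem buffer `idx`; `bufsz` must equal hw.contigmem.buffer_size. */
static void *
map_contigmem_buffer(uint64_t idx, size_t bufsz)
{
	void *va;
	int fd = open("/dev/contigmem", O_RDWR);

	if (fd < 0)
		return NULL;
	/* contigmem_mmap_single() recovers the buffer index from the offset. */
	va = mmap(NULL, bufsz, PROT_READ | PROT_WRITE, MAP_SHARED,
			fd, (off_t)(idx * bufsz));
	close(fd);
	return va == MAP_FAILED ? NULL : va;
}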
diff --git a/lib/librte_eal/bsdapp/eal/Makefile b/lib/librte_eal/bsdapp/eal/Makefile
index a0f99502..005019ed 100644
--- a/lib/librte_eal/bsdapp/eal/Makefile
+++ b/lib/librte_eal/bsdapp/eal/Makefile
@@ -48,7 +48,7 @@ LDLIBS += -lgcc_s
EXPORT_MAP := rte_eal_version.map
-LIBABIVER := 4
+LIBABIVER := 5
# specific to bsdapp exec-env
SRCS-$(CONFIG_RTE_EXEC_ENV_BSDAPP) := eal.c
@@ -87,6 +87,7 @@ SRCS-$(CONFIG_RTE_EXEC_ENV_BSDAPP) += rte_malloc.c
SRCS-$(CONFIG_RTE_EXEC_ENV_BSDAPP) += malloc_elem.c
SRCS-$(CONFIG_RTE_EXEC_ENV_BSDAPP) += malloc_heap.c
SRCS-$(CONFIG_RTE_EXEC_ENV_BSDAPP) += rte_keepalive.c
+SRCS-$(CONFIG_RTE_EXEC_ENV_BSDAPP) += rte_service.c
# from arch dir
SRCS-$(CONFIG_RTE_EXEC_ENV_BSDAPP) += rte_cpuflags.c
diff --git a/lib/librte_eal/bsdapp/eal/eal.c b/lib/librte_eal/bsdapp/eal/eal.c
index 05f0c1f9..5fa59884 100644
--- a/lib/librte_eal/bsdapp/eal/eal.c
+++ b/lib/librte_eal/bsdapp/eal/eal.c
@@ -45,7 +45,6 @@
#include <stddef.h>
#include <errno.h>
#include <limits.h>
-#include <errno.h>
#include <sys/mman.h>
#include <sys/queue.h>
@@ -59,6 +58,7 @@
#include <rte_errno.h>
#include <rte_per_lcore.h>
#include <rte_lcore.h>
+#include <rte_service_component.h>
#include <rte_log.h>
#include <rte_random.h>
#include <rte_cycles.h>
@@ -69,7 +69,6 @@
#include <rte_pci.h>
#include <rte_dev.h>
#include <rte_devargs.h>
-#include <rte_common.h>
#include <rte_version.h>
#include <rte_atomic.h>
#include <malloc_heap.h>
@@ -615,6 +614,11 @@ rte_eal_init(int argc, char **argv)
rte_config.master_lcore, thread_id, cpuset,
ret == 0 ? "" : "...");
+ if (eal_option_device_parse()) {
+ rte_errno = ENODEV;
+ return -1;
+ }
+
if (rte_bus_scan()) {
rte_eal_init_alert("Cannot scan the buses for devices\n");
rte_errno = ENODEV;
@@ -653,6 +657,14 @@ rte_eal_init(int argc, char **argv)
rte_eal_mp_remote_launch(sync_func, NULL, SKIP_MASTER);
rte_eal_mp_wait_lcore();
+ /* initialize services so vdevs register service during bus_probe. */
+ ret = rte_service_init();
+ if (ret) {
+ rte_eal_init_alert("rte_service_init() failed\n");
+ rte_errno = ENOEXEC;
+ return -1;
+ }
+
/* Probe all the buses and devices/drivers on them */
if (rte_bus_probe()) {
rte_eal_init_alert("Cannot probe devices\n");
@@ -660,6 +672,15 @@ rte_eal_init(int argc, char **argv)
return -1;
}
+ /* initialize default service/lcore mappings and start running. Ignore
+ * -ENOTSUP, as it indicates no service coremask passed to EAL.
+ */
+ ret = rte_service_start_with_defaults();
+ if (ret < 0 && ret != -ENOTSUP) {
+ rte_errno = ENOEXEC;
+ return -1;
+ }
+
rte_eal_mcfg_complete();
return fctret;
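Applications that start EAL without a service coremask can still opt in later; a small sketch using two of the service symbols exported below (the lcore id is an arbitrary example):

#include <rte_service.h>

static void
add_service_core_at_runtime(void)
{
	/* Promote lcore 2 to a service core and start its service loop. */
	if (rte_service_lcore_add(2) == 0)
		rte_service_lcore_start(2);
}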
diff --git a/lib/librte_eal/bsdapp/eal/eal_pci.c b/lib/librte_eal/bsdapp/eal/eal_pci.c
index e321461d..04eacdcc 100644
--- a/lib/librte_eal/bsdapp/eal/eal_pci.c
+++ b/lib/librte_eal/bsdapp/eal/eal_pci.c
@@ -41,7 +41,6 @@
#include <sys/types.h>
#include <sys/stat.h>
#include <fcntl.h>
-#include <stdarg.h>
#include <errno.h>
#include <dirent.h>
#include <limits.h>
@@ -52,7 +51,6 @@
#include <dev/pci/pcireg.h>
#if defined(RTE_ARCH_X86)
-#include <sys/types.h>
#include <machine/cpufunc.h>
#endif
@@ -282,8 +280,7 @@ pci_scan_one(int dev_pci_fd, struct pci_conf *conf)
/* FreeBSD has no NUMA support (yet) */
dev->device.numa_node = 0;
- rte_pci_device_name(&dev->addr, dev->name, sizeof(dev->name));
- dev->device.name = dev->name;
+ pci_name_set(dev);
/* FreeBSD has only one pass through driver */
dev->kdrv = RTE_KDRV_NIC_UIO;
@@ -334,6 +331,7 @@ pci_scan_one(int dev_pci_fd, struct pci_conf *conf)
} else { /* already registered */
dev2->kdrv = dev->kdrv;
dev2->max_vfs = dev->max_vfs;
+ pci_name_set(dev2);
memmove(dev2->mem_resource,
dev->mem_resource,
sizeof(dev->mem_resource));
@@ -396,7 +394,7 @@ rte_pci_scan(void)
close(fd);
- RTE_LOG(ERR, EAL, "PCI scan found %u devices\n", dev_count);
+ RTE_LOG(DEBUG, EAL, "PCI scan found %u devices\n", dev_count);
return 0;
error:
diff --git a/lib/librte_eal/bsdapp/eal/eal_thread.c b/lib/librte_eal/bsdapp/eal/eal_thread.c
index 1b8cd8a6..783d68c5 100644
--- a/lib/librte_eal/bsdapp/eal/eal_thread.c
+++ b/lib/librte_eal/bsdapp/eal/eal_thread.c
@@ -49,7 +49,6 @@
#include <rte_memzone.h>
#include <rte_per_lcore.h>
#include <rte_eal.h>
-#include <rte_per_lcore.h>
#include <rte_lcore.h>
#include "eal_private.h"
diff --git a/lib/librte_eal/bsdapp/eal/rte_eal_version.map b/lib/librte_eal/bsdapp/eal/rte_eal_version.map
index 2e48a736..aac6fd77 100644
--- a/lib/librte_eal/bsdapp/eal/rte_eal_version.map
+++ b/lib/librte_eal/bsdapp/eal/rte_eal_version.map
@@ -193,3 +193,47 @@ DPDK_17.05 {
vfio_get_group_no;
} DPDK_17.02;
+
+DPDK_17.08 {
+ global:
+
+ rte_bus_find;
+ rte_bus_find_by_device;
+ rte_bus_find_by_name;
+ rte_log_get_level;
+
+} DPDK_17.05;
+
+EXPERIMENTAL {
+ global:
+
+ rte_eal_devargs_insert;
+ rte_eal_devargs_parse;
+ rte_eal_devargs_remove;
+ rte_eal_hotplug_add;
+ rte_eal_hotplug_remove;
+ rte_service_disable_on_lcore;
+ rte_service_dump;
+ rte_service_enable_on_lcore;
+ rte_service_get_by_id;
+ rte_service_get_by_name;
+ rte_service_get_count;
+ rte_service_get_enabled_on_lcore;
+ rte_service_is_running;
+ rte_service_lcore_add;
+ rte_service_lcore_count;
+ rte_service_lcore_del;
+ rte_service_lcore_list;
+ rte_service_lcore_reset_all;
+ rte_service_lcore_start;
+ rte_service_lcore_stop;
+ rte_service_probe_capability;
+ rte_service_register;
+ rte_service_reset;
+ rte_service_set_stats_enable;
+ rte_service_start;
+ rte_service_start_with_defaults;
+ rte_service_stop;
+ rte_service_unregister;
+
+} DPDK_17.08;
diff --git a/lib/librte_eal/common/Makefile b/lib/librte_eal/common/Makefile
index a5bd1089..e8fd67a2 100644
--- a/lib/librte_eal/common/Makefile
+++ b/lib/librte_eal/common/Makefile
@@ -41,10 +41,11 @@ INC += rte_eal_memconfig.h rte_malloc_heap.h
INC += rte_hexdump.h rte_devargs.h rte_bus.h rte_dev.h rte_vdev.h
INC += rte_pci_dev_feature_defs.h rte_pci_dev_features.h
INC += rte_malloc.h rte_keepalive.h rte_time.h
+INC += rte_service.h rte_service_component.h
GENERIC_INC := rte_atomic.h rte_byteorder.h rte_cycles.h rte_prefetch.h
GENERIC_INC += rte_spinlock.h rte_memcpy.h rte_cpuflags.h rte_rwlock.h
-GENERIC_INC += rte_vect.h rte_io.h
+GENERIC_INC += rte_vect.h rte_pause.h rte_io.h
# defined in mk/arch/$(RTE_ARCH)/rte.vars.mk
ARCH_DIR ?= $(RTE_ARCH)
diff --git a/lib/librte_eal/common/arch/arm/rte_cpuflags.c b/lib/librte_eal/common/arch/arm/rte_cpuflags.c
index 79160a60..5636e9c1 100644
--- a/lib/librte_eal/common/arch/arm/rte_cpuflags.c
+++ b/lib/librte_eal/common/arch/arm/rte_cpuflags.c
@@ -1,7 +1,7 @@
/*
* BSD LICENSE
*
- * Copyright (C) Cavium networks Ltd. 2015.
+ * Copyright (C) Cavium, Inc. 2015.
* Copyright(c) 2015 RehiveTech. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -14,7 +14,7 @@
* notice, this list of conditions and the following disclaimer in
* the documentation and/or other materials provided with the
* distribution.
- * * Neither the name of Cavium networks nor the names of its
+ * * Neither the name of Cavium, Inc nor the names of its
* contributors may be used to endorse or promote products derived
* from this software without specific prior written permission.
*
diff --git a/lib/librte_eal/common/eal_common_bus.c b/lib/librte_eal/common/eal_common_bus.c
index 8f9baf8b..08bec2d9 100644
--- a/lib/librte_eal/common/eal_common_bus.c
+++ b/lib/librte_eal/common/eal_common_bus.c
@@ -1,8 +1,7 @@
/*-
* BSD LICENSE
*
- * Copyright(c) 2016 NXP
- * All rights reserved.
+ * Copyright 2016 NXP.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
@@ -50,6 +49,9 @@ rte_bus_register(struct rte_bus *bus)
/* A bus should mandatorily have the scan implemented */
RTE_VERIFY(bus->scan);
RTE_VERIFY(bus->probe);
+ RTE_VERIFY(bus->find_device);
+ /* Buses supporting driver plug also require unplug. */
+ RTE_VERIFY(!bus->plug || bus->unplug);
TAILQ_INSERT_TAIL(&rte_bus_list, bus, next);
RTE_LOG(DEBUG, EAL, "Registered [%s] bus.\n", bus->name);
@@ -89,7 +91,7 @@ rte_bus_probe(void)
struct rte_bus *bus, *vbus = NULL;
TAILQ_FOREACH(bus, &rte_bus_list, next) {
- if (!strcmp(bus->name, "virtual")) {
+ if (!strcmp(bus->name, "vdev")) {
vbus = bus;
continue;
}
@@ -145,3 +147,78 @@ rte_bus_dump(FILE *f)
}
}
}
+
+struct rte_bus *
+rte_bus_find(const struct rte_bus *start, rte_bus_cmp_t cmp,
+ const void *data)
+{
+ struct rte_bus *bus = NULL;
+
+ TAILQ_FOREACH(bus, &rte_bus_list, next) {
+ if (start && bus == start) {
+ start = NULL; /* starting point found */
+ continue;
+ }
+ if (cmp(bus, data) == 0)
+ break;
+ }
+ return bus;
+}
+
+static int
+cmp_rte_device(const struct rte_device *dev1, const void *_dev2)
+{
+ const struct rte_device *dev2 = _dev2;
+
+ return dev1 != dev2;
+}
+
+static int
+bus_find_device(const struct rte_bus *bus, const void *_dev)
+{
+ struct rte_device *dev;
+
+ dev = bus->find_device(NULL, cmp_rte_device, _dev);
+ return dev == NULL;
+}
+
+struct rte_bus *
+rte_bus_find_by_device(const struct rte_device *dev)
+{
+ return rte_bus_find(NULL, bus_find_device, (const void *)dev);
+}
+
+static int
+cmp_bus_name(const struct rte_bus *bus, const void *_name)
+{
+ const char *name = _name;
+
+ return strcmp(bus->name, name);
+}
+
+struct rte_bus *
+rte_bus_find_by_name(const char *busname)
+{
+ return rte_bus_find(NULL, cmp_bus_name, (const void *)busname);
+}
+
+static int
+bus_can_parse(const struct rte_bus *bus, const void *_name)
+{
+ const char *name = _name;
+
+ return !(bus->parse && bus->parse(name, NULL) == 0);
+}
+
+struct rte_bus *
+rte_bus_find_by_device_name(const char *str)
+{
+ char name[RTE_DEV_NAME_MAX_LEN];
+ char *c;
+
+ snprintf(name, sizeof(name), "%s", str);
+ c = strchr(name, ',');
+ if (c != NULL)
+ c[0] = '\0';
+ return rte_bus_find(NULL, bus_can_parse, name);
+}
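The cursor-style iterator composes with any predicate matching rte_bus_cmp_t; a small sketch that walks every bus whose name shares a prefix (the helper names are illustrative):

#include <stdio.h>
#include <string.h>
#include <rte_bus.h>

static int
cmp_name_prefix(const struct rte_bus *bus, const void *prefix)
{
	return strncmp(bus->name, prefix, strlen(prefix));
}

static void
list_buses_with_prefix(const char *prefix)
{
	struct rte_bus *bus = NULL;

	/* Passing the previous match as `start` resumes after it. */
	while ((bus = rte_bus_find(bus, cmp_name_prefix, prefix)) != NULL)
		printf("matched bus: %s\n", bus->name);
}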
diff --git a/lib/librte_eal/common/eal_common_dev.c b/lib/librte_eal/common/eal_common_dev.c
index a400ddd0..e2512755 100644
--- a/lib/librte_eal/common/eal_common_dev.c
+++ b/lib/librte_eal/common/eal_common_dev.c
@@ -37,58 +37,210 @@
#include <inttypes.h>
#include <sys/queue.h>
+#include <rte_bus.h>
#include <rte_dev.h>
#include <rte_devargs.h>
#include <rte_debug.h>
-#include <rte_devargs.h>
#include <rte_log.h>
#include "eal_private.h"
+static int cmp_detached_dev_name(const struct rte_device *dev,
+ const void *_name)
+{
+ const char *name = _name;
+
+ /* skip attached devices */
+ if (dev->driver != NULL)
+ return 1;
+
+ return strcmp(dev->name, name);
+}
+
+static int cmp_dev_name(const struct rte_device *dev, const void *_name)
+{
+ const char *name = _name;
+
+ return strcmp(dev->name, name);
+}
+
int rte_eal_dev_attach(const char *name, const char *devargs)
{
- struct rte_pci_addr addr;
+ struct rte_bus *bus;
+ int ret;
if (name == NULL || devargs == NULL) {
RTE_LOG(ERR, EAL, "Invalid device or arguments provided\n");
return -EINVAL;
}
- if (eal_parse_pci_DomBDF(name, &addr) == 0) {
- if (rte_pci_probe_one(&addr) < 0)
- goto err;
+ bus = rte_bus_find_by_device_name(name);
+ if (bus == NULL) {
+ RTE_LOG(ERR, EAL, "Unable to find a bus for the device '%s'\n",
+ name);
+ return -EINVAL;
+ }
+ if (strcmp(bus->name, "pci") == 0)
+ return rte_eal_hotplug_add("pci", name, devargs);
+ if (strcmp(bus->name, "vdev") != 0) {
+ RTE_LOG(ERR, EAL, "Device attach is only supported for PCI and vdev devices.\n");
+ return -ENOTSUP;
+ }
- } else {
- if (rte_vdev_init(name, devargs))
- goto err;
+	/*
+	 * The bus is "vdev": the user meant to initialize a virtual
+	 * device rather than hotplug a bus device.
+	 */
+ ret = rte_vdev_init(name, devargs);
+ if (ret)
+ RTE_LOG(ERR, EAL, "Driver cannot attach the device (%s)\n",
+ name);
+ return ret;
+}
+
+int rte_eal_dev_detach(struct rte_device *dev)
+{
+ struct rte_bus *bus;
+ int ret;
+
+ if (dev == NULL) {
+ RTE_LOG(ERR, EAL, "Invalid device provided.\n");
+ return -EINVAL;
}
- return 0;
+ bus = rte_bus_find_by_device(dev);
+ if (bus == NULL) {
+ RTE_LOG(ERR, EAL, "Cannot find bus for device (%s)\n",
+ dev->name);
+ return -EINVAL;
+ }
+
+ if (bus->unplug == NULL) {
+ RTE_LOG(ERR, EAL, "Bus function not supported\n");
+ return -ENOTSUP;
+ }
-err:
- RTE_LOG(ERR, EAL, "Driver cannot attach the device (%s)\n", name);
- return -EINVAL;
+ ret = bus->unplug(dev);
+ if (ret)
+ RTE_LOG(ERR, EAL, "Driver cannot detach the device (%s)\n",
+ dev->name);
+ return ret;
}
-int rte_eal_dev_detach(const char *name)
+static char *
+full_dev_name(const char *bus, const char *dev, const char *args)
{
- struct rte_pci_addr addr;
+ char *name;
+ size_t len;
+ len = snprintf(NULL, 0, "%s:%s,%s", bus, dev, args) + 1;
+ name = calloc(1, len);
if (name == NULL) {
- RTE_LOG(ERR, EAL, "Invalid device provided.\n");
- return -EINVAL;
+ RTE_LOG(ERR, EAL, "Could not allocate full device name\n");
+ return NULL;
}
+ snprintf(name, len, "%s:%s,%s", bus, dev, args);
+ return name;
+}
- if (eal_parse_pci_DomBDF(name, &addr) == 0) {
- if (rte_pci_detach(&addr) < 0)
- goto err;
- } else {
- if (rte_vdev_uninit(name))
- goto err;
+int rte_eal_hotplug_add(const char *busname, const char *devname,
+ const char *devargs)
+{
+ struct rte_bus *bus;
+ struct rte_device *dev;
+ struct rte_devargs *da;
+ char *name;
+ int ret;
+
+ bus = rte_bus_find_by_name(busname);
+ if (bus == NULL) {
+ RTE_LOG(ERR, EAL, "Cannot find bus (%s)\n", busname);
+ return -ENOENT;
+ }
+
+ if (bus->plug == NULL) {
+ RTE_LOG(ERR, EAL, "Function plug not supported by bus (%s)\n",
+ bus->name);
+ return -ENOTSUP;
+ }
+
+ name = full_dev_name(busname, devname, devargs);
+ if (name == NULL)
+ return -ENOMEM;
+
+ da = calloc(1, sizeof(*da));
+ if (da == NULL) {
+ ret = -ENOMEM;
+ goto err_name;
+ }
+
+ ret = rte_eal_devargs_parse(name, da);
+ if (ret)
+ goto err_devarg;
+
+ ret = rte_eal_devargs_insert(da);
+ if (ret)
+ goto err_devarg;
+
+ ret = bus->scan();
+ if (ret)
+ goto err_devarg;
+
+ dev = bus->find_device(NULL, cmp_detached_dev_name, devname);
+ if (dev == NULL) {
+ RTE_LOG(ERR, EAL, "Cannot find unplugged device (%s)\n",
+ devname);
+ ret = -ENODEV;
+ goto err_devarg;
+ }
+
+ ret = bus->plug(dev);
+ if (ret) {
+ RTE_LOG(ERR, EAL, "Driver cannot attach the device (%s)\n",
+ dev->name);
+ goto err_devarg;
}
+ free(name);
return 0;
-err:
- RTE_LOG(ERR, EAL, "Driver cannot detach the device (%s)\n", name);
- return -EINVAL;
+err_devarg:
+ if (rte_eal_devargs_remove(busname, devname)) {
+ free(da->args);
+ free(da);
+ }
+err_name:
+ free(name);
+ return ret;
+}
+
+int rte_eal_hotplug_remove(const char *busname, const char *devname)
+{
+ struct rte_bus *bus;
+ struct rte_device *dev;
+ int ret;
+
+ bus = rte_bus_find_by_name(busname);
+ if (bus == NULL) {
+ RTE_LOG(ERR, EAL, "Cannot find bus (%s)\n", busname);
+ return -ENOENT;
+ }
+
+ if (bus->unplug == NULL) {
+ RTE_LOG(ERR, EAL, "Function unplug not supported by bus (%s)\n",
+ bus->name);
+ return -ENOTSUP;
+ }
+
+ dev = bus->find_device(NULL, cmp_dev_name, devname);
+ if (dev == NULL) {
+ RTE_LOG(ERR, EAL, "Cannot find plugged device (%s)\n", devname);
+ return -EINVAL;
+ }
+
+ ret = bus->unplug(dev);
+ if (ret)
+ RTE_LOG(ERR, EAL, "Driver cannot detach the device (%s)\n",
+ dev->name);
+ rte_eal_devargs_remove(busname, devname);
+ return ret;
}
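A hotplug round-trip with the new API then reduces to two calls; the PCI address and empty devargs are examples only:

#include <rte_dev.h>

static int
hotplug_example(void)
{
	int ret;

	/* Scan the bus, create the rte_devargs and probe the device. */
	ret = rte_eal_hotplug_add("pci", "0000:01:00.0", "");
	if (ret < 0)
		return ret;
	/* Unplug it again and drop its rte_devargs. */
	return rte_eal_hotplug_remove("pci", "0000:01:00.0");
}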
diff --git a/lib/librte_eal/common/eal_common_devargs.c b/lib/librte_eal/common/eal_common_devargs.c
index ffa8ad96..6ac88d6a 100644
--- a/lib/librte_eal/common/eal_common_devargs.c
+++ b/lib/librte_eal/common/eal_common_devargs.c
@@ -40,8 +40,9 @@
#include <stdio.h>
#include <string.h>
-#include <rte_pci.h>
+#include <rte_dev.h>
#include <rte_devargs.h>
+#include <rte_tailq.h>
#include "eal_private.h"
/** Global list of user devices */
@@ -78,50 +79,107 @@ rte_eal_parse_devargs_str(const char *devargs_str,
return 0;
}
+static int
+bus_name_cmp(const struct rte_bus *bus, const void *name)
+{
+ return strncmp(bus->name, name, strlen(bus->name));
+}
+
+int
+rte_eal_devargs_parse(const char *dev, struct rte_devargs *da)
+{
+ struct rte_bus *bus = NULL;
+ const char *devname;
+ const size_t maxlen = sizeof(da->name);
+ size_t i;
+
+ if (dev == NULL || da == NULL)
+ return -EINVAL;
+ /* Retrieve eventual bus info */
+ do {
+ devname = dev;
+ bus = rte_bus_find(bus, bus_name_cmp, dev);
+ if (bus == NULL)
+ break;
+ devname = dev + strlen(bus->name) + 1;
+ if (rte_bus_find_by_device_name(devname) == bus)
+ break;
+ } while (1);
+ /* Store device name */
+ i = 0;
+ while (devname[i] != '\0' && devname[i] != ',') {
+ da->name[i] = devname[i];
+ i++;
+ if (i == maxlen) {
+ fprintf(stderr, "WARNING: Parsing \"%s\": device name should be shorter than %zu\n",
+ dev, maxlen);
+ da->name[i - 1] = '\0';
+ return -EINVAL;
+ }
+ }
+ da->name[i] = '\0';
+ if (bus == NULL) {
+ bus = rte_bus_find_by_device_name(da->name);
+ if (bus == NULL) {
+ fprintf(stderr, "ERROR: failed to parse device \"%s\"\n",
+ da->name);
+ return -EFAULT;
+ }
+ }
+ da->bus = bus;
+ /* Parse eventual device arguments */
+ if (devname[i] == ',')
+ da->args = strdup(&devname[i + 1]);
+ else
+ da->args = strdup("");
+ if (da->args == NULL) {
+ fprintf(stderr, "ERROR: not enough memory to parse arguments\n");
+ return -ENOMEM;
+ }
+ return 0;
+}
+
+int
+rte_eal_devargs_insert(struct rte_devargs *da)
+{
+ int ret;
+
+ ret = rte_eal_devargs_remove(da->bus->name, da->name);
+ if (ret < 0)
+ return ret;
+ TAILQ_INSERT_TAIL(&devargs_list, da, next);
+ return 0;
+}
+
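The parser accepts both bare and bus-qualified declarations; a usage sketch (the device string is an example, and da.args is heap-allocated, so the caller owns it on success):

#include <stdlib.h>
#include <string.h>
#include <rte_devargs.h>

static void
devargs_parse_example(void)
{
	struct rte_devargs da;

	memset(&da, 0, sizeof(da));
	if (rte_eal_devargs_parse("pci:0000:00:02.0,extra=1", &da) == 0) {
		/* da.bus is the "pci" bus, da.name is "0000:00:02.0",
		 * and da.args is "extra=1".
		 */
		free(da.args);
	}
}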
/* store a whitelist parameter for later parsing */
int
rte_eal_devargs_add(enum rte_devtype devtype, const char *devargs_str)
{
struct rte_devargs *devargs = NULL;
- char *buf = NULL;
- int ret;
+ struct rte_bus *bus = NULL;
+ const char *dev = devargs_str;
- /* use malloc instead of rte_malloc as it's called early at init */
- devargs = malloc(sizeof(*devargs));
+ /* use calloc instead of rte_zmalloc as it's called early at init */
+ devargs = calloc(1, sizeof(*devargs));
if (devargs == NULL)
goto fail;
- memset(devargs, 0, sizeof(*devargs));
- devargs->type = devtype;
-
- if (rte_eal_parse_devargs_str(devargs_str, &buf, &devargs->args))
+ if (rte_eal_devargs_parse(dev, devargs))
goto fail;
-
- switch (devargs->type) {
- case RTE_DEVTYPE_WHITELISTED_PCI:
- case RTE_DEVTYPE_BLACKLISTED_PCI:
- /* try to parse pci identifier */
- if (eal_parse_pci_BDF(buf, &devargs->pci.addr) != 0 &&
- eal_parse_pci_DomBDF(buf, &devargs->pci.addr) != 0)
- goto fail;
-
- break;
- case RTE_DEVTYPE_VIRTUAL:
- /* save driver name */
- ret = snprintf(devargs->virt.drv_name,
- sizeof(devargs->virt.drv_name), "%s", buf);
- if (ret < 0 || ret >= (int)sizeof(devargs->virt.drv_name))
- goto fail;
-
- break;
+ devargs->type = devtype;
+ bus = devargs->bus;
+ if (devargs->type == RTE_DEVTYPE_BLACKLISTED_PCI)
+ devargs->policy = RTE_DEV_BLACKLISTED;
+ if (bus->conf.scan_mode == RTE_BUS_SCAN_UNDEFINED) {
+ if (devargs->policy == RTE_DEV_WHITELISTED)
+ bus->conf.scan_mode = RTE_BUS_SCAN_WHITELIST;
+ else if (devargs->policy == RTE_DEV_BLACKLISTED)
+ bus->conf.scan_mode = RTE_BUS_SCAN_BLACKLIST;
}
-
- free(buf);
TAILQ_INSERT_TAIL(&devargs_list, devargs, next);
return 0;
fail:
- free(buf);
if (devargs) {
free(devargs->args);
free(devargs);
@@ -130,6 +188,24 @@ fail:
return -1;
}
+int
+rte_eal_devargs_remove(const char *busname, const char *devname)
+{
+ struct rte_devargs *d;
+ void *tmp;
+
+ TAILQ_FOREACH_SAFE(d, &devargs_list, next, tmp) {
+ if (strcmp(d->bus->name, busname) == 0 &&
+ strcmp(d->name, devname) == 0) {
+ TAILQ_REMOVE(&devargs_list, d, next);
+ free(d->args);
+ free(d);
+ return 0;
+ }
+ }
+ return 1;
+}
+
/* count the number of devices of a specified type */
unsigned int
rte_eal_devargs_type_count(enum rte_devtype devtype)
@@ -151,27 +227,10 @@ rte_eal_devargs_dump(FILE *f)
{
struct rte_devargs *devargs;
- fprintf(f, "User device white list:\n");
+ fprintf(f, "User device list:\n");
TAILQ_FOREACH(devargs, &devargs_list, next) {
- if (devargs->type == RTE_DEVTYPE_WHITELISTED_PCI)
- fprintf(f, " PCI whitelist " PCI_PRI_FMT " %s\n",
- devargs->pci.addr.domain,
- devargs->pci.addr.bus,
- devargs->pci.addr.devid,
- devargs->pci.addr.function,
- devargs->args);
- else if (devargs->type == RTE_DEVTYPE_BLACKLISTED_PCI)
- fprintf(f, " PCI blacklist " PCI_PRI_FMT " %s\n",
- devargs->pci.addr.domain,
- devargs->pci.addr.bus,
- devargs->pci.addr.devid,
- devargs->pci.addr.function,
- devargs->args);
- else if (devargs->type == RTE_DEVTYPE_VIRTUAL)
- fprintf(f, " VIRTUAL %s %s\n",
- devargs->virt.drv_name,
- devargs->args);
- else
- fprintf(f, " UNKNOWN %s\n", devargs->args);
+ fprintf(f, " [%s]: %s %s\n",
+ (devargs->bus ? devargs->bus->name : "??"),
+ devargs->name, devargs->args);
}
}
diff --git a/lib/librte_eal/common/eal_common_launch.c b/lib/librte_eal/common/eal_common_launch.c
index 229c3a03..137c191d 100644
--- a/lib/librte_eal/common/eal_common_launch.c
+++ b/lib/librte_eal/common/eal_common_launch.c
@@ -41,6 +41,7 @@
#include <rte_memzone.h>
#include <rte_eal.h>
#include <rte_atomic.h>
+#include <rte_pause.h>
#include <rte_per_lcore.h>
#include <rte_lcore.h>
@@ -54,7 +55,8 @@ rte_eal_wait_lcore(unsigned slave_id)
return 0;
while (lcore_config[slave_id].state != WAIT &&
- lcore_config[slave_id].state != FINISHED);
+ lcore_config[slave_id].state != FINISHED)
+ rte_pause();
rte_rmb();
diff --git a/lib/librte_eal/common/eal_common_lcore.c b/lib/librte_eal/common/eal_common_lcore.c
index 84fa0cb5..0db1555b 100644
--- a/lib/librte_eal/common/eal_common_lcore.c
+++ b/lib/librte_eal/common/eal_common_lcore.c
@@ -81,6 +81,7 @@ rte_eal_cpu_init(void)
/* By default, each detected core is enabled */
config->lcore_role[lcore_id] = ROLE_RTE;
+ lcore_config[lcore_id].core_role = ROLE_RTE;
lcore_config[lcore_id].core_id = eal_cpu_core_id(lcore_id);
lcore_config[lcore_id].socket_id = eal_cpu_socket_id(lcore_id);
if (lcore_config[lcore_id].socket_id >= RTE_MAX_NUMA_NODES) {
diff --git a/lib/librte_eal/common/eal_common_log.c b/lib/librte_eal/common/eal_common_log.c
index ddf65b7f..0e3b9320 100644
--- a/lib/librte_eal/common/eal_common_log.c
+++ b/lib/librte_eal/common/eal_common_log.c
@@ -112,6 +112,15 @@ rte_get_log_level(void)
return rte_log_get_global_level();
}
+int
+rte_log_get_level(uint32_t type)
+{
+ if (type >= rte_logs.dynamic_types_len)
+ return -1;
+
+ return rte_logs.dynamic_types[type].loglevel;
+}
+
/* Set global log type */
__rte_deprecated void
rte_set_log_type(uint32_t type, int enable)
@@ -173,13 +182,13 @@ rte_log_set_level_regexp(const char *pattern, uint32_t level)
return 0;
}
-/* get the current loglevel for the message beeing processed */
+/* get the current loglevel for the message being processed */
int rte_log_cur_msg_loglevel(void)
{
return RTE_PER_LCORE(log_cur_msg).loglevel;
}
-/* get the current logtype for the message beeing processed */
+/* get the current logtype for the message being processed */
int rte_log_cur_msg_logtype(void)
{
return RTE_PER_LCORE(log_cur_msg).logtype;
diff --git a/lib/librte_eal/common/eal_common_memory.c b/lib/librte_eal/common/eal_common_memory.c
index 6155752e..996877ef 100644
--- a/lib/librte_eal/common/eal_common_memory.c
+++ b/lib/librte_eal/common/eal_common_memory.c
@@ -35,7 +35,9 @@
#include <stdint.h>
#include <stdlib.h>
#include <stdarg.h>
+#include <unistd.h>
#include <inttypes.h>
+#include <sys/mman.h>
#include <sys/queue.h>
#include <rte_memory.h>
@@ -135,6 +137,16 @@ rte_eal_memdevice_init(void)
return 0;
}
+/* Lock page in physical memory and prevent from swapping. */
+int
+rte_mem_lock_page(const void *virt)
+{
+ unsigned long virtual = (unsigned long)virt;
+ int page_size = getpagesize();
+ unsigned long aligned = (virtual & ~(page_size - 1));
+ return mlock((void *)aligned, page_size);
+}
+
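A usage sketch, assuming the declaration is exported through rte_memory.h; the object is illustrative:

#include <rte_memory.h>

static int
pin_object(const void *obj)
{
	/* Locks the whole getpagesize()-aligned page enclosing obj. */
	return rte_mem_lock_page(obj);
}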
/* init memory subsystem */
int
rte_eal_memory_init(void)
diff --git a/lib/librte_eal/common/eal_common_memzone.c b/lib/librte_eal/common/eal_common_memzone.c
index 64f4e0ad..3026e36b 100644
--- a/lib/librte_eal/common/eal_common_memzone.c
+++ b/lib/librte_eal/common/eal_common_memzone.c
@@ -189,7 +189,8 @@ memzone_reserve_aligned_thread_unsafe(const char *name, size_t len,
return NULL;
}
- if ((socket_id != SOCKET_ID_ANY) && (socket_id >= RTE_MAX_NUMA_NODES)) {
+ if ((socket_id != SOCKET_ID_ANY) &&
+ (socket_id >= RTE_MAX_NUMA_NODES || socket_id < 0)) {
rte_errno = EINVAL;
return NULL;
}
diff --git a/lib/librte_eal/common/eal_common_options.c b/lib/librte_eal/common/eal_common_options.c
index f470195f..1da185e5 100644
--- a/lib/librte_eal/common/eal_common_options.c
+++ b/lib/librte_eal/common/eal_common_options.c
@@ -47,6 +47,7 @@
#include <rte_eal.h>
#include <rte_log.h>
#include <rte_lcore.h>
+#include <rte_tailq.h>
#include <rte_version.h>
#include <rte_devargs.h>
#include <rte_memcpy.h>
@@ -61,9 +62,11 @@ const char
eal_short_options[] =
"b:" /* pci-blacklist */
"c:" /* coremask */
+ "s:" /* service coremask */
"d:" /* driver */
"h" /* help */
"l:" /* corelist */
+ "S:" /* service corelist */
"m:" /* memory size */
"n:" /* memory channels */
"r:" /* memory ranks */
@@ -123,11 +126,67 @@ static const char *default_solib_dir = RTE_EAL_PMD_PATH;
static const char dpdk_solib_path[] __attribute__((used)) =
"DPDK_PLUGIN_PATH=" RTE_EAL_PMD_PATH;
+TAILQ_HEAD(device_option_list, device_option);
+
+struct device_option {
+ TAILQ_ENTRY(device_option) next;
+
+ enum rte_devtype type;
+ char arg[];
+};
+
+static struct device_option_list devopt_list =
+TAILQ_HEAD_INITIALIZER(devopt_list);
static int master_lcore_parsed;
static int mem_parsed;
static int core_parsed;
+static int
+eal_option_device_add(enum rte_devtype type, const char *optarg)
+{
+ struct device_option *devopt;
+ size_t optlen;
+ int ret;
+
+ optlen = strlen(optarg) + 1;
+ devopt = calloc(1, sizeof(*devopt) + optlen);
+ if (devopt == NULL) {
+ RTE_LOG(ERR, EAL, "Unable to allocate device option\n");
+ return -ENOMEM;
+ }
+
+ devopt->type = type;
+ ret = snprintf(devopt->arg, optlen, "%s", optarg);
+ if (ret < 0) {
+ RTE_LOG(ERR, EAL, "Unable to copy device option\n");
+ free(devopt);
+ return -EINVAL;
+ }
+ TAILQ_INSERT_TAIL(&devopt_list, devopt, next);
+ return 0;
+}
+
+int
+eal_option_device_parse(void)
+{
+ struct device_option *devopt;
+ void *tmp;
+ int ret = 0;
+
+ TAILQ_FOREACH_SAFE(devopt, &devopt_list, next, tmp) {
+ if (ret == 0) {
+ ret = rte_eal_devargs_add(devopt->type, devopt->arg);
+ if (ret)
+ RTE_LOG(ERR, EAL, "Unable to parse device '%s'\n",
+ devopt->arg);
+ }
+ TAILQ_REMOVE(&devopt_list, devopt, next);
+ free(devopt);
+ }
+ return ret;
+}
+
void
eal_reset_internal_config(struct internal_config *internal_cfg)
{
@@ -267,6 +326,77 @@ static int xdigit2val(unsigned char c)
}
static int
+eal_parse_service_coremask(const char *coremask)
+{
+ struct rte_config *cfg = rte_eal_get_configuration();
+ int i, j, idx = 0;
+ unsigned int count = 0;
+ char c;
+ int val;
+
+ if (coremask == NULL)
+ return -1;
+	/* Strip leading and trailing blank characters,
+	 * and an optional 0x/0X prefix.
+	 */
+ while (isblank(*coremask))
+ coremask++;
+ if (coremask[0] == '0' && ((coremask[1] == 'x')
+ || (coremask[1] == 'X')))
+ coremask += 2;
+ i = strlen(coremask);
+ while ((i > 0) && isblank(coremask[i - 1]))
+ i--;
+
+ if (i == 0)
+ return -1;
+
+ for (i = i - 1; i >= 0 && idx < RTE_MAX_LCORE; i--) {
+ c = coremask[i];
+ if (isxdigit(c) == 0) {
+ /* invalid characters */
+ return -1;
+ }
+ val = xdigit2val(c);
+ for (j = 0; j < BITS_PER_HEX && idx < RTE_MAX_LCORE;
+ j++, idx++) {
+ if ((1 << j) & val) {
+ /* handle master lcore already parsed */
+ uint32_t lcore = idx;
+ if (master_lcore_parsed &&
+ cfg->master_lcore == lcore) {
+ RTE_LOG(ERR, EAL,
+ "Error: lcore %u is master lcore, cannot use as service core\n",
+ idx);
+ return -1;
+ }
+
+ if (!lcore_config[idx].detected) {
+ RTE_LOG(ERR, EAL,
+ "lcore %u unavailable\n", idx);
+ return -1;
+ }
+ lcore_config[idx].core_role = ROLE_SERVICE;
+ count++;
+ }
+ }
+ }
+
+ for (; i >= 0; i--)
+ if (coremask[i] != '0')
+ return -1;
+
+ for (; idx < RTE_MAX_LCORE; idx++)
+ lcore_config[idx].core_index = -1;
+
+ if (count == 0)
+ return -1;
+
+ cfg->service_lcore_count = count;
+ return 0;
+}
+
+static int
eal_parse_coremask(const char *coremask)
{
struct rte_config *cfg = rte_eal_get_configuration();
@@ -330,6 +460,72 @@ eal_parse_coremask(const char *coremask)
}
static int
+eal_parse_service_corelist(const char *corelist)
+{
+ struct rte_config *cfg = rte_eal_get_configuration();
+ int i, idx = 0;
+ unsigned count = 0;
+ char *end = NULL;
+ int min, max;
+
+ if (corelist == NULL)
+ return -1;
+
+	/* Strip leading and trailing blank characters */
+ while (isblank(*corelist))
+ corelist++;
+ i = strlen(corelist);
+ while ((i > 0) && isblank(corelist[i - 1]))
+ i--;
+
+ /* Get list of cores */
+ min = RTE_MAX_LCORE;
+ do {
+ while (isblank(*corelist))
+ corelist++;
+ if (*corelist == '\0')
+ return -1;
+ errno = 0;
+ idx = strtoul(corelist, &end, 10);
+ if (errno || end == NULL)
+ return -1;
+ while (isblank(*end))
+ end++;
+ if (*end == '-') {
+ min = idx;
+ } else if ((*end == ',') || (*end == '\0')) {
+ max = idx;
+ if (min == RTE_MAX_LCORE)
+ min = idx;
+ for (idx = min; idx <= max; idx++) {
+ if (cfg->lcore_role[idx] != ROLE_SERVICE) {
+ /* handle master lcore already parsed */
+ uint32_t lcore = idx;
+ if (cfg->master_lcore == lcore &&
+ master_lcore_parsed) {
+ RTE_LOG(ERR, EAL,
+ "Error: lcore %u is master lcore, cannot use as service core\n",
+ idx);
+ return -1;
+ }
+ lcore_config[idx].core_role =
+ ROLE_SERVICE;
+ count++;
+ }
+ }
+ min = RTE_MAX_LCORE;
+ } else
+ return -1;
+ corelist = end + 1;
+ } while (*end != '\0');
+
+ if (count == 0)
+ return -1;
+
+ return 0;
+}
+
+static int
eal_parse_corelist(const char *corelist)
{
struct rte_config *cfg = rte_eal_get_configuration();
@@ -409,6 +605,13 @@ eal_parse_master_lcore(const char *arg)
if (cfg->master_lcore >= RTE_MAX_LCORE)
return -1;
master_lcore_parsed = 1;
+
+ /* ensure master core is not used as service core */
+ if (lcore_config[cfg->master_lcore].core_role == ROLE_SERVICE) {
+ RTE_LOG(ERR, EAL, "Error: Master lcore is used as a service core.\n");
+ return -1;
+ }
+
return 0;
}
@@ -795,20 +998,29 @@ int
eal_parse_common_option(int opt, const char *optarg,
struct internal_config *conf)
{
+ static int b_used;
+ static int w_used;
+
switch (opt) {
/* blacklist */
case 'b':
- if (rte_eal_devargs_add(RTE_DEVTYPE_BLACKLISTED_PCI,
+ if (w_used)
+ goto bw_used;
+ if (eal_option_device_add(RTE_DEVTYPE_BLACKLISTED_PCI,
optarg) < 0) {
return -1;
}
+ b_used = 1;
break;
/* whitelist */
case 'w':
- if (rte_eal_devargs_add(RTE_DEVTYPE_WHITELISTED_PCI,
+ if (b_used)
+ goto bw_used;
+ if (eal_option_device_add(RTE_DEVTYPE_WHITELISTED_PCI,
optarg) < 0) {
return -1;
}
+ w_used = 1;
break;
/* coremask */
case 'c':
@@ -826,6 +1038,20 @@ eal_parse_common_option(int opt, const char *optarg,
}
core_parsed = 1;
break;
+ /* service coremask */
+ case 's':
+ if (eal_parse_service_coremask(optarg) < 0) {
+ RTE_LOG(ERR, EAL, "invalid service coremask\n");
+ return -1;
+ }
+ break;
+ /* service corelist */
+ case 'S':
+ if (eal_parse_service_corelist(optarg) < 0) {
+ RTE_LOG(ERR, EAL, "invalid service core list\n");
+ return -1;
+ }
+ break;
/* size of memory */
case 'm':
conf->memory = atoi(optarg);
@@ -901,7 +1127,7 @@ eal_parse_common_option(int opt, const char *optarg,
break;
case OPT_VDEV_NUM:
- if (rte_eal_devargs_add(RTE_DEVTYPE_VIRTUAL,
+ if (eal_option_device_add(RTE_DEVTYPE_VIRTUAL,
optarg) < 0) {
return -1;
}
@@ -940,6 +1166,10 @@ eal_parse_common_option(int opt, const char *optarg,
}
return 0;
+bw_used:
+ RTE_LOG(ERR, EAL, "Options blacklist (-b) and whitelist (-w) "
+ "cannot be used at the same time\n");
+ return -1;
}
static void
@@ -978,8 +1208,10 @@ eal_adjust_config(struct internal_config *internal_cfg)
internal_config.process_type = eal_proc_type_detect();
/* default master lcore is the first one */
- if (!master_lcore_parsed)
+ if (!master_lcore_parsed) {
cfg->master_lcore = rte_get_next_lcore(-1, 0, 0);
+ lcore_config[cfg->master_lcore].core_role = ROLE_RTE;
+ }
/* if no memory amounts were requested, this will result in 0 and
* will be overridden later, right after eal_hugepage_info_init() */
@@ -1025,13 +1257,6 @@ eal_check_common_options(struct internal_config *internal_cfg)
return -1;
}
- if (rte_eal_devargs_type_count(RTE_DEVTYPE_WHITELISTED_PCI) != 0 &&
- rte_eal_devargs_type_count(RTE_DEVTYPE_BLACKLISTED_PCI) != 0) {
- RTE_LOG(ERR, EAL, "Options blacklist (-b) and whitelist (-w) "
- "cannot be used at the same time\n");
- return -1;
- }
-
return 0;
}
@@ -1052,6 +1277,7 @@ eal_common_usage(void)
" ',' is used for single number separator.\n"
" '( )' can be omitted for single element group,\n"
" '@' can be omitted if cpus and lcores have the same value\n"
+ " -s SERVICE COREMASK Hexadecimal bitmask of cores to be used as service cores\n"
" --"OPT_MASTER_LCORE" ID Core ID that is used as master\n"
" -n CHANNELS Number of memory channels\n"
" -m MB Memory to allocate (see also --"OPT_SOCKET_MEM")\n"
diff --git a/lib/librte_eal/common/eal_common_pci.c b/lib/librte_eal/common/eal_common_pci.c
index b7499913..52fd38cd 100644
--- a/lib/librte_eal/common/eal_common_pci.c
+++ b/lib/librte_eal/common/eal_common_pci.c
@@ -2,6 +2,7 @@
* BSD LICENSE
*
* Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+ * Copyright 2013-2014 6WIND S.A.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -30,36 +31,6 @@
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
-/* BSD LICENSE
- *
- * Copyright 2013-2014 6WIND S.A.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- *
- * * Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in
- * the documentation and/or other materials provided with the
- * distribution.
- * * Neither the name of 6WIND S.A. nor the names of its
- * contributors may be used to endorse or promote products derived
- * from this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- */
#include <string.h>
#include <inttypes.h>
@@ -102,17 +73,43 @@ const char *pci_get_sysfs_path(void)
static struct rte_devargs *pci_devargs_lookup(struct rte_pci_device *dev)
{
struct rte_devargs *devargs;
+ struct rte_pci_addr addr;
+ struct rte_bus *pbus;
+ pbus = rte_bus_find_by_name("pci");
TAILQ_FOREACH(devargs, &devargs_list, next) {
- if (devargs->type != RTE_DEVTYPE_BLACKLISTED_PCI &&
- devargs->type != RTE_DEVTYPE_WHITELISTED_PCI)
+ if (devargs->bus != pbus)
continue;
- if (!rte_eal_compare_pci_addr(&dev->addr, &devargs->pci.addr))
+ devargs->bus->parse(devargs->name, &addr);
+ if (!rte_eal_compare_pci_addr(&dev->addr, &addr))
return devargs;
}
return NULL;
}
+void
+pci_name_set(struct rte_pci_device *dev)
+{
+ struct rte_devargs *devargs;
+
+ /* Each device has its internal, canonical name set. */
+ rte_pci_device_name(&dev->addr,
+ dev->name, sizeof(dev->name));
+ devargs = pci_devargs_lookup(dev);
+ dev->device.devargs = devargs;
+ /* In blacklist mode, if the device is not blacklisted, no
+ * rte_devargs exists for it.
+ */
+ if (devargs != NULL)
+ /* If an rte_devargs exists, the generic rte_device uses the
+		 * given name as its name.
+ */
+ dev->device.name = dev->device.devargs->name;
+ else
+ /* Otherwise, it uses the internal, canonical form. */
+ dev->device.name = dev->name;
+}
+
/* map a particular resource from a file */
void *
pci_map_resource(void *requested_addr, int fd, off_t offset, size_t size,
@@ -212,12 +209,9 @@ rte_pci_probe_one_driver(struct rte_pci_driver *dr,
loc = &dev->addr;
/* The device is not blacklisted; Check if driver supports it */
- if (!rte_pci_match(dr, dev)) {
+ if (!rte_pci_match(dr, dev))
/* Match of device and driver failed */
- RTE_LOG(DEBUG, EAL, "Driver (%s) doesn't match the device\n",
- dr->driver.name);
return 1;
- }
RTE_LOG(INFO, EAL, "PCI device "PCI_PRI_FMT" on NUMA socket %i\n",
loc->domain, loc->bus, loc->devid, loc->function,
@@ -225,13 +219,18 @@ rte_pci_probe_one_driver(struct rte_pci_driver *dr,
/* no initialization when blacklisted, return without error */
if (dev->device.devargs != NULL &&
- dev->device.devargs->type ==
- RTE_DEVTYPE_BLACKLISTED_PCI) {
+ dev->device.devargs->policy ==
+ RTE_DEV_BLACKLISTED) {
RTE_LOG(INFO, EAL, " Device is blacklisted, not"
" initializing\n");
return 1;
}
+ if (dev->device.numa_node < 0) {
+ RTE_LOG(WARNING, EAL, " Invalid NUMA socket, default to 0\n");
+ dev->device.numa_node = 0;
+ }
+
RTE_LOG(INFO, EAL, " probe driver: %x:%x %s\n", dev->id.vendor_id,
dev->id.device_id, dr->driver.name);
@@ -250,7 +249,13 @@ rte_pci_probe_one_driver(struct rte_pci_driver *dr,
ret = dr->probe(dr, dev);
if (ret) {
dev->driver = NULL;
- if (dr->drv_flags & RTE_PCI_DRV_NEED_MAPPING)
+ dev->device.driver = NULL;
+ if ((dr->drv_flags & RTE_PCI_DRV_NEED_MAPPING) &&
+ /* Don't unmap if device is unsupported and
+ * driver needs mapped resources.
+ */
+ !(ret > 0 &&
+ (dr->drv_flags & RTE_PCI_DRV_KEEP_MAPPED_RES)))
rte_pci_unmap_device(dev);
}
@@ -326,7 +331,7 @@ pci_probe_all_drivers(struct rte_pci_device *dev)
/*
* Find the pci device specified by pci address, then invoke probe function of
- * the driver of the devive.
+ * the driver of the device.
*/
int
rte_pci_probe_one(const struct rte_pci_addr *addr)
@@ -413,22 +418,18 @@ rte_pci_probe(void)
int probe_all = 0;
int ret = 0;
- if (rte_eal_devargs_type_count(RTE_DEVTYPE_WHITELISTED_PCI) == 0)
+ if (rte_pci_bus.bus.conf.scan_mode != RTE_BUS_SCAN_WHITELIST)
probe_all = 1;
FOREACH_DEVICE_ON_PCIBUS(dev) {
probed++;
- /* set devargs in PCI structure */
- devargs = pci_devargs_lookup(dev);
- if (devargs != NULL)
- dev->device.devargs = devargs;
-
+ devargs = dev->device.devargs;
/* probe all or only whitelisted devices */
if (probe_all)
ret = pci_probe_all_drivers(dev);
else if (devargs != NULL &&
- devargs->type == RTE_DEVTYPE_WHITELISTED_PCI)
+ devargs->policy == RTE_DEV_WHITELISTED)
ret = pci_probe_all_drivers(dev);
if (ret < 0) {
RTE_LOG(ERR, EAL, "Requested device " PCI_PRI_FMT
@@ -474,6 +475,20 @@ rte_pci_dump(FILE *f)
}
}
+static int
+pci_parse(const char *name, void *addr)
+{
+ struct rte_pci_addr *out = addr;
+ struct rte_pci_addr pci_addr;
+ bool parse;
+
+ parse = (eal_parse_pci_BDF(name, &pci_addr) == 0 ||
+ eal_parse_pci_DomBDF(name, &pci_addr) == 0);
+ if (parse && addr != NULL)
+ *out = pci_addr;
+ return parse == false;
+}
+
/* register a driver */
void
rte_pci_register(struct rte_pci_driver *driver)
@@ -512,13 +527,54 @@ rte_pci_remove_device(struct rte_pci_device *pci_dev)
TAILQ_REMOVE(&rte_pci_bus.device_list, pci_dev, next);
}
+static struct rte_device *
+pci_find_device(const struct rte_device *start, rte_dev_cmp_t cmp,
+ const void *data)
+{
+ struct rte_pci_device *dev;
+
+ FOREACH_DEVICE_ON_PCIBUS(dev) {
+ if (start && &dev->device == start) {
+ start = NULL; /* starting point found */
+ continue;
+ }
+ if (cmp(&dev->device, data) == 0)
+ return &dev->device;
+ }
+
+ return NULL;
+}
+
+static int
+pci_plug(struct rte_device *dev)
+{
+ return pci_probe_all_drivers(RTE_DEV_TO_PCI(dev));
+}
+
+static int
+pci_unplug(struct rte_device *dev)
+{
+ struct rte_pci_device *pdev;
+ int ret;
+
+ pdev = RTE_DEV_TO_PCI(dev);
+ ret = rte_pci_detach_dev(pdev);
+ rte_pci_remove_device(pdev);
+ free(pdev);
+ return ret;
+}
+
struct rte_pci_bus rte_pci_bus = {
.bus = {
.scan = rte_pci_scan,
.probe = rte_pci_probe,
+ .find_device = pci_find_device,
+ .plug = pci_plug,
+ .unplug = pci_unplug,
+ .parse = pci_parse,
},
.device_list = TAILQ_HEAD_INITIALIZER(rte_pci_bus.device_list),
.driver_list = TAILQ_HEAD_INITIALIZER(rte_pci_bus.driver_list),
};
-RTE_REGISTER_BUS(PCI_BUS_NAME, rte_pci_bus.bus);
+RTE_REGISTER_BUS(pci, rte_pci_bus.bus);
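
With parse, plug and unplug now exposed through struct rte_bus, generic EAL
code can validate a device identifier without any PCI-specific knowledge. A
minimal sketch of that flow, assuming the bus registered above; the helper
name is illustrative only:

    #include <rte_bus.h>
    #include <rte_pci.h>

    /* Return 1 when the PCI bus recognizes devname as one of its own. */
    static int
    is_valid_pci_id(const char *devname)
    {
    	struct rte_pci_addr addr;
    	struct rte_bus *bus = rte_bus_find_by_name("pci");

    	if (bus == NULL || bus->parse == NULL)
    		return 0;
    	/* parse() returns 0 on success, matching pci_parse() above. */
    	return bus->parse(devname, &addr) == 0;
    }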
diff --git a/lib/librte_eal/common/eal_common_proc.c b/lib/librte_eal/common/eal_common_proc.c
index 12e0fcac..60526cad 100644
--- a/lib/librte_eal/common/eal_common_proc.c
+++ b/lib/librte_eal/common/eal_common_proc.c
@@ -46,10 +46,10 @@ rte_eal_primary_proc_alive(const char *config_file_path)
if (config_file_path)
config_fd = open(config_file_path, O_RDONLY);
else {
- char default_path[PATH_MAX+1];
- snprintf(default_path, PATH_MAX, RUNTIME_CONFIG_FMT,
- default_config_dir, "rte");
- config_fd = open(default_path, O_RDONLY);
+ const char *path;
+
+ path = eal_runtime_config_path();
+ config_fd = open(path, O_RDONLY);
}
if (config_fd < 0)
return 0;
diff --git a/lib/librte_eal/common/eal_common_tailqs.c b/lib/librte_eal/common/eal_common_tailqs.c
index 4f698288..55955f9e 100644
--- a/lib/librte_eal/common/eal_common_tailqs.c
+++ b/lib/librte_eal/common/eal_common_tailqs.c
@@ -46,7 +46,6 @@
#include <rte_eal_memconfig.h>
#include <rte_per_lcore.h>
#include <rte_lcore.h>
-#include <rte_memory.h>
#include <rte_atomic.h>
#include <rte_branch_prediction.h>
#include <rte_log.h>
diff --git a/lib/librte_eal/common/eal_common_timer.c b/lib/librte_eal/common/eal_common_timer.c
index 72656176..ed0b16d0 100644
--- a/lib/librte_eal/common/eal_common_timer.c
+++ b/lib/librte_eal/common/eal_common_timer.c
@@ -41,6 +41,7 @@
#include <rte_common.h>
#include <rte_log.h>
#include <rte_cycles.h>
+#include <rte_pause.h>
#include "eal_private.h"
diff --git a/lib/librte_eal/common/eal_common_vdev.c b/lib/librte_eal/common/eal_common_vdev.c
index 0037a641..f7e547a6 100644
--- a/lib/librte_eal/common/eal_common_vdev.c
+++ b/lib/librte_eal/common/eal_common_vdev.c
@@ -35,14 +35,20 @@
#include <stdio.h>
#include <stdlib.h>
#include <stdint.h>
+#include <stdbool.h>
#include <sys/queue.h>
#include <rte_eal.h>
+#include <rte_dev.h>
#include <rte_bus.h>
#include <rte_vdev.h>
#include <rte_common.h>
#include <rte_devargs.h>
#include <rte_memory.h>
+#include <rte_errno.h>
+
+/* Forward declare to access virtual bus name */
+static struct rte_bus rte_vdev_bus;
/** Double linked list of virtual device drivers. */
TAILQ_HEAD(vdev_device_list, rte_vdev_device);
@@ -52,14 +58,10 @@ static struct vdev_device_list vdev_device_list =
struct vdev_driver_list vdev_driver_list =
TAILQ_HEAD_INITIALIZER(vdev_driver_list);
-static void rte_vdev_bus_register(void);
-
/* register a driver */
void
rte_vdev_register(struct rte_vdev_driver *driver)
{
- rte_vdev_bus_register();
-
TAILQ_INSERT_TAIL(&vdev_driver_list, driver, next);
}
@@ -70,84 +72,45 @@ rte_vdev_unregister(struct rte_vdev_driver *driver)
TAILQ_REMOVE(&vdev_driver_list, driver, next);
}
-/*
- * Parse "driver" devargs without adding a dependency on rte_kvargs.h
- */
-static char *parse_driver_arg(const char *args)
+static int
+vdev_parse(const char *name, void *addr)
{
- const char *c;
- char *str;
-
- if (!args || args[0] == '\0')
- return NULL;
+ struct rte_vdev_driver **out = addr;
+ struct rte_vdev_driver *driver = NULL;
- c = args;
-
- do {
- if (strncmp(c, "driver=", 7) == 0) {
- c += 7;
+ TAILQ_FOREACH(driver, &vdev_driver_list, next) {
+ if (strncmp(driver->driver.name, name,
+ strlen(driver->driver.name)) == 0)
break;
- }
-
- c = strchr(c, ',');
- if (c)
- c++;
- } while (c);
-
- if (c)
- str = strdup(c);
- else
- str = NULL;
-
- return str;
+ if (driver->driver.alias &&
+ strncmp(driver->driver.alias, name,
+ strlen(driver->driver.alias)) == 0)
+ break;
+ }
+ if (driver != NULL &&
+ addr != NULL)
+ *out = driver;
+ return driver == NULL;
}
static int
vdev_probe_all_drivers(struct rte_vdev_device *dev)
{
const char *name;
- char *drv_name;
struct rte_vdev_driver *driver;
- int ret = 1;
+ int ret;
- drv_name = parse_driver_arg(rte_vdev_device_args(dev));
- name = drv_name ? drv_name : rte_vdev_device_name(dev);
+ name = rte_vdev_device_name(dev);
RTE_LOG(DEBUG, EAL, "Search driver %s to probe device %s\n", name,
rte_vdev_device_name(dev));
- TAILQ_FOREACH(driver, &vdev_driver_list, next) {
- /*
- * search a driver prefix in virtual device name.
- * For example, if the driver is pcap PMD, driver->name
- * will be "net_pcap", but "name" will be "net_pcapN".
- * So use strncmp to compare.
- */
- if (!strncmp(driver->driver.name, name,
- strlen(driver->driver.name))) {
- dev->device.driver = &driver->driver;
- ret = driver->probe(dev);
- if (ret)
- dev->device.driver = NULL;
- goto out;
- }
- }
-
- /* Give new names precedence over aliases. */
- TAILQ_FOREACH(driver, &vdev_driver_list, next) {
- if (driver->driver.alias &&
- !strncmp(driver->driver.alias, name,
- strlen(driver->driver.alias))) {
- dev->device.driver = &driver->driver;
- ret = driver->probe(dev);
- if (ret)
- dev->device.driver = NULL;
- break;
- }
- }
-
-out:
- free(drv_name);
+ if (vdev_parse(name, &driver))
+ return -1;
+ dev->device.driver = &driver->driver;
+ ret = driver->probe(dev);
+ if (ret)
+ dev->device.driver = NULL;
return ret;
}
@@ -178,13 +141,14 @@ alloc_devargs(const char *name, const char *args)
if (!devargs)
return NULL;
- devargs->type = RTE_DEVTYPE_VIRTUAL;
+ devargs->bus = &rte_vdev_bus;
if (args)
devargs->args = strdup(args);
+ else
+ devargs->args = strdup("");
- ret = snprintf(devargs->virt.drv_name,
- sizeof(devargs->virt.drv_name), "%s", name);
- if (ret < 0 || ret >= (int)sizeof(devargs->virt.drv_name)) {
+ ret = snprintf(devargs->name, sizeof(devargs->name), "%s", name);
+ if (ret < 0 || ret >= (int)sizeof(devargs->name)) {
free(devargs->args);
free(devargs);
return NULL;
@@ -219,7 +183,7 @@ rte_vdev_init(const char *name, const char *args)
dev->device.devargs = devargs;
dev->device.numa_node = SOCKET_ID_ANY;
- dev->device.name = devargs->virt.drv_name;
+ dev->device.name = devargs->name;
ret = vdev_probe_all_drivers(dev);
if (ret) {
@@ -293,13 +257,12 @@ vdev_scan(void)
struct rte_devargs *devargs;
/* for virtual devices we scan the devargs_list populated via cmdline */
-
TAILQ_FOREACH(devargs, &devargs_list, next) {
- if (devargs->type != RTE_DEVTYPE_VIRTUAL)
+ if (devargs->bus != &rte_vdev_bus)
continue;
- dev = find_vdev(devargs->virt.drv_name);
+ dev = find_vdev(devargs->name);
if (dev)
continue;
@@ -309,7 +272,7 @@ vdev_scan(void)
dev->device.devargs = devargs;
dev->device.numa_node = SOCKET_ID_ANY;
- dev->device.name = devargs->virt.drv_name;
+ dev->device.name = devargs->name;
TAILQ_INSERT_TAIL(&vdev_device_list, dev, next);
}
@@ -338,21 +301,42 @@ vdev_probe(void)
return 0;
}
-static struct rte_bus rte_vdev_bus = {
- .scan = vdev_scan,
- .probe = vdev_probe,
-};
+static struct rte_device *
+vdev_find_device(const struct rte_device *start, rte_dev_cmp_t cmp,
+ const void *data)
+{
+ struct rte_vdev_device *dev;
-RTE_INIT(rte_vdev_bus_register);
+ TAILQ_FOREACH(dev, &vdev_device_list, next) {
+ if (start && &dev->device == start) {
+ start = NULL;
+ continue;
+ }
+ if (cmp(&dev->device, data) == 0)
+ return &dev->device;
+ }
+ return NULL;
+}
-static void rte_vdev_bus_register(void)
+static int
+vdev_plug(struct rte_device *dev)
{
- static int registered;
-
- if (registered)
- return;
+ return vdev_probe_all_drivers(RTE_DEV_TO_VDEV(dev));
+}
- registered = 1;
- rte_vdev_bus.name = RTE_STR(virtual);
- rte_bus_register(&rte_vdev_bus);
+static int
+vdev_unplug(struct rte_device *dev)
+{
+ return rte_vdev_uninit(dev->name);
}
+
+static struct rte_bus rte_vdev_bus = {
+ .scan = vdev_scan,
+ .probe = vdev_probe,
+ .find_device = vdev_find_device,
+ .plug = vdev_plug,
+ .unplug = vdev_unplug,
+ .parse = vdev_parse,
+};
+
+RTE_REGISTER_BUS(vdev, rte_vdev_bus);
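
The prefix match in vdev_parse() replaces the old "driver=" argument parsing:
a device named "net_pcap0" is claimed by the driver whose name ("net_pcap")
or alias is a prefix of it. A hedged usage sketch; the device name and
arguments are illustrative only:

    #include <rte_vdev.h>

    static int
    create_pcap_port(void)
    {
    	/* vdev_parse() matches "net_pcap0" against the registered
    	 * driver name "net_pcap" by prefix before probing. */
    	return rte_vdev_init("net_pcap0", "iface=eth0");
    }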
diff --git a/lib/librte_eal/common/eal_options.h b/lib/librte_eal/common/eal_options.h
index a881c62e..439a2610 100644
--- a/lib/librte_eal/common/eal_options.h
+++ b/lib/librte_eal/common/eal_options.h
@@ -91,6 +91,7 @@ extern const struct option eal_long_options[];
int eal_parse_common_option(int opt, const char *argv,
struct internal_config *conf);
+int eal_option_device_parse(void);
int eal_adjust_config(struct internal_config *internal_cfg);
int eal_check_common_options(struct internal_config *internal_cfg);
void eal_common_usage(void);
diff --git a/lib/librte_eal/common/eal_private.h b/lib/librte_eal/common/eal_private.h
index 6cacce07..597d82e4 100644
--- a/lib/librte_eal/common/eal_private.h
+++ b/lib/librte_eal/common/eal_private.h
@@ -113,6 +113,11 @@ struct rte_pci_driver;
struct rte_pci_device;
/**
+ * Set the name of a PCI device.
+ */
+void pci_name_set(struct rte_pci_device *dev);
+
+/**
* Add a PCI device to the PCI Bus (append to PCI Device list). This function
* also updates the bus references of the PCI Device (and the generic device
 * object embedded within).
@@ -338,4 +343,16 @@ int rte_eal_hugepage_attach(void);
*/
bool rte_eal_using_phys_addrs(void);
+/**
+ * Find a bus capable of identifying a device.
+ *
+ * @param str
+ * A device identifier (PCI address, virtual PMD name, ...).
+ *
+ * @return
+ * A valid bus handle if found.
+ * NULL if no bus is able to parse this device.
+ */
+struct rte_bus *rte_bus_find_by_device_name(const char *str);
+
#endif /* _EAL_PRIVATE_H_ */
diff --git a/lib/librte_eal/common/include/arch/arm/rte_atomic_64.h b/lib/librte_eal/common/include/arch/arm/rte_atomic_64.h
index dc3a0f3b..0b70d620 100644
--- a/lib/librte_eal/common/include/arch/arm/rte_atomic_64.h
+++ b/lib/librte_eal/common/include/arch/arm/rte_atomic_64.h
@@ -1,7 +1,7 @@
/*
* BSD LICENSE
*
- * Copyright (C) Cavium networks Ltd. 2015.
+ * Copyright (C) Cavium, Inc. 2015.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
@@ -13,7 +13,7 @@
* notice, this list of conditions and the following disclaimer in
* the documentation and/or other materials provided with the
* distribution.
- * * Neither the name of Cavium networks nor the names of its
+ * * Neither the name of Cavium, Inc nor the names of its
* contributors may be used to endorse or promote products derived
* from this software without specific prior written permission.
*
diff --git a/lib/librte_eal/common/include/arch/arm/rte_byteorder.h b/lib/librte_eal/common/include/arch/arm/rte_byteorder.h
index 1b312b30..0a29f4bb 100644
--- a/lib/librte_eal/common/include/arch/arm/rte_byteorder.h
+++ b/lib/librte_eal/common/include/arch/arm/rte_byteorder.h
@@ -52,7 +52,7 @@ static inline uint16_t rte_arch_bswap16(uint16_t _x)
{
register uint16_t x = _x;
- asm volatile ("rev16 %0,%1"
+ asm volatile ("rev16 %w0,%w1"
: "=r" (x)
: "r" (x)
);
diff --git a/lib/librte_eal/common/include/arch/arm/rte_cpuflags_64.h b/lib/librte_eal/common/include/arch/arm/rte_cpuflags_64.h
index 49aead92..5425f4c7 100644
--- a/lib/librte_eal/common/include/arch/arm/rte_cpuflags_64.h
+++ b/lib/librte_eal/common/include/arch/arm/rte_cpuflags_64.h
@@ -1,7 +1,7 @@
/*
* BSD LICENSE
*
- * Copyright (C) Cavium networks Ltd. 2015.
+ * Copyright (C) Cavium, Inc. 2015.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
@@ -13,7 +13,7 @@
* notice, this list of conditions and the following disclaimer in
* the documentation and/or other materials provided with the
* distribution.
- * * Neither the name of Cavium networks nor the names of its
+ * * Neither the name of Cavium, Inc nor the names of its
* contributors may be used to endorse or promote products derived
* from this software without specific prior written permission.
*
diff --git a/lib/librte_eal/common/include/arch/arm/rte_cycles_64.h b/lib/librte_eal/common/include/arch/arm/rte_cycles_64.h
index 867a9468..15457691 100644
--- a/lib/librte_eal/common/include/arch/arm/rte_cycles_64.h
+++ b/lib/librte_eal/common/include/arch/arm/rte_cycles_64.h
@@ -1,7 +1,7 @@
/*
* BSD LICENSE
*
- * Copyright (C) Cavium networks Ltd. 2015.
+ * Copyright (C) Cavium, Inc. 2015.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
@@ -13,7 +13,7 @@
* notice, this list of conditions and the following disclaimer in
* the documentation and/or other materials provided with the
* distribution.
- * * Neither the name of Cavium networks nor the names of its
+ * * Neither the name of Cavium, Inc nor the names of its
* contributors may be used to endorse or promote products derived
* from this software without specific prior written permission.
*
diff --git a/lib/librte_eal/common/include/arch/arm/rte_io.h b/lib/librte_eal/common/include/arch/arm/rte_io.h
index 9593b424..3b63ec85 100644
--- a/lib/librte_eal/common/include/arch/arm/rte_io.h
+++ b/lib/librte_eal/common/include/arch/arm/rte_io.h
@@ -1,7 +1,7 @@
/*
* BSD LICENSE
*
- * Copyright(c) 2016 Cavium networks. All rights reserved.
+ * Copyright(c) 2016 Cavium, Inc. All rights reserved.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -14,7 +14,7 @@
* notice, this list of conditions and the following disclaimer in
* the documentation and/or other materials provided with the
* distribution.
- * * Neither the name of Cavium networks nor the names of its
+ * * Neither the name of Cavium, Inc nor the names of its
* contributors may be used to endorse or promote products derived
* from this software without specific prior written permission.
*
diff --git a/lib/librte_eal/common/include/arch/arm/rte_io_64.h b/lib/librte_eal/common/include/arch/arm/rte_io_64.h
index 0402125b..ee9b8d55 100644
--- a/lib/librte_eal/common/include/arch/arm/rte_io_64.h
+++ b/lib/librte_eal/common/include/arch/arm/rte_io_64.h
@@ -1,7 +1,7 @@
/*
* BSD LICENSE
*
- * Copyright (C) Cavium networks Ltd. 2016.
+ * Copyright (C) Cavium, Inc. 2016.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
@@ -13,7 +13,7 @@
* notice, this list of conditions and the following disclaimer in
* the documentation and/or other materials provided with the
* distribution.
- * * Neither the name of Cavium networks nor the names of its
+ * * Neither the name of Cavium, Inc nor the names of its
* contributors may be used to endorse or promote products derived
* from this software without specific prior written permission.
*
@@ -44,7 +44,7 @@ extern "C" {
#include "generic/rte_io.h"
#include "rte_atomic_64.h"
-static inline uint8_t __attribute__((always_inline))
+static __rte_always_inline uint8_t
rte_read8_relaxed(const volatile void *addr)
{
uint8_t val;
@@ -56,7 +56,7 @@ rte_read8_relaxed(const volatile void *addr)
return val;
}
-static inline uint16_t __attribute__((always_inline))
+static __rte_always_inline uint16_t
rte_read16_relaxed(const volatile void *addr)
{
uint16_t val;
@@ -68,7 +68,7 @@ rte_read16_relaxed(const volatile void *addr)
return val;
}
-static inline uint32_t __attribute__((always_inline))
+static __rte_always_inline uint32_t
rte_read32_relaxed(const volatile void *addr)
{
uint32_t val;
@@ -80,7 +80,7 @@ rte_read32_relaxed(const volatile void *addr)
return val;
}
-static inline uint64_t __attribute__((always_inline))
+static __rte_always_inline uint64_t
rte_read64_relaxed(const volatile void *addr)
{
uint64_t val;
@@ -92,7 +92,7 @@ rte_read64_relaxed(const volatile void *addr)
return val;
}
-static inline void __attribute__((always_inline))
+static __rte_always_inline void
rte_write8_relaxed(uint8_t val, volatile void *addr)
{
asm volatile(
@@ -101,7 +101,7 @@ rte_write8_relaxed(uint8_t val, volatile void *addr)
: [val] "r" (val), [addr] "r" (addr));
}
-static inline void __attribute__((always_inline))
+static __rte_always_inline void
rte_write16_relaxed(uint16_t val, volatile void *addr)
{
asm volatile(
@@ -110,7 +110,7 @@ rte_write16_relaxed(uint16_t val, volatile void *addr)
: [val] "r" (val), [addr] "r" (addr));
}
-static inline void __attribute__((always_inline))
+static __rte_always_inline void
rte_write32_relaxed(uint32_t val, volatile void *addr)
{
asm volatile(
@@ -119,7 +119,7 @@ rte_write32_relaxed(uint32_t val, volatile void *addr)
: [val] "r" (val), [addr] "r" (addr));
}
-static inline void __attribute__((always_inline))
+static __rte_always_inline void
rte_write64_relaxed(uint64_t val, volatile void *addr)
{
asm volatile(
@@ -128,7 +128,7 @@ rte_write64_relaxed(uint64_t val, volatile void *addr)
: [val] "r" (val), [addr] "r" (addr));
}
-static inline uint8_t __attribute__((always_inline))
+static __rte_always_inline uint8_t
rte_read8(const volatile void *addr)
{
uint8_t val;
@@ -137,7 +137,7 @@ rte_read8(const volatile void *addr)
return val;
}
-static inline uint16_t __attribute__((always_inline))
+static __rte_always_inline uint16_t
rte_read16(const volatile void *addr)
{
uint16_t val;
@@ -146,7 +146,7 @@ rte_read16(const volatile void *addr)
return val;
}
-static inline uint32_t __attribute__((always_inline))
+static __rte_always_inline uint32_t
rte_read32(const volatile void *addr)
{
uint32_t val;
@@ -155,7 +155,7 @@ rte_read32(const volatile void *addr)
return val;
}
-static inline uint64_t __attribute__((always_inline))
+static __rte_always_inline uint64_t
rte_read64(const volatile void *addr)
{
uint64_t val;
@@ -164,28 +164,28 @@ rte_read64(const volatile void *addr)
return val;
}
-static inline void __attribute__((always_inline))
+static __rte_always_inline void
rte_write8(uint8_t value, volatile void *addr)
{
rte_io_wmb();
rte_write8_relaxed(value, addr);
}
-static inline void __attribute__((always_inline))
+static __rte_always_inline void
rte_write16(uint16_t value, volatile void *addr)
{
rte_io_wmb();
rte_write16_relaxed(value, addr);
}
-static inline void __attribute__((always_inline))
+static __rte_always_inline void
rte_write32(uint32_t value, volatile void *addr)
{
rte_io_wmb();
rte_write32_relaxed(value, addr);
}
-static inline void __attribute__((always_inline))
+static __rte_always_inline void
rte_write64(uint64_t value, volatile void *addr)
{
rte_io_wmb();
diff --git a/lib/librte_eal/common/include/arch/arm/rte_memcpy_64.h b/lib/librte_eal/common/include/arch/arm/rte_memcpy_64.h
index 5db66b63..b80d8ba4 100644
--- a/lib/librte_eal/common/include/arch/arm/rte_memcpy_64.h
+++ b/lib/librte_eal/common/include/arch/arm/rte_memcpy_64.h
@@ -1,7 +1,7 @@
/*
* BSD LICENSE
*
- * Copyright (C) Cavium networks Ltd. 2015.
+ * Copyright (C) Cavium, Inc. 2015.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
@@ -13,7 +13,7 @@
* notice, this list of conditions and the following disclaimer in
* the documentation and/or other materials provided with the
* distribution.
- * * Neither the name of Cavium networks nor the names of its
+ * * Neither the name of Cavium, Inc nor the names of its
* contributors may be used to endorse or promote products derived
* from this software without specific prior written permission.
*
diff --git a/lib/librte_eal/common/include/arch/arm/rte_pause.h b/lib/librte_eal/common/include/arch/arm/rte_pause.h
new file mode 100644
index 00000000..b772ca07
--- /dev/null
+++ b/lib/librte_eal/common/include/arch/arm/rte_pause.h
@@ -0,0 +1,50 @@
+/*
+ * BSD LICENSE
+ *
+ * Copyright(c) 2017 Cavium, Inc. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Cavium, Inc nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef _RTE_PAUSE_ARM_H_
+#define _RTE_PAUSE_ARM_H_
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#ifdef RTE_ARCH_64
+#include <rte_pause_64.h>
+#else
+#include <rte_pause_32.h>
+#endif
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _RTE_PAUSE_ARM_H_ */
diff --git a/lib/librte_eal/common/include/arch/arm/rte_pause_32.h b/lib/librte_eal/common/include/arch/arm/rte_pause_32.h
new file mode 100644
index 00000000..ec680b5c
--- /dev/null
+++ b/lib/librte_eal/common/include/arch/arm/rte_pause_32.h
@@ -0,0 +1,51 @@
+/*
+ * BSD LICENSE
+ *
+ * Copyright(c) 2017 Cavium, Inc. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Cavium, Inc nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef _RTE_PAUSE_ARM32_H_
+#define _RTE_PAUSE_ARM32_H_
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include <rte_common.h>
+#include "generic/rte_pause.h"
+
+static inline void rte_pause(void)
+{
+}
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _RTE_PAUSE_ARM32_H_ */
diff --git a/lib/librte_eal/common/include/arch/arm/rte_pause_64.h b/lib/librte_eal/common/include/arch/arm/rte_pause_64.h
new file mode 100644
index 00000000..2da46326
--- /dev/null
+++ b/lib/librte_eal/common/include/arch/arm/rte_pause_64.h
@@ -0,0 +1,52 @@
+/*
+ * BSD LICENSE
+ *
+ * Copyright(c) 2017 Cavium, Inc. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Cavium, Inc nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef _RTE_PAUSE_ARM64_H_
+#define _RTE_PAUSE_ARM64_H_
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include <rte_common.h>
+#include "generic/rte_pause.h"
+
+static inline void rte_pause(void)
+{
+ asm volatile("yield" ::: "memory");
+}
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _RTE_PAUSE_ARM64_H_ */
diff --git a/lib/librte_eal/common/include/arch/arm/rte_prefetch_64.h b/lib/librte_eal/common/include/arch/arm/rte_prefetch_64.h
index 0d077ea6..ff59509f 100644
--- a/lib/librte_eal/common/include/arch/arm/rte_prefetch_64.h
+++ b/lib/librte_eal/common/include/arch/arm/rte_prefetch_64.h
@@ -1,7 +1,7 @@
/*
* BSD LICENSE
*
- * Copyright (C) Cavium networks Ltd. 2015.
+ * Copyright (C) Cavium, Inc. 2015.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
@@ -13,7 +13,7 @@
* notice, this list of conditions and the following disclaimer in
* the documentation and/or other materials provided with the
* distribution.
- * * Neither the name of Cavium networks nor the names of its
+ * * Neither the name of Cavium, Inc nor the names of its
* contributors may be used to endorse or promote products derived
* from this software without specific prior written permission.
*
diff --git a/lib/librte_eal/common/include/arch/arm/rte_vect.h b/lib/librte_eal/common/include/arch/arm/rte_vect.h
index 4107c998..782350d1 100644
--- a/lib/librte_eal/common/include/arch/arm/rte_vect.h
+++ b/lib/librte_eal/common/include/arch/arm/rte_vect.h
@@ -1,7 +1,7 @@
/*-
* BSD LICENSE
*
- * Copyright(c) 2015 Cavium Networks. All rights reserved.
+ * Copyright(c) 2015 Cavium, Inc. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
@@ -13,7 +13,7 @@
* notice, this list of conditions and the following disclaimer in
* the documentation and/or other materials provided with the
* distribution.
- * * Neither the name of Cavium Networks nor the names of its
+ * * Neither the name of Cavium, Inc nor the names of its
* contributors may be used to endorse or promote products derived
* from this software without specific prior written permission.
*
@@ -35,6 +35,7 @@
#include <stdint.h>
#include "generic/rte_vect.h"
+#include "rte_debug.h"
#include "arm_neon.h"
#ifdef __cplusplus
@@ -76,8 +77,122 @@ vqtbl1q_u8(uint8x16_t a, uint8x16_t b)
return vld1q_u8(rte_ret.u8);
}
+
+static inline uint16_t
+vaddvq_u16(uint16x8_t a)
+{
+ uint32x4_t m = vpaddlq_u16(a);
+ uint64x2_t n = vpaddlq_u32(m);
+ uint64x1_t o = vget_low_u64(n) + vget_high_u64(n);
+
+ return vget_lane_u32((uint32x2_t)o, 0);
+}
+
#endif
+#if defined(RTE_TOOLCHAIN_GCC) && (GCC_VERSION < 70000)
+static inline uint32x4_t
+vcopyq_laneq_u32(uint32x4_t a, const int lane_a,
+ uint32x4_t b, const int lane_b)
+{
+ return vsetq_lane_u32(vgetq_lane_u32(b, lane_b), a, lane_a);
+}
+#endif
+
+#if defined(RTE_ARCH_ARM64)
+#if defined(RTE_TOOLCHAIN_GCC) && (GCC_VERSION < 70000)
+
+#if (GCC_VERSION < 40900)
+typedef uint64_t poly64_t;
+typedef uint64x2_t poly64x2_t;
+typedef uint8_t poly128_t __attribute__((vector_size(16), aligned(16)));
+#endif
+
+/* NEON intrinsic vreinterpretq_u64_p128() is supported since GCC version 7 */
+static inline uint64x2_t
+vreinterpretq_u64_p128(poly128_t x)
+{
+ return (uint64x2_t)x;
+}
+
+/* NEON intrinsic vreinterpretq_p64_u64() is supported since GCC version 7 */
+static inline poly64x2_t
+vreinterpretq_p64_u64(uint64x2_t x)
+{
+ return (poly64x2_t)x;
+}
+
+/* NEON intrinsic vgetq_lane_p64() is supported since GCC version 7 */
+static inline poly64_t
+vgetq_lane_p64(poly64x2_t x, const int lane)
+{
+ RTE_ASSERT(lane >= 0 && lane <= 1);
+
+ poly64_t *p = (poly64_t *)&x;
+
+ return p[lane];
+}
+#endif
+#endif
+
+/*
+ * If (0 <= index <= 15), call the ASIMD ext instruction on the
+ * 128-bit registers v0 and v1 with the appropriate index.
+ *
+ * Otherwise, return a zero vector.
+ */
+static inline uint8x16_t
+vextract(uint8x16_t v0, uint8x16_t v1, const int index)
+{
+ switch (index) {
+ case 0: return vextq_u8(v0, v1, 0);
+ case 1: return vextq_u8(v0, v1, 1);
+ case 2: return vextq_u8(v0, v1, 2);
+ case 3: return vextq_u8(v0, v1, 3);
+ case 4: return vextq_u8(v0, v1, 4);
+ case 5: return vextq_u8(v0, v1, 5);
+ case 6: return vextq_u8(v0, v1, 6);
+ case 7: return vextq_u8(v0, v1, 7);
+ case 8: return vextq_u8(v0, v1, 8);
+ case 9: return vextq_u8(v0, v1, 9);
+ case 10: return vextq_u8(v0, v1, 10);
+ case 11: return vextq_u8(v0, v1, 11);
+ case 12: return vextq_u8(v0, v1, 12);
+ case 13: return vextq_u8(v0, v1, 13);
+ case 14: return vextq_u8(v0, v1, 14);
+ case 15: return vextq_u8(v0, v1, 15);
+ }
+ return vdupq_n_u8(0);
+}
+
+/**
+ * Shifts a 128-bit register right by the specified number of bytes.
+ *
+ * The shift parameter must be in the range 0 to 16.
+ */
+static inline uint64x2_t
+vshift_bytes_right(uint64x2_t reg, const unsigned int shift)
+{
+ return vreinterpretq_u64_u8(vextract(
+ vreinterpretq_u8_u64(reg),
+ vdupq_n_u8(0),
+ shift));
+}
+
+/**
+ * Shifts a 128-bit register left by the specified number of bytes.
+ *
+ * The shift parameter must be in the range 0 to 16.
+ */
+static inline uint64x2_t
+vshift_bytes_left(uint64x2_t reg, const unsigned int shift)
+{
+ return vreinterpretq_u64_u8(vextract(
+ vdupq_n_u8(0),
+ vreinterpretq_u8_u64(reg),
+ 16 - shift));
+}
+
#ifdef __cplusplus
}
#endif
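
To make the byte-shift helpers concrete, here is a small sketch; the shift
counts are examples only, and vshift_bytes_* come from the header above.
Shifting right by one byte discards the lowest byte of the 128-bit register
and zero-fills from the top:

    #include <rte_vect.h>

    static inline uint64x2_t
    shift_demo(uint64x2_t v)
    {
    	/* Both helpers expect a byte count in the range 0 to 16. */
    	v = vshift_bytes_right(v, 1);
    	return vshift_bytes_left(v, 1);
    }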
diff --git a/lib/librte_eal/common/include/arch/ppc_64/rte_io.h b/lib/librte_eal/common/include/arch/ppc_64/rte_io.h
index be192da7..1f42ced5 100644
--- a/lib/librte_eal/common/include/arch/ppc_64/rte_io.h
+++ b/lib/librte_eal/common/include/arch/ppc_64/rte_io.h
@@ -1,7 +1,7 @@
/*
* BSD LICENSE
*
- * Copyright(c) 2016 Cavium networks. All rights reserved.
+ * Copyright(c) 2016 Cavium, Inc. All rights reserved.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -14,7 +14,7 @@
* notice, this list of conditions and the following disclaimer in
* the documentation and/or other materials provided with the
* distribution.
- * * Neither the name of Cavium networks nor the names of its
+ * * Neither the name of Cavium, Inc nor the names of its
* contributors may be used to endorse or promote products derived
* from this software without specific prior written permission.
*
diff --git a/lib/librte_eal/common/include/arch/ppc_64/rte_pause.h b/lib/librte_eal/common/include/arch/ppc_64/rte_pause.h
new file mode 100644
index 00000000..389682ca
--- /dev/null
+++ b/lib/librte_eal/common/include/arch/ppc_64/rte_pause.h
@@ -0,0 +1,51 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright(c) Cavium, Inc. All rights reserved.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Cavium, Inc nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef _RTE_PAUSE_PPC64_H_
+#define _RTE_PAUSE_PPC64_H_
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include "generic/rte_pause.h"
+
+static inline void rte_pause(void)
+{
+}
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _RTE_PAUSE_PPC64_H_ */
diff --git a/lib/librte_eal/common/include/arch/ppc_64/rte_spinlock.h b/lib/librte_eal/common/include/arch/ppc_64/rte_spinlock.h
index af139c9d..39815d9e 100644
--- a/lib/librte_eal/common/include/arch/ppc_64/rte_spinlock.h
+++ b/lib/librte_eal/common/include/arch/ppc_64/rte_spinlock.h
@@ -38,6 +38,7 @@ extern "C" {
#endif
#include <rte_common.h>
+#include <rte_pause.h>
#include "generic/rte_spinlock.h"
/* Fixme: Use intrinsics to implement the spinlock on Power architecture */
diff --git a/lib/librte_eal/common/include/arch/x86/rte_cycles.h b/lib/librte_eal/common/include/arch/x86/rte_cycles.h
index 5eb6ce96..1bb3e1db 100644
--- a/lib/librte_eal/common/include/arch/x86/rte_cycles.h
+++ b/lib/librte_eal/common/include/arch/x86/rte_cycles.h
@@ -2,6 +2,7 @@
* BSD LICENSE
*
* Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+ * Copyright(c) 2013 6WIND.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -30,36 +31,6 @@
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
-/* BSD LICENSE
- *
- * Copyright(c) 2013 6WIND.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- *
- * * Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in
- * the documentation and/or other materials provided with the
- * distribution.
- * * Neither the name of 6WIND S.A. nor the names of its
- * contributors may be used to endorse or promote products derived
- * from this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- */
#ifndef _RTE_CYCLES_X86_64_H_
#define _RTE_CYCLES_X86_64_H_
diff --git a/lib/librte_eal/common/include/arch/x86/rte_io.h b/lib/librte_eal/common/include/arch/x86/rte_io.h
index c8d14043..130022d0 100644
--- a/lib/librte_eal/common/include/arch/x86/rte_io.h
+++ b/lib/librte_eal/common/include/arch/x86/rte_io.h
@@ -1,7 +1,7 @@
/*
* BSD LICENSE
*
- * Copyright(c) 2016 Cavium networks. All rights reserved.
+ * Copyright(c) 2016 Cavium, Inc. All rights reserved.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -14,7 +14,7 @@
* notice, this list of conditions and the following disclaimer in
* the documentation and/or other materials provided with the
* distribution.
- * * Neither the name of Cavium networks nor the names of its
+ * * Neither the name of Cavium, Inc nor the names of its
* contributors may be used to endorse or promote products derived
* from this software without specific prior written permission.
*
diff --git a/lib/librte_eal/common/include/arch/x86/rte_memcpy.h b/lib/librte_eal/common/include/arch/x86/rte_memcpy.h
index b9785e85..74c280c2 100644
--- a/lib/librte_eal/common/include/arch/x86/rte_memcpy.h
+++ b/lib/librte_eal/common/include/arch/x86/rte_memcpy.h
@@ -44,6 +44,7 @@
#include <stdint.h>
#include <string.h>
#include <rte_vect.h>
+#include <rte_common.h>
#ifdef __cplusplus
extern "C" {
@@ -64,8 +65,8 @@ extern "C" {
* @return
* Pointer to the destination data.
*/
-static inline void *
-rte_memcpy(void *dst, const void *src, size_t n) __attribute__((always_inline));
+static __rte_always_inline void *
+rte_memcpy(void *dst, const void *src, size_t n);
#ifdef RTE_MACHINE_CPUFLAG_AVX512F
diff --git a/lib/librte_eal/common/include/arch/x86/rte_pause.h b/lib/librte_eal/common/include/arch/x86/rte_pause.h
new file mode 100644
index 00000000..29130c4b
--- /dev/null
+++ b/lib/librte_eal/common/include/arch/x86/rte_pause.h
@@ -0,0 +1,53 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright(c) Cavium, Inc. All rights reserved.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Cavium, Inc nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef _RTE_PAUSE_X86_H_
+#define _RTE_PAUSE_X86_H_
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include "generic/rte_pause.h"
+
+#include <emmintrin.h>
+static inline void rte_pause(void)
+{
+ _mm_pause();
+}
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _RTE_PAUSE_X86_H_ */
diff --git a/lib/librte_eal/common/include/arch/x86/rte_spinlock.h b/lib/librte_eal/common/include/arch/x86/rte_spinlock.h
index 8e630c21..5675c2b4 100644
--- a/lib/librte_eal/common/include/arch/x86/rte_spinlock.h
+++ b/lib/librte_eal/common/include/arch/x86/rte_spinlock.h
@@ -43,6 +43,7 @@ extern "C" {
#include "rte_cpuflags.h"
#include "rte_branch_prediction.h"
#include "rte_common.h"
+#include "rte_pause.h"
#define RTE_RTM_MAX_RETRIES (10)
#define RTE_XABORT_LOCK_BUSY (0xff)
diff --git a/lib/librte_eal/common/include/arch/x86/rte_vect.h b/lib/librte_eal/common/include/arch/x86/rte_vect.h
index 1b4b85dd..03fc991e 100644
--- a/lib/librte_eal/common/include/arch/x86/rte_vect.h
+++ b/lib/librte_eal/common/include/arch/x86/rte_vect.h
@@ -45,21 +45,7 @@
#if (defined(__ICC) || (__GNUC__ == 4 && __GNUC_MINOR__ < 4))
-#ifdef __SSE__
-#include <xmmintrin.h>
-#endif
-
-#ifdef __SSE2__
-#include <emmintrin.h>
-#endif
-
-#ifdef __SSE3__
-#include <tmmintrin.h>
-#endif
-
-#if defined(__SSE4_2__) || defined(__SSE4_1__)
-#include <smmintrin.h>
-#endif
+#include <smmintrin.h> /* SSE4 */
#if defined(__AVX__)
#include <immintrin.h>
diff --git a/lib/librte_eal/common/include/generic/rte_byteorder.h b/lib/librte_eal/common/include/generic/rte_byteorder.h
index e00bccbc..e5e820d3 100644
--- a/lib/librte_eal/common/include/generic/rte_byteorder.h
+++ b/lib/librte_eal/common/include/generic/rte_byteorder.h
@@ -74,6 +74,73 @@
#elif defined __LITTLE_ENDIAN__
#define RTE_BYTE_ORDER RTE_LITTLE_ENDIAN
#endif
+#if !defined(RTE_BYTE_ORDER)
+#error Unknown endianness.
+#endif
+
+#define RTE_STATIC_BSWAP16(v) \
+ ((((uint16_t)(v) & UINT16_C(0x00ff)) << 8) | \
+ (((uint16_t)(v) & UINT16_C(0xff00)) >> 8))
+
+#define RTE_STATIC_BSWAP32(v) \
+ ((((uint32_t)(v) & UINT32_C(0x000000ff)) << 24) | \
+ (((uint32_t)(v) & UINT32_C(0x0000ff00)) << 8) | \
+ (((uint32_t)(v) & UINT32_C(0x00ff0000)) >> 8) | \
+ (((uint32_t)(v) & UINT32_C(0xff000000)) >> 24))
+
+#define RTE_STATIC_BSWAP64(v) \
+ ((((uint64_t)(v) & UINT64_C(0x00000000000000ff)) << 56) | \
+ (((uint64_t)(v) & UINT64_C(0x000000000000ff00)) << 40) | \
+ (((uint64_t)(v) & UINT64_C(0x0000000000ff0000)) << 24) | \
+ (((uint64_t)(v) & UINT64_C(0x00000000ff000000)) << 8) | \
+ (((uint64_t)(v) & UINT64_C(0x000000ff00000000)) >> 8) | \
+ (((uint64_t)(v) & UINT64_C(0x0000ff0000000000)) >> 24) | \
+ (((uint64_t)(v) & UINT64_C(0x00ff000000000000)) >> 40) | \
+ (((uint64_t)(v) & UINT64_C(0xff00000000000000)) >> 56))
+
+/*
+ * These macros are functionally similar to rte_cpu_to_(be|le)(16|32|64)();
+ * they take values in host CPU order and return them converted to the
+ * intended endianness.
+ *
+ * They resolve at compilation time to integer constants which can safely be
+ * used with static initializers, since those cannot involve function calls.
+ *
+ * On the other hand, they are not as optimized as their rte_cpu_to_*()
+ * counterparts; applications should therefore refrain from using them on
+ * variable values, particularly inside performance-sensitive code.
+ */
+#if RTE_BYTE_ORDER == RTE_BIG_ENDIAN
+#define RTE_BE16(v) (rte_be16_t)(v)
+#define RTE_BE32(v) (rte_be32_t)(v)
+#define RTE_BE64(v) (rte_be64_t)(v)
+#define RTE_LE16(v) (rte_le16_t)(RTE_STATIC_BSWAP16(v))
+#define RTE_LE32(v) (rte_le32_t)(RTE_STATIC_BSWAP32(v))
+#define RTE_LE64(v) (rte_le64_t)(RTE_STATIC_BSWAP64(v))
+#elif RTE_BYTE_ORDER == RTE_LITTLE_ENDIAN
+#define RTE_BE16(v) (rte_be16_t)(RTE_STATIC_BSWAP16(v))
+#define RTE_BE32(v) (rte_be32_t)(RTE_STATIC_BSWAP32(v))
+#define RTE_BE64(v) (rte_be64_t)(RTE_STATIC_BSWAP64(v))
+#define RTE_LE16(v) (rte_le16_t)(v)
+#define RTE_LE32(v) (rte_le32_t)(v)
+#define RTE_LE64(v) (rte_le64_t)(v)
+#else
+#error Unsupported endianness.
+#endif
+
+/*
+ * The following types should be used when handling values according to a
+ * specific byte ordering, which may differ from that of the host CPU.
+ *
+ * Libraries, public APIs and applications are encouraged to use them for
+ * documentation purposes.
+ */
+typedef uint16_t rte_be16_t; /**< 16-bit big-endian value. */
+typedef uint32_t rte_be32_t; /**< 32-bit big-endian value. */
+typedef uint64_t rte_be64_t; /**< 64-bit big-endian value. */
+typedef uint16_t rte_le16_t; /**< 16-bit little-endian value. */
+typedef uint32_t rte_le32_t; /**< 32-bit little-endian value. */
+typedef uint64_t rte_le64_t; /**< 64-bit little-endian value. */
/*
* An internal function to swap bytes in a 16-bit value.
@@ -84,8 +151,7 @@
static inline uint16_t
rte_constant_bswap16(uint16_t x)
{
- return (uint16_t)(((x & 0x00ffU) << 8) |
- ((x & 0xff00U) >> 8));
+ return RTE_STATIC_BSWAP16(x);
}
/*
@@ -97,10 +163,7 @@ rte_constant_bswap16(uint16_t x)
static inline uint32_t
rte_constant_bswap32(uint32_t x)
{
- return ((x & 0x000000ffUL) << 24) |
- ((x & 0x0000ff00UL) << 8) |
- ((x & 0x00ff0000UL) >> 8) |
- ((x & 0xff000000UL) >> 24);
+ return RTE_STATIC_BSWAP32(x);
}
/*
@@ -112,14 +175,7 @@ rte_constant_bswap32(uint32_t x)
static inline uint64_t
rte_constant_bswap64(uint64_t x)
{
- return ((x & 0x00000000000000ffULL) << 56) |
- ((x & 0x000000000000ff00ULL) << 40) |
- ((x & 0x0000000000ff0000ULL) << 24) |
- ((x & 0x00000000ff000000ULL) << 8) |
- ((x & 0x000000ff00000000ULL) >> 8) |
- ((x & 0x0000ff0000000000ULL) >> 24) |
- ((x & 0x00ff000000000000ULL) >> 40) |
- ((x & 0xff00000000000000ULL) >> 56);
+ return RTE_STATIC_BSWAP64(x);
}
@@ -143,65 +199,65 @@ static uint64_t rte_bswap64(uint64_t x);
/**
* Convert a 16-bit value from CPU order to little endian.
*/
-static uint16_t rte_cpu_to_le_16(uint16_t x);
+static rte_le16_t rte_cpu_to_le_16(uint16_t x);
/**
* Convert a 32-bit value from CPU order to little endian.
*/
-static uint32_t rte_cpu_to_le_32(uint32_t x);
+static rte_le32_t rte_cpu_to_le_32(uint32_t x);
/**
* Convert a 64-bit value from CPU order to little endian.
*/
-static uint64_t rte_cpu_to_le_64(uint64_t x);
+static rte_le64_t rte_cpu_to_le_64(uint64_t x);
/**
* Convert a 16-bit value from CPU order to big endian.
*/
-static uint16_t rte_cpu_to_be_16(uint16_t x);
+static rte_be16_t rte_cpu_to_be_16(uint16_t x);
/**
* Convert a 32-bit value from CPU order to big endian.
*/
-static uint32_t rte_cpu_to_be_32(uint32_t x);
+static rte_be32_t rte_cpu_to_be_32(uint32_t x);
/**
* Convert a 64-bit value from CPU order to big endian.
*/
-static uint64_t rte_cpu_to_be_64(uint64_t x);
+static rte_be64_t rte_cpu_to_be_64(uint64_t x);
/**
* Convert a 16-bit value from little endian to CPU order.
*/
-static uint16_t rte_le_to_cpu_16(uint16_t x);
+static uint16_t rte_le_to_cpu_16(rte_le16_t x);
/**
* Convert a 32-bit value from little endian to CPU order.
*/
-static uint32_t rte_le_to_cpu_32(uint32_t x);
+static uint32_t rte_le_to_cpu_32(rte_le32_t x);
/**
* Convert a 64-bit value from little endian to CPU order.
*/
-static uint64_t rte_le_to_cpu_64(uint64_t x);
+static uint64_t rte_le_to_cpu_64(rte_le64_t x);
/**
* Convert a 16-bit value from big endian to CPU order.
*/
-static uint16_t rte_be_to_cpu_16(uint16_t x);
+static uint16_t rte_be_to_cpu_16(rte_be16_t x);
/**
* Convert a 32-bit value from big endian to CPU order.
*/
-static uint32_t rte_be_to_cpu_32(uint32_t x);
+static uint32_t rte_be_to_cpu_32(rte_be32_t x);
/**
* Convert a 64-bit value from big endian to CPU order.
*/
-static uint64_t rte_be_to_cpu_64(uint64_t x);
+static uint64_t rte_be_to_cpu_64(rte_be64_t x);
#endif /* __DOXYGEN__ */
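
Since the RTE_BE*/RTE_LE* macros above expand to integer constants, they can
appear where rte_cpu_to_be_16() cannot, such as static initializers. A
minimal sketch; the structure is hypothetical:

    #include <rte_byteorder.h>

    struct flow_match {
    	rte_be16_t ether_type; /* stored in network byte order */
    };

    /* Resolves at compile time on either endianness; 0x0800 is IPv4. */
    static const struct flow_match ipv4_match = {
    	.ether_type = RTE_BE16(0x0800),
    };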
diff --git a/lib/librte_eal/common/include/generic/rte_cycles.h b/lib/librte_eal/common/include/generic/rte_cycles.h
index 0e645c2c..0df90474 100644
--- a/lib/librte_eal/common/include/generic/rte_cycles.h
+++ b/lib/librte_eal/common/include/generic/rte_cycles.h
@@ -2,6 +2,7 @@
* BSD LICENSE
*
* Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+ * Copyright(c) 2013 6WIND.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -30,36 +31,6 @@
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
-/* BSD LICENSE
- *
- * Copyright(c) 2013 6WIND.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- *
- * * Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in
- * the documentation and/or other materials provided with the
- * distribution.
- * * Neither the name of 6WIND S.A. nor the names of its
- * contributors may be used to endorse or promote products derived
- * from this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- */
#ifndef _RTE_CYCLES_H_
#define _RTE_CYCLES_H_
diff --git a/lib/librte_eal/common/include/generic/rte_io.h b/lib/librte_eal/common/include/generic/rte_io.h
index d82ee695..0b88c341 100644
--- a/lib/librte_eal/common/include/generic/rte_io.h
+++ b/lib/librte_eal/common/include/generic/rte_io.h
@@ -1,7 +1,7 @@
/*
* BSD LICENSE
*
- * Copyright(c) 2016 Cavium networks. All rights reserved.
+ * Copyright(c) 2016 Cavium, Inc. All rights reserved.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -14,7 +14,7 @@
* notice, this list of conditions and the following disclaimer in
* the documentation and/or other materials provided with the
* distribution.
- * * Neither the name of Cavium networks nor the names of its
+ * * Neither the name of Cavium, Inc nor the names of its
* contributors may be used to endorse or promote products derived
* from this software without specific prior written permission.
*
@@ -34,8 +34,6 @@
#ifndef _RTE_IO_H_
#define _RTE_IO_H_
-#include <rte_atomic.h>
-
/**
* @file
* I/O device memory operations
@@ -264,55 +262,55 @@ rte_write64(uint64_t value, volatile void *addr);
#ifndef RTE_OVERRIDE_IO_H
-static inline uint8_t __attribute__((always_inline))
+static __rte_always_inline uint8_t
rte_read8_relaxed(const volatile void *addr)
{
return *(const volatile uint8_t *)addr;
}
-static inline uint16_t __attribute__((always_inline))
+static __rte_always_inline uint16_t
rte_read16_relaxed(const volatile void *addr)
{
return *(const volatile uint16_t *)addr;
}
-static inline uint32_t __attribute__((always_inline))
+static __rte_always_inline uint32_t
rte_read32_relaxed(const volatile void *addr)
{
return *(const volatile uint32_t *)addr;
}
-static inline uint64_t __attribute__((always_inline))
+static __rte_always_inline uint64_t
rte_read64_relaxed(const volatile void *addr)
{
return *(const volatile uint64_t *)addr;
}
-static inline void __attribute__((always_inline))
+static __rte_always_inline void
rte_write8_relaxed(uint8_t value, volatile void *addr)
{
*(volatile uint8_t *)addr = value;
}
-static inline void __attribute__((always_inline))
+static __rte_always_inline void
rte_write16_relaxed(uint16_t value, volatile void *addr)
{
*(volatile uint16_t *)addr = value;
}
-static inline void __attribute__((always_inline))
+static __rte_always_inline void
rte_write32_relaxed(uint32_t value, volatile void *addr)
{
*(volatile uint32_t *)addr = value;
}
-static inline void __attribute__((always_inline))
+static __rte_always_inline void
rte_write64_relaxed(uint64_t value, volatile void *addr)
{
*(volatile uint64_t *)addr = value;
}
-static inline uint8_t __attribute__((always_inline))
+static __rte_always_inline uint8_t
rte_read8(const volatile void *addr)
{
uint8_t val;
@@ -321,7 +319,7 @@ rte_read8(const volatile void *addr)
return val;
}
-static inline uint16_t __attribute__((always_inline))
+static __rte_always_inline uint16_t
rte_read16(const volatile void *addr)
{
uint16_t val;
@@ -330,7 +328,7 @@ rte_read16(const volatile void *addr)
return val;
}
-static inline uint32_t __attribute__((always_inline))
+static __rte_always_inline uint32_t
rte_read32(const volatile void *addr)
{
uint32_t val;
@@ -339,7 +337,7 @@ rte_read32(const volatile void *addr)
return val;
}
-static inline uint64_t __attribute__((always_inline))
+static __rte_always_inline uint64_t
rte_read64(const volatile void *addr)
{
uint64_t val;
@@ -348,28 +346,28 @@ rte_read64(const volatile void *addr)
return val;
}
-static inline void __attribute__((always_inline))
+static __rte_always_inline void
rte_write8(uint8_t value, volatile void *addr)
{
rte_io_wmb();
rte_write8_relaxed(value, addr);
}
-static inline void __attribute__((always_inline))
+static __rte_always_inline void
rte_write16(uint16_t value, volatile void *addr)
{
rte_io_wmb();
rte_write16_relaxed(value, addr);
}
-static inline void __attribute__((always_inline))
+static __rte_always_inline void
rte_write32(uint32_t value, volatile void *addr)
{
rte_io_wmb();
rte_write32_relaxed(value, addr);
}
-static inline void __attribute__((always_inline))
+static __rte_always_inline void
rte_write64(uint64_t value, volatile void *addr)
{
rte_io_wmb();
diff --git a/lib/librte_eal/common/include/generic/rte_pause.h b/lib/librte_eal/common/include/generic/rte_pause.h
new file mode 100644
index 00000000..a8374321
--- /dev/null
+++ b/lib/librte_eal/common/include/generic/rte_pause.h
@@ -0,0 +1,52 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright(c) 2017 Cavium, Inc. All rights reserved.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Cavium, Inc nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef _RTE_PAUSE_H_
+#define _RTE_PAUSE_H_
+
+/**
+ * @file
+ *
+ * CPU pause operation.
+ *
+ */
+
+/**
+ * Pause CPU execution for a short while
+ *
+ * This call is intended for tight loops which poll a shared resource or wait
+ * for an event. A short pause within the loop may reduce the power consumption.
+ */
+static inline void rte_pause(void);
+
+#endif /* _RTE_PAUSE_H_ */
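The generic header only declares rte_pause(); each architecture supplies the definition (for example the PAUSE intrinsic on x86, removed from rte_common.h later in this patch). A minimal usage sketch, with a hypothetical shared "ready" flag, might look like this:

#include <rte_pause.h>

static volatile int ready;

static void
wait_until_ready(void)
{
	/* poll the shared flag, hinting the CPU between iterations */
	while (!ready)
		rte_pause();
}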
diff --git a/lib/librte_eal/common/include/generic/rte_rwlock.h b/lib/librte_eal/common/include/generic/rte_rwlock.h
index 7a0fdc55..fdb3113d 100644
--- a/lib/librte_eal/common/include/generic/rte_rwlock.h
+++ b/lib/librte_eal/common/include/generic/rte_rwlock.h
@@ -52,6 +52,7 @@ extern "C" {
#include <rte_common.h>
#include <rte_atomic.h>
+#include <rte_pause.h>
/**
* The rte_rwlock_t type.
diff --git a/lib/librte_eal/common/include/generic/rte_spinlock.h b/lib/librte_eal/common/include/generic/rte_spinlock.h
index e51fc56b..54f83a4c 100644
--- a/lib/librte_eal/common/include/generic/rte_spinlock.h
+++ b/lib/librte_eal/common/include/generic/rte_spinlock.h
@@ -51,6 +51,7 @@
#ifdef RTE_FORCE_INTRINSICS
#include <rte_common.h>
#endif
+#include <rte_pause.h>
/**
* The rte_spinlock_t type.
diff --git a/lib/librte_eal/common/include/rte_alarm.h b/lib/librte_eal/common/include/rte_alarm.h
index 4012cd67..c275be18 100644
--- a/lib/librte_eal/common/include/rte_alarm.h
+++ b/lib/librte_eal/common/include/rte_alarm.h
@@ -91,7 +91,7 @@ int rte_eal_alarm_set(uint64_t us, rte_eal_alarm_callback cb, void *cb_arg);
* the number of canceled alarm callback functions
* - value greater or equal 0 and rte_errno set to EINPROGRESS, at least one
* alarm could not be canceled because cancellation was requested from alarm
- * callback context. Returned value is the number of succesfuly canceled
+ * callback context. Returned value is the number of successfully canceled
* alarm callbacks
* - 0 and rte_errno set to ENOENT - no alarm found
* - -1 and rte_errno set to EINVAL - invalid parameter (NULL callback)
diff --git a/lib/librte_eal/common/include/rte_bus.h b/lib/librte_eal/common/include/rte_bus.h
index 7c369692..c79368d3 100644
--- a/lib/librte_eal/common/include/rte_bus.h
+++ b/lib/librte_eal/common/include/rte_bus.h
@@ -1,8 +1,7 @@
/*-
* BSD LICENSE
*
- * Copyright(c) 2016 NXP
- * All rights reserved.
+ * Copyright 2016 NXP
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
@@ -58,7 +57,7 @@ TAILQ_HEAD(rte_bus_list, rte_bus);
/**
* Bus specific scan for devices attached on the bus.
- * For each bus object, the scan would be reponsible for finding devices and
+ * For each bus object, the scan would be responsible for finding devices and
* adding them to its private device list.
*
* A bus should mandatorily implement this method.
@@ -82,6 +81,94 @@ typedef int (*rte_bus_scan_t)(void);
typedef int (*rte_bus_probe_t)(void);
/**
+ * Device iterator to find a device on a bus.
+ *
+ * This function returns an rte_device if one of those held by the bus
+ * matches the data passed as parameter.
+ *
+ * If the comparison function returns zero, this function stops iterating
+ * over any more devices. To continue a search, the device returned by a
+ * previous search can be passed via the start parameter.
+ *
+ * @param cmp
+ * Comparison function.
+ *
+ * @param data
+ * Data to compare each device against.
+ *
+ * @param start
+ * starting point for the iteration
+ *
+ * @return
+ * The first device matching the data, NULL if none exists.
+ */
+typedef struct rte_device *
+(*rte_bus_find_device_t)(const struct rte_device *start, rte_dev_cmp_t cmp,
+ const void *data);
+
+/**
+ * Implementation specific probe function which is responsible for linking
+ * devices on that bus with applicable drivers.
+ *
+ * @param dev
+ * Device pointer that was returned by a previous call to find_device.
+ *
+ * @return
+ * 0 on success.
+ * !0 on error.
+ */
+typedef int (*rte_bus_plug_t)(struct rte_device *dev);
+
+/**
+ * Implementation specific remove function which is responsible for unlinking
+ * devices on that bus from assigned driver.
+ *
+ * @param dev
+ * Device pointer that was returned by a previous call to find_device.
+ *
+ * @return
+ * 0 on success.
+ * !0 on error.
+ */
+typedef int (*rte_bus_unplug_t)(struct rte_device *dev);
+
+/**
+ * Bus specific parsing function.
+ * Validates the syntax used in the textual representation of a device,
+ * If the syntax is valid and ``addr`` is not NULL, writes the bus-specific
+ * device representation to ``addr``.
+ *
+ * @param[in] name
+ * device textual description
+ *
+ * @param[out] addr
+ * device information location address, into which parsed info
+ * should be written. If NULL, nothing should be written, which
+ * is not an error.
+ *
+ * @return
+ * 0 if parsing was successful.
+ * !0 for any error.
+ */
+typedef int (*rte_bus_parse_t)(const char *name, void *addr);
+
+/**
+ * Bus scan policies
+ */
+enum rte_bus_scan_mode {
+ RTE_BUS_SCAN_UNDEFINED,
+ RTE_BUS_SCAN_WHITELIST,
+ RTE_BUS_SCAN_BLACKLIST,
+};
+
+/**
+ * A structure used to configure bus operations.
+ */
+struct rte_bus_conf {
+ enum rte_bus_scan_mode scan_mode; /**< Scan policy. */
+};
+
+/**
* A structure describing a generic bus.
*/
struct rte_bus {
@@ -89,6 +176,11 @@ struct rte_bus {
const char *name; /**< Name of the bus */
rte_bus_scan_t scan; /**< Scan for devices attached to bus */
rte_bus_probe_t probe; /**< Probe devices on bus */
+ rte_bus_find_device_t find_device; /**< Find a device on the bus */
+ rte_bus_plug_t plug; /**< Probe single device for drivers */
+ rte_bus_unplug_t unplug; /**< Remove single device from driver */
+ rte_bus_parse_t parse; /**< Parse a device name */
+ struct rte_bus_conf conf; /**< Bus configuration */
};
/**
@@ -133,19 +225,68 @@ int rte_bus_probe(void);
*
* @param f
* A valid and open output stream handle
+ */
+void rte_bus_dump(FILE *f);
+
+/**
+ * Bus comparison function.
+ *
+ * @param bus
+ * Bus under test.
+ *
+ * @param data
+ * Data to compare against.
*
* @return
- * 0 in case of success
- * !0 in case there is error in opening the output stream
+ * 0 if the bus matches the data.
+ * !0 if the bus does not match.
+ * <0 if ordering is possible and the bus is lower than the data.
+ * >0 if ordering is possible and the bus is greater than the data.
*/
-void rte_bus_dump(FILE *f);
+typedef int (*rte_bus_cmp_t)(const struct rte_bus *bus, const void *data);
+
+/**
+ * Bus iterator to find a particular bus.
+ *
+ * This function compares each registered bus to find one that matches
+ * the data passed as parameter.
+ *
+ * If the comparison function returns zero, this function stops iterating
+ * over any more buses. To continue a search, the bus returned by a previous
+ * search can be passed via the start parameter.
+ *
+ * @param start
+ * Starting point for the iteration.
+ *
+ * @param cmp
+ * Comparison function.
+ *
+ * @param data
+ * Data to pass to comparison function.
+ *
+ * @return
+ * A pointer to a rte_bus structure or NULL in case no bus matches
+ */
+struct rte_bus *rte_bus_find(const struct rte_bus *start, rte_bus_cmp_t cmp,
+ const void *data);
+
+/**
+ * Find the registered bus for a particular device.
+ */
+struct rte_bus *rte_bus_find_by_device(const struct rte_device *dev);
+
+/**
+ * Find the registered bus for a given name.
+ */
+struct rte_bus *rte_bus_find_by_name(const char *busname);
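As an illustration of how these iterators compose, the sketch below locates a bus by name and then walks its devices with a trivial name comparison. The "pci" bus name and the helper function are assumptions for the example:

#include <string.h>
#include <rte_bus.h>
#include <rte_dev.h>

/* rte_dev_cmp_t callback: match a device by its name */
static int
cmp_dev_name(const struct rte_device *dev, const void *name)
{
	return strcmp(dev->name, name);
}

static struct rte_device *
find_dev(const char *devname)
{
	struct rte_bus *bus = rte_bus_find_by_name("pci");

	if (bus == NULL || bus->find_device == NULL)
		return NULL;
	return bus->find_device(NULL, cmp_dev_name, devname);
}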
/**
* Helper for Bus registration.
* The constructor has higher priority than PMD constructors.
*/
#define RTE_REGISTER_BUS(nm, bus) \
-static void __attribute__((constructor(101), used)) businitfn_ ##nm(void) \
+RTE_INIT_PRIO(businitfn_ ##nm, 101); \
+static void businitfn_ ##nm(void) \
{\
(bus).name = RTE_STR(nm);\
rte_bus_register(&bus); \
diff --git a/lib/librte_eal/common/include/rte_common.h b/lib/librte_eal/common/include/rte_common.h
index e057f6e2..1afc66e3 100644
--- a/lib/librte_eal/common/include/rte_common.h
+++ b/lib/librte_eal/common/include/rte_common.h
@@ -66,6 +66,12 @@ extern "C" {
#define RTE_STD_C11
#endif
+/** Combined GCC version number, e.g. 40902 for GCC 4.9.2. */
+#ifdef RTE_TOOLCHAIN_GCC
+#define GCC_VERSION (__GNUC__ * 10000 + __GNUC_MINOR__ * 100 + \
+ __GNUC_PATCHLEVEL__)
+#endif
+
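A typical (illustrative) use of the combined version number is a compile-time guard against older compilers:

#if defined(RTE_TOOLCHAIN_GCC) && (GCC_VERSION < 40900)
/* fall back to a workaround for GCC older than 4.9.0 */
#endif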
#ifdef RTE_ARCH_STRICT_ALIGN
typedef uint64_t unaligned_uint64_t __attribute__ ((aligned(1)));
typedef uint32_t unaligned_uint32_t __attribute__ ((aligned(1)));
@@ -102,6 +108,16 @@ typedef uint16_t unaligned_uint16_t;
*/
#define RTE_SET_USED(x) (void)(x)
+/**
+ * Force a function to be inlined
+ */
+#define __rte_always_inline inline __attribute__((always_inline))
+
+/**
+ * Force a function not to be inlined
+ */
+#define __rte_noinline __attribute__((noinline))
+
/*********** Macros for pointer arithmetic ********/
/**
@@ -294,21 +310,6 @@ rte_align64pow2(uint64_t v)
/*********** Other general functions / macros ********/
-#ifdef __SSE2__
-#include <emmintrin.h>
-/**
- * PAUSE instruction for tight loops (avoid busy waiting)
- */
-static inline void
-rte_pause (void)
-{
- _mm_pause();
-}
-#else
-static inline void
-rte_pause(void) {}
-#endif
-
/**
* Searches the input parameter for the least significant set bit
* (starting from zero).
@@ -326,6 +327,23 @@ rte_bsf32(uint32_t v)
return __builtin_ctz(v);
}
+/**
+ * Return the rounded-up log2 of an integer.
+ *
+ * @param v
+ * The input parameter.
+ * @return
+ * The rounded-up log2 of the input, or 0 if the input is 0.
+ */
+static inline uint32_t
+rte_log2_u32(uint32_t v)
+{
+ if (v == 0)
+ return 0;
+ v = rte_align32pow2(v);
+ return rte_bsf32(v);
+}
+
#ifndef offsetof
/** Return the offset of a field in a structure. */
#define offsetof(TYPE, MEMBER) __builtin_offsetof (TYPE, MEMBER)
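As a quick sanity check of rte_log2_u32(), a sketch of the expected values (the assertions follow directly from the align-then-bsf implementation above):

#include <assert.h>
#include <rte_common.h>

static void
log2_examples(void)
{
	assert(rte_log2_u32(0) == 0); /* defined as 0 for input 0 */
	assert(rte_log2_u32(1) == 0);
	assert(rte_log2_u32(5) == 3); /* rounded up: 2^3 = 8 >= 5 */
	assert(rte_log2_u32(8) == 3); /* exact power of two */
}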
diff --git a/lib/librte_eal/common/include/rte_dev.h b/lib/librte_eal/common/include/rte_dev.h
index de20c063..5386d3a2 100644
--- a/lib/librte_eal/common/include/rte_dev.h
+++ b/lib/librte_eal/common/include/rte_dev.h
@@ -115,6 +115,26 @@ rte_pmd_debug_trace(const char *func_name, const char *fmt, ...)
} while (0)
/**
+ * Device driver.
+ */
+enum rte_kernel_driver {
+ RTE_KDRV_UNKNOWN = 0,
+ RTE_KDRV_IGB_UIO,
+ RTE_KDRV_VFIO,
+ RTE_KDRV_UIO_GENERIC,
+ RTE_KDRV_NIC_UIO,
+ RTE_KDRV_NONE,
+};
+
+/**
+ * Device policies.
+ */
+enum rte_dev_policy {
+ RTE_DEV_WHITELISTED,
+ RTE_DEV_BLACKLISTED,
+};
+
+/**
* A generic memory resource representation.
*/
struct rte_mem_resource {
@@ -132,6 +152,8 @@ struct rte_driver {
const char *alias; /**< Driver alias. */
};
+#define RTE_DEV_NAME_MAX_LEN (32)
+
/**
* A structure describing a generic device.
*/
@@ -183,13 +205,67 @@ int rte_eal_dev_attach(const char *name, const char *devargs);
/**
* Detach a device from its driver.
*
- * @param name
- * Same description as for rte_eal_dev_attach().
- * Here, eal will call the driver detaching function.
+ * @param dev
+ * A pointer to a rte_device structure.
+ * @return
+ * 0 on success, negative on error.
+ */
+int rte_eal_dev_detach(struct rte_device *dev);
+
+/**
+ * @warning
+ * @b EXPERIMENTAL: this API may change without prior notice
+ *
+ * Hotplug add a given device to a specific bus.
+ *
+ * @param busname
+ * The bus name the device is added to.
+ * @param devname
+ * The device name. Based on this device name, eal will identify a driver
+ * capable of handling it and pass it to the driver probing function.
+ * @param devargs
+ * Device arguments to be passed to the driver.
+ * @return
+ * 0 on success, negative on error.
+ */
+int rte_eal_hotplug_add(const char *busname, const char *devname,
+ const char *devargs);
+
+/**
+ * @warning
+ * @b EXPERIMENTAL: this API may change without prior notice
+ *
+ * Hotplug remove a given device from a specific bus.
+ *
+ * @param busname
+ * The bus name the device is removed from.
+ * @param devname
+ * The device name being removed.
* @return
* 0 on success, negative on error.
*/
-int rte_eal_dev_detach(const char *name);
+int rte_eal_hotplug_remove(const char *busname, const char *devname);
+
+/**
+ * Device comparison function.
+ *
+ * This type of function is used to compare an rte_device with arbitrary
+ * data.
+ *
+ * @param dev
+ * Device handle.
+ *
+ * @param data
+ * Data to compare against. The type of this parameter is determined by
+ * the kind of comparison performed by the function.
+ *
+ * @return
+ * 0 if the device matches the data.
+ * !0 if the device does not match.
+ * <0 if ordering is possible and the device is lower than the data.
+ * >0 if ordering is possible and the device is greater than the data.
+ */
+typedef int (*rte_dev_cmp_t)(const struct rte_device *dev, const void *data);
#define RTE_PMD_EXPORT_NAME_ARRAY(n, idx) n##idx[]
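A minimal sketch of the new hotplug API, assuming the vdev bus and the net_null driver are available; the names are illustrative only:

#include <rte_dev.h>

static int
hotplug_example(void)
{
	/* attach a virtual device, then detach it again */
	int ret = rte_eal_hotplug_add("vdev", "net_null0", "");

	if (ret < 0)
		return ret;
	return rte_eal_hotplug_remove("vdev", "net_null0");
}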
diff --git a/lib/librte_eal/common/include/rte_devargs.h b/lib/librte_eal/common/include/rte_devargs.h
index 88120a1c..58d585df 100644
--- a/lib/librte_eal/common/include/rte_devargs.h
+++ b/lib/librte_eal/common/include/rte_devargs.h
@@ -50,7 +50,7 @@ extern "C" {
#include <stdio.h>
#include <sys/queue.h>
-#include <rte_pci.h>
+#include <rte_bus.h>
/**
* Type of generic device
@@ -76,19 +76,12 @@ struct rte_devargs {
TAILQ_ENTRY(rte_devargs) next;
/** Type of device. */
enum rte_devtype type;
- RTE_STD_C11
- union {
- /** Used if type is RTE_DEVTYPE_*_PCI. */
- struct {
- /** PCI location. */
- struct rte_pci_addr addr;
- } pci;
- /** Used if type is RTE_DEVTYPE_VIRTUAL. */
- struct {
- /** Driver name. */
- char drv_name[32];
- } virt;
- };
+ /** Device policy. */
+ enum rte_dev_policy policy;
+ /** Bus handle for the device. */
+ struct rte_bus *bus;
+ /** Name of the device. */
+ char name[RTE_DEV_NAME_MAX_LEN];
/** Arguments string as given by user or "" for no argument. */
char *args;
};
@@ -128,6 +121,39 @@ int rte_eal_parse_devargs_str(const char *devargs_str,
char **drvname, char **drvargs);
/**
+ * Parse a device string.
+ *
+ * Verify that a bus is capable of handling the device passed
+ * in argument. Store which bus will handle the device, its name
+ * and the eventual device parameters.
+ *
+ * @param dev
+ * The device declaration string.
+ * @param da
+ * The devargs structure holding the device information.
+ *
+ * @return
+ * - 0 on success.
+ * - Negative errno on error.
+ */
+int
+rte_eal_devargs_parse(const char *dev,
+ struct rte_devargs *da);
+
+/**
+ * Insert an rte_devargs in the global list.
+ *
+ * @param da
+ * The devargs structure to insert.
+ *
+ * @return
+ * - 0 on success
+ * - Negative on error.
+ */
+int
+rte_eal_devargs_insert(struct rte_devargs *da);
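For illustration, a sketch of parsing a PCI-style device string into the reworked rte_devargs layout; the address is an assumption:

#include <string.h>
#include <rte_devargs.h>

static int
devargs_parse_example(void)
{
	struct rte_devargs da;

	memset(&da, 0, sizeof(da));
	if (rte_eal_devargs_parse("0000:00:01.0", &da) < 0)
		return -1;
	/* da.bus now points at the bus that recognized the name,
	 * da.name holds the device name, da.args any parameters. */
	return 0;
}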
+
+/**
* Add a device to the user device list
*
* For PCI devices, the format of arguments string is "PCI_ADDR" or
@@ -152,6 +178,24 @@ int rte_eal_parse_devargs_str(const char *devargs_str,
int rte_eal_devargs_add(enum rte_devtype devtype, const char *devargs_str);
/**
+ * Remove a device from the user device list.
+ * Its resources are freed.
+ * If the devargs cannot be found, nothing happens.
+ *
+ * @param busname
+ * bus name of the devargs to remove.
+ *
+ * @param devname
+ * device name of the devargs to remove.
+ *
+ * @return
+ * 0 on success.
+ * <0 on error.
+ * >0 if the devargs was not within the user device list.
+ */
+int rte_eal_devargs_remove(const char *busname, const char *devname);
+
+/**
* Count the number of user devices of a specified type
*
* @param devtype
diff --git a/lib/librte_eal/common/include/rte_eal.h b/lib/librte_eal/common/include/rte_eal.h
index abf020bf..0e7363d7 100644
--- a/lib/librte_eal/common/include/rte_eal.h
+++ b/lib/librte_eal/common/include/rte_eal.h
@@ -61,6 +61,7 @@ extern "C" {
enum rte_lcore_role_t {
ROLE_RTE,
ROLE_OFF,
+ ROLE_SERVICE,
};
/**
@@ -80,6 +81,7 @@ enum rte_proc_type_t {
struct rte_config {
uint32_t master_lcore; /**< Id of the master lcore */
uint32_t lcore_count; /**< Number of available logical cores. */
+ uint32_t service_lcore_count;/**< Number of available service cores. */
enum rte_lcore_role_t lcore_role[RTE_MAX_LCORE]; /**< State of cores. */
/** Primary or secondary configuration */
@@ -185,6 +187,8 @@ int rte_eal_iopl_init(void);
*
* EPROTO indicates that the PCI bus is either not present, or is not
* readable by the eal.
+ *
+ * ENOEXEC indicates that a service core failed to launch successfully.
*/
int rte_eal_init(int argc, char **argv);
@@ -286,6 +290,9 @@ static inline int rte_gettid(void)
#define RTE_INIT(func) \
static void __attribute__((constructor, used)) func(void)
+#define RTE_INIT_PRIO(func, prio) \
+static void __attribute__((constructor(prio), used)) func(void)
+
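RTE_INIT_PRIO declares a constructor with an explicit priority, which is how RTE_REGISTER_BUS above guarantees that buses register before PMDs. An illustrative use, with a hypothetical function name:

RTE_INIT_PRIO(early_setup, 102);
static void early_setup(void)
{
	/* runs at load time, after priority-101 bus constructors
	 * but before default-priority RTE_INIT constructors */
}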
#ifdef __cplusplus
}
#endif
diff --git a/lib/librte_eal/common/include/rte_eal_memconfig.h b/lib/librte_eal/common/include/rte_eal_memconfig.h
index 2b5e0b17..b9eee702 100644
--- a/lib/librte_eal/common/include/rte_eal_memconfig.h
+++ b/lib/librte_eal/common/include/rte_eal_memconfig.h
@@ -39,6 +39,7 @@
#include <rte_memzone.h>
#include <rte_malloc_heap.h>
#include <rte_rwlock.h>
+#include <rte_pause.h>
#ifdef __cplusplus
extern "C" {
diff --git a/lib/librte_eal/common/include/rte_lcore.h b/lib/librte_eal/common/include/rte_lcore.h
index fe7b5865..50e0d0fe 100644
--- a/lib/librte_eal/common/include/rte_lcore.h
+++ b/lib/librte_eal/common/include/rte_lcore.h
@@ -73,6 +73,7 @@ struct lcore_config {
unsigned core_id; /**< core number on socket for this lcore */
int core_index; /**< relative index, starting from 0 */
rte_cpuset_t cpuset; /**< cpu set which the lcore affinity to */
+ uint8_t core_role; /**< role of core, e.g. OFF, RTE, SERVICE */
};
/**
@@ -175,7 +176,7 @@ rte_lcore_is_enabled(unsigned lcore_id)
struct rte_config *cfg = rte_eal_get_configuration();
if (lcore_id >= RTE_MAX_LCORE)
return 0;
- return cfg->lcore_role[lcore_id] != ROLE_OFF;
+ return cfg->lcore_role[lcore_id] == ROLE_RTE;
}
/**
diff --git a/lib/librte_eal/common/include/rte_log.h b/lib/librte_eal/common/include/rte_log.h
index 34191385..ec8dba79 100644
--- a/lib/librte_eal/common/include/rte_log.h
+++ b/lib/librte_eal/common/include/rte_log.h
@@ -175,6 +175,16 @@ __rte_deprecated
uint32_t rte_get_log_type(void);
/**
+ * Get the log level for a given type.
+ *
+ * @param logtype
+ * The log type identifier.
+ * @return
+ * 0 on success, a negative value if logtype is invalid.
+ */
+int rte_log_get_level(uint32_t logtype);
+
+/**
* Set the log level for a given type.
*
* @param pattern
diff --git a/lib/librte_eal/common/include/rte_malloc.h b/lib/librte_eal/common/include/rte_malloc.h
index 008ce134..3d37f79b 100644
--- a/lib/librte_eal/common/include/rte_malloc.h
+++ b/lib/librte_eal/common/include/rte_malloc.h
@@ -327,9 +327,9 @@ rte_malloc_set_limit(const char *type, size_t max);
* rte_malloc
*
* @param addr
- * Adress obtained from a previous rte_malloc call
+ * Address obtained from a previous rte_malloc call
* @return
- * NULL on error
+ * RTE_BAD_PHYS_ADDR on error
* otherwise return physical address of the buffer
*/
phys_addr_t
diff --git a/lib/librte_eal/common/include/rte_pci.h b/lib/librte_eal/common/include/rte_pci.h
index ab64c63c..8b123391 100644
--- a/lib/librte_eal/common/include/rte_pci.h
+++ b/lib/librte_eal/common/include/rte_pci.h
@@ -2,6 +2,7 @@
* BSD LICENSE
*
* Copyright(c) 2010-2015 Intel Corporation. All rights reserved.
+ * Copyright 2013-2014 6WIND S.A.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -30,36 +31,6 @@
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
-/* BSD LICENSE
- *
- * Copyright 2013-2014 6WIND S.A.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- *
- * * Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in
- * the documentation and/or other materials provided with the
- * distribution.
- * * Neither the name of 6WIND S.A. nor the names of its
- * contributors may be used to endorse or promote products derived
- * from this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- */
#ifndef _RTE_PCI_H_
#define _RTE_PCI_H_
@@ -92,7 +63,7 @@ const char *pci_get_sysfs_path(void);
/** Formatting string for PCI device identifier: Ex: 0000:00:01.0 */
#define PCI_PRI_FMT "%.4" PRIx16 ":%.2" PRIx8 ":%.2" PRIx8 ".%" PRIx8
-#define PCI_PRI_STR_SIZE sizeof("XXXX:XX:XX.X")
+#define PCI_PRI_STR_SIZE sizeof("XXXXXXXX:XX:XX.X")
/** Short formatting string, without domain, for PCI device: Ex: 00:01.0 */
#define PCI_SHORT_PRI_FMT "%.2" PRIx8 ":%.2" PRIx8 ".%" PRIx8
@@ -106,9 +77,6 @@ const char *pci_get_sysfs_path(void);
/** Maximum number of PCI resources. */
#define PCI_MAX_RESOURCE 6
-/** Name of PCI Bus */
-#define PCI_BUS_NAME "PCI"
-
/* Forward declarations */
struct rte_pci_device;
struct rte_pci_driver;
@@ -141,7 +109,7 @@ struct rte_pci_id {
* A structure describing the location of a PCI device.
*/
struct rte_pci_addr {
- uint16_t domain; /**< Device domain */
+ uint32_t domain; /**< Device domain */
uint8_t bus; /**< Device bus */
uint8_t devid; /**< Device ID */
uint8_t function; /**< Device function. */
@@ -149,15 +117,6 @@ struct rte_pci_addr {
struct rte_devargs;
-enum rte_kernel_driver {
- RTE_KDRV_UNKNOWN = 0,
- RTE_KDRV_IGB_UIO,
- RTE_KDRV_VFIO,
- RTE_KDRV_UIO_GENERIC,
- RTE_KDRV_NIC_UIO,
- RTE_KDRV_NONE,
-};
-
/**
* A structure describing a PCI device.
*/
@@ -241,6 +200,8 @@ struct rte_pci_bus {
#define RTE_PCI_DRV_INTR_LSC 0x0008
/** Device driver supports device removal interrupt */
#define RTE_PCI_DRV_INTR_RMV 0x0010
+/** Device driver needs to keep mapped resources if unsupported dev detected */
+#define RTE_PCI_DRV_KEEP_MAPPED_RES 0x0020
/**
* A structure describing a PCI mapping.
@@ -373,10 +334,10 @@ rte_eal_compare_pci_addr(const struct rte_pci_addr *addr,
if ((addr == NULL) || (addr2 == NULL))
return -1;
- dev_addr = (addr->domain << 24) | (addr->bus << 16) |
- (addr->devid << 8) | addr->function;
- dev_addr2 = (addr2->domain << 24) | (addr2->bus << 16) |
- (addr2->devid << 8) | addr2->function;
+ dev_addr = ((uint64_t)addr->domain << 24) |
+ (addr->bus << 16) | (addr->devid << 8) | addr->function;
+ dev_addr2 = ((uint64_t)addr2->domain << 24) |
+ (addr2->bus << 16) | (addr2->devid << 8) | addr2->function;
if (dev_addr > dev_addr2)
return 1;
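The casts matter because the domain field is widened to 32 bits elsewhere in this patch: shifting it left by 24 in 32-bit arithmetic would discard the upper domain bits. A small illustration of the difference:

uint32_t domain = 0x10000;               /* domain wider than 8 bits */
uint64_t ok = (uint64_t)domain << 24;    /* 0x10000000000, all bits kept */
uint32_t bad = domain << 24;             /* truncated to 0 in 32 bits */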
diff --git a/lib/librte_eal/common/include/rte_service.h b/lib/librte_eal/common/include/rte_service.h
new file mode 100644
index 00000000..7c6f7383
--- /dev/null
+++ b/lib/librte_eal/common/include/rte_service.h
@@ -0,0 +1,387 @@
+/*
+ * BSD LICENSE
+ *
+ * Copyright(c) 2017 Intel Corporation. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef _RTE_SERVICE_H_
+#define _RTE_SERVICE_H_
+
+/**
+ * @file
+ *
+ * Service functions
+ *
+ * The service functionality provided by this header allows a DPDK component
+ * to indicate that it requires a function call in order for it to perform
+ * its processing.
+ *
+ * An example usage of this functionality would be a component that registers
+ * a service to perform a particular packet processing duty: for example the
+ * eventdev software PMD. At startup the application requests all services
+ * that have been registered, and the cores in the service-coremask run the
+ * required services. The EAL removes these cores from the available
+ * runtime cores, and dedicates them to performing service-core workloads. The
+ * application has access to the remaining lcores as normal.
+ */
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include <stdio.h>
+#include <stdint.h>
+#include <sys/queue.h>
+
+#include <rte_lcore.h>
+
+/* forward declaration only. Definition in rte_service_private.h */
+struct rte_service_spec;
+
+#define RTE_SERVICE_NAME_MAX 32
+
+/* Capabilities of a service.
+ *
+ * Use the *rte_service_probe_capability* function to check whether a
+ * service has a specific capability.
+ */
+/** When set, the service is capable of having multiple threads run it at the
+ * same time.
+ */
+#define RTE_SERVICE_CAP_MT_SAFE (1 << 0)
+
+/**
+ * @warning
+ * @b EXPERIMENTAL: this API may change without prior notice
+ *
+ * Return the number of services registered.
+ *
+ * The number of services registered can be passed to *rte_service_get_by_id*,
+ * enabling the application to retrieve the specification of each service.
+ *
+ * @return The number of services registered.
+ */
+uint32_t rte_service_get_count(void);
+
+
+/**
+ * @warning
+ * @b EXPERIMENTAL: this API may change without prior notice
+ *
+ * Return the specification of a service by integer id.
+ *
+ * This function provides the specification of a service. This can be used by
+ * the application to understand what the service represents. The service
+ * must not be modified by the application directly, only passed to the various
+ * rte_service_* functions.
+ *
+ * @param id The integer id of the service to retrieve
+ * @retval non-zero A valid pointer to the service_spec
+ * @retval NULL Invalid *id* provided.
+ */
+struct rte_service_spec *rte_service_get_by_id(uint32_t id);
+
+/**
+ * @warning
+ * @b EXPERIMENTAL: this API may change without prior notice
+ *
+ * Return the specification of a service by name.
+ *
+ * This function provides the specification of a service using the service name
+ * as lookup key. This can be used by the application to understand what the
+ * service represents. The service must not be modified by the application
+ * directly, only passed to the various rte_service_* functions.
+ *
+ * @param name The name of the service to retrieve
+ * @retval non-zero A valid pointer to the service_spec
+ * @retval NULL Invalid *name* provided.
+ */
+struct rte_service_spec *rte_service_get_by_name(const char *name);
+
+/**
+ * @warning
+ * @b EXPERIMENTAL: this API may change without prior notice
+ *
+ * Return the name of the service.
+ *
+ * @return A pointer to the name of the service. The returned pointer remains
+ * in ownership of the service, and the application must not free it.
+ */
+const char *rte_service_get_name(const struct rte_service_spec *service);
+
+/**
+ * @warning
+ * @b EXPERIMENTAL: this API may change without prior notice
+ *
+ * Check if a service has a specific capability.
+ *
+ * This function returns whether *service* implements *capability*.
+ * See RTE_SERVICE_CAP_* defines for a list of valid capabilities.
+ * @retval 1 Capability supported by this service instance
+ * @retval 0 Capability not supported by this service instance
+ */
+int32_t rte_service_probe_capability(const struct rte_service_spec *service,
+ uint32_t capability);
+
+/**
+ * @warning
+ * @b EXPERIMENTAL: this API may change without prior notice
+ *
+ * Enable a core to run a service.
+ *
+ * Each core can be added or removed from running specific services. This
+ * function adds *lcore* to the set of cores that will run *service*.
+ *
+ * If multiple cores are enabled on a service, an atomic is used to ensure that
+ * only one core runs the service at a time. The exception to this is when
+ * a service indicates that it is multi-thread safe by setting the capability
+ * called RTE_SERVICE_CAP_MT_SAFE. With the multi-thread safe capability set,
+ * the service function can be run on multiple threads at the same time.
+ *
+ * @retval 0 lcore added successfully
+ * @retval -EINVAL An invalid service or lcore was provided.
+ */
+int32_t rte_service_enable_on_lcore(struct rte_service_spec *service,
+ uint32_t lcore);
+
+/**
+ * @warning
+ * @b EXPERIMENTAL: this API may change without prior notice
+ *
+ * Disable a core to run a service.
+ *
+ * Each core can be added or removed from running specific services. This
+ * function removes *lcore* from the set of cores that will run *service*.
+ *
+ * @retval 0 Lcore removed successfully
+ * @retval -EINVAL An invalid service or lcore was provided.
+ */
+int32_t rte_service_disable_on_lcore(struct rte_service_spec *service,
+ uint32_t lcore);
+
+/**
+ * @warning
+ * @b EXPERIMENTAL: this API may change without prior notice
+ *
+ * Return whether an lcore is enabled for the service.
+ *
+ * This function allows the application to query if *lcore* is currently set to
+ * run *service*.
+ *
+ * @retval 1 Service is enabled on this lcore
+ * @retval 0 Service is disabled on this lcore
+ * @retval -EINVAL An invalid service or lcore was provided.
+ */
+int32_t rte_service_get_enabled_on_lcore(struct rte_service_spec *service,
+ uint32_t lcore);
+
+
+/**
+ * @warning
+ * @b EXPERIMENTAL: this API may change without prior notice
+ *
+ * Enable *service* to run.
+ *
+ * This function switches on a service during runtime.
+ * @retval 0 The service was successfully started
+ */
+int32_t rte_service_start(struct rte_service_spec *service);
+
+/**
+ * @warning
+ * @b EXPERIMENTAL: this API may change without prior notice
+ *
+ * Disable *service*.
+ *
+ * Switch off a service, so it is not run until *rte_service_start* is
+ * called on it.
+ * @retval 0 Service successfully switched off
+ */
+int32_t rte_service_stop(struct rte_service_spec *service);
+
+/**
+ * @warning
+ * @b EXPERIMENTAL: this API may change without prior notice
+ *
+ * Returns whether *service* is currently running.
+ *
+ * This function returns true if the service has been started using
+ * *rte_service_start*, AND a service core is mapped to the service. This
+ * function can be used to ensure that the service will be run.
+ *
+ * @retval 1 Service is currently running, and has a service lcore mapped
+ * @retval 0 Service is currently stopped, or no service lcore is mapped
+ * @retval -EINVAL Invalid service pointer provided
+ */
+int32_t rte_service_is_running(const struct rte_service_spec *service);
+
+/**
+ * @warning
+ * @b EXPERIMENTAL: this API may change without prior notice
+ *
+ * Start a service core.
+ *
+ * Starting a core makes the core begin polling. Any services assigned to it
+ * will be run as fast as possible.
+ *
+ * @retval 0 Success
+ * @retval -EINVAL Failed to start core. The *lcore_id* passed in is not
+ * currently assigned to be a service core.
+ */
+int32_t rte_service_lcore_start(uint32_t lcore_id);
+
+/**
+ * @warning
+ * @b EXPERIMENTAL: this API may change without prior notice
+ *
+ * Stop a service core.
+ *
+ * Stopping a core makes the core become idle, but remains assigned as a
+ * service core.
+ *
+ * @retval 0 Success
+ * @retval -EINVAL Invalid *lcore_id* provided
+ * @retval -EALREADY Already stopped core
+ * @retval -EBUSY Failed to stop core, as it would cause a service to not
+ * be run, as this is the only core currently running the service.
+ * The application must stop the service first, and then stop the
+ * lcore.
+ */
+int32_t rte_service_lcore_stop(uint32_t lcore_id);
+
+/**
+ * @warning
+ * @b EXPERIMENTAL: this API may change without prior notice
+ *
+ * Adds lcore to the list of service cores.
+ *
+ * This function can be used at runtime to modify the service core
+ * mask.
+ *
+ * @retval 0 Success
+ * @retval -EBUSY lcore is busy, and not available for service core duty
+ * @retval -EALREADY lcore is already added to the service core list
+ * @retval -EINVAL Invalid lcore provided
+ */
+int32_t rte_service_lcore_add(uint32_t lcore);
+
+/**
+ * @warning
+ * @b EXPERIMENTAL: this API may change without prior notice
+ *
+ * Removes lcore from the list of service cores.
+ *
+ * This can fail if the core is not stopped, see *rte_service_lcore_stop*.
+ *
+ * @retval 0 Success
+ * @retval -EBUSY Lcore is not stopped, stop service core before removing.
+ * @retval -EINVAL Failed to remove lcore from the service core list.
+ */
+int32_t rte_service_lcore_del(uint32_t lcore);
+
+/**
+ * @warning
+ * @b EXPERIMENTAL: this API may change without prior notice
+ *
+ * Retrieve the number of service cores currently available.
+ *
+ * This function returns the integer count of service cores available. The
+ * service core count can be used in mapping logic when creating mappings
+ * from service cores to services.
+ *
+ * See *rte_service_lcore_list* for details on retrieving the lcore_id of each
+ * service core.
+ *
+ * @return The number of service cores currently configured.
+ */
+int32_t rte_service_lcore_count(void);
+
+/**
+ * @warning
+ * @b EXPERIMENTAL: this API may change without prior notice
+ *
+ * Resets all service core mappings. This does not remove the service cores
+ * from duty; it just unmaps all service/core mappings and stops the
+ * service cores. The runstate of services is not modified.
+ *
+ * @retval 0 Success
+ */
+int32_t rte_service_lcore_reset_all(void);
+
+/**
+ * @warning
+ * @b EXPERIMENTAL: this API may change without prior notice
+ *
+ * Enable or disable statistics collection for *service*.
+ *
+ * This function enables per-core, per-service cycle count collection.
+ * @param service The service to enable statistics gathering on.
+ * @param enable Zero to disable statistics, non-zero to enable.
+ * @retval 0 Success
+ * @retval -EINVAL Invalid service pointer passed
+ */
+int32_t rte_service_set_stats_enable(struct rte_service_spec *service,
+ int32_t enable);
+
+/**
+ * @warning
+ * @b EXPERIMENTAL: this API may change without prior notice
+ *
+ * Retrieve the list of currently enabled service cores.
+ *
+ * This function fills in an application supplied array, with each element
+ * indicating the lcore_id of a service core.
+ *
+ * Adding and removing service cores can be performed using
+ * *rte_service_lcore_add* and *rte_service_lcore_del*.
+ * @param [out] array An array of at least *rte_service_lcore_count* items.
+ * If statically allocating the buffer, use RTE_MAX_LCORE.
+ * @param n The number of items that *array* can hold.
+ * @retval >=0 Number of service cores that have been populated in the array
+ * @retval -ENOMEM The provided array is not large enough to fill in the
+ * service core list. No items have been populated, call this function
+ * with a size of at least *rte_service_core_count* items.
+ */
+int32_t rte_service_lcore_list(uint32_t array[], uint32_t n);
+
+/**
+ * @warning
+ * @b EXPERIMENTAL: this API may change without prior notice
+ *
+ * Dumps any information available about the service. If service is NULL,
+ * dumps info for all services.
+ */
+int32_t rte_service_dump(FILE *f, struct rte_service_spec *service);
+
+#ifdef __cplusplus
+}
+#endif
+
+
+#endif /* _RTE_SERVICE_H_ */
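Putting the application-facing API together, a hedged sketch of the manual flow (as opposed to letting the EAL apply defaults): add an lcore to service duty, map a service to it, and start both. Lcore 1 and service ID 0 are assumptions for the example:

#include <rte_service.h>

static int
run_service_manually(void)
{
	struct rte_service_spec *s = rte_service_get_by_id(0);

	if (s == NULL)
		return -1;
	if (rte_service_lcore_add(1) < 0)
		return -1;
	if (rte_service_enable_on_lcore(s, 1) < 0)
		return -1;
	if (rte_service_start(s) < 0)
		return -1;
	/* lcore 1 now polls, running the mapped service repeatedly */
	return rte_service_lcore_start(1);
}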
diff --git a/lib/librte_eal/common/include/rte_service_component.h b/lib/librte_eal/common/include/rte_service_component.h
new file mode 100644
index 00000000..7a946a1e
--- /dev/null
+++ b/lib/librte_eal/common/include/rte_service_component.h
@@ -0,0 +1,144 @@
+/*
+ * BSD LICENSE
+ *
+ * Copyright(c) 2017 Intel Corporation. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef _RTE_SERVICE_PRIVATE_H_
+#define _RTE_SERVICE_PRIVATE_H_
+
+/* This file specifies the internal service specification.
+ * Include this file if you are writing a component that requires CPU cycles to
+ * operate, and you wish to run the component using service cores.
+ */
+
+#include <rte_service.h>
+
+/**
+ * @warning
+ * @b EXPERIMENTAL: this API may change without prior notice
+ *
+ * Signature of callback function to run a service.
+ */
+typedef int32_t (*rte_service_func)(void *args);
+
+/**
+ * @warning
+ * @b EXPERIMENTAL: this API may change without prior notice
+ *
+ * The specification of a service.
+ *
+ * This struct contains metadata about the service itself, the callback
+ * function to run one iteration of the service, a userdata pointer, flags etc.
+ */
+struct rte_service_spec {
+ /** The name of the service. This should be used by the application to
+ * understand what purpose this service provides.
+ */
+ char name[RTE_SERVICE_NAME_MAX];
+ /** The callback to invoke to run one iteration of the service. */
+ rte_service_func callback;
+ /** The userdata pointer provided to the service callback. */
+ void *callback_userdata;
+ /** Flags to indicate the capabilities of this service. See defines in
+ * the public header file for values of RTE_SERVICE_CAP_*
+ */
+ uint32_t capabilities;
+ /** NUMA socket ID that this service is affinitized to */
+ int socket_id;
+};
+
+/**
+ * @warning
+ * @b EXPERIMENTAL: this API may change without prior notice
+ *
+ * Register a new service.
+ *
+ * A service represents a component that requires CPU time periodically to
+ * achieve its purpose.
+ *
+ * For example the eventdev SW PMD requires CPU cycles to perform its
+ * scheduling. This can be achieved by registering it as a service, and the
+ * application can then assign CPU resources to it using
+ * *rte_service_enable_on_lcore*.
+ *
+ * @param spec The specification of the service to register
+ * @retval 0 Successfully registered the service.
+ * -EINVAL Attempted to register an invalid service (e.g. no callback
+ * set)
+ */
+int32_t rte_service_register(const struct rte_service_spec *spec);
+
+/**
+ * @warning
+ * @b EXPERIMENTAL: this API may change without prior notice
+ *
+ * Unregister a service.
+ *
+ * The service being removed must be stopped before calling this function.
+ *
+ * @retval 0 The service was successfully unregistered.
+ * @retval -EBUSY The service is currently running, stop the service before
+ * calling unregister. No action has been taken.
+ */
+int32_t rte_service_unregister(struct rte_service_spec *service);
+
+/**
+ * @warning
+ * @b EXPERIMENTAL: this API may change without prior notice
+ *
+ * Private function to allow EAL to initialize default mappings.
+ *
+ * This function iterates over all the services, and maps them to the available
+ * cores. Based on the capabilities of the services, they are set to run on the
+ * available cores in a round-robin manner.
+ *
+ * @retval 0 Success
+ * @retval -ENOTSUP No service lcores in use
+ * @retval -EINVAL Error while iterating over services
+ * @retval -ENODEV Error in enabling service lcore on a service
+ * @retval -ENOEXEC Error when starting services
+ */
+int32_t rte_service_start_with_defaults(void);
+
+/**
+ * @warning
+ * @b EXPERIMENTAL: this API may change without prior notice
+ *
+ * Initialize the service library.
+ *
+ * In order to use the service library, it must be initialized. EAL initializes
+ * the library at startup.
+ *
+ * @retval 0 Success
+ * @retval -EALREADY Service library is already initialized
+ */
+int32_t rte_service_init(void);
+
+#endif /* _RTE_SERVICE_PRIVATE_H_ */
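And the component-side counterpart: a minimal sketch of registering a service, with hypothetical names. The callback is invoked repeatedly by whichever service core the service is mapped to:

#include <stdio.h>
#include <string.h>
#include <rte_service_component.h>

static int32_t
my_service_run(void *userdata)
{
	(void)userdata;
	/* one iteration of work, e.g. poll a ring or schedule events */
	return 0;
}

static int
register_my_service(void)
{
	struct rte_service_spec spec;

	memset(&spec, 0, sizeof(spec));
	snprintf(spec.name, sizeof(spec.name), "my_service");
	spec.callback = my_service_run;
	spec.callback_userdata = NULL;
	spec.capabilities = RTE_SERVICE_CAP_MT_SAFE;
	return rte_service_register(&spec);
}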
diff --git a/lib/librte_eal/common/include/rte_time.h b/lib/librte_eal/common/include/rte_time.h
index 28c6274c..373c41ac 100644
--- a/lib/librte_eal/common/include/rte_time.h
+++ b/lib/librte_eal/common/include/rte_time.h
@@ -52,7 +52,7 @@ struct rte_timecounter {
uint64_t nsec_mask;
/** Sub-nanoseconds count. */
uint64_t nsec_frac;
- /** Bitmask for two's complement substraction of non-64 bit counters. */
+ /** Bitmask for two's complement subtraction of non-64 bit counters. */
uint64_t cc_mask;
/** Cycle to nanosecond divisor (power of two). */
uint32_t cc_shift;
diff --git a/lib/librte_eal/common/include/rte_vdev.h b/lib/librte_eal/common/include/rte_vdev.h
index e6b678ea..29f5a523 100644
--- a/lib/librte_eal/common/include/rte_vdev.h
+++ b/lib/librte_eal/common/include/rte_vdev.h
@@ -46,11 +46,18 @@ struct rte_vdev_device {
struct rte_device device; /**< Inherit core device */
};
+/**
+ * @internal
+ * Helper macro for drivers that need to convert to struct rte_vdev_device.
+ */
+#define RTE_DEV_TO_VDEV(ptr) \
+ container_of(ptr, struct rte_vdev_device, device)
+
static inline const char *
rte_vdev_device_name(const struct rte_vdev_device *dev)
{
- if (dev && dev->device.devargs)
- return dev->device.devargs->virt.drv_name;
+ if (dev && dev->device.name)
+ return dev->device.name;
return NULL;
}
diff --git a/lib/librte_eal/common/include/rte_version.h b/lib/librte_eal/common/include/rte_version.h
index c36d8526..a69a7075 100644
--- a/lib/librte_eal/common/include/rte_version.h
+++ b/lib/librte_eal/common/include/rte_version.h
@@ -61,12 +61,12 @@ extern "C" {
/**
* Minor version/month number i.e. the mm in yy.mm.z
*/
-#define RTE_VER_MONTH 5
+#define RTE_VER_MONTH 8
/**
* Patch level number i.e. the z in yy.mm.z
*/
-#define RTE_VER_MINOR 1
+#define RTE_VER_MINOR 0
/**
* Extra string to be appended to version number
diff --git a/lib/librte_eal/common/malloc_elem.c b/lib/librte_eal/common/malloc_elem.c
index 42568e1d..15076905 100644
--- a/lib/librte_eal/common/malloc_elem.c
+++ b/lib/librte_eal/common/malloc_elem.c
@@ -51,7 +51,7 @@
#define MIN_DATA_SIZE (RTE_CACHE_LINE_SIZE)
/*
- * initialise a general malloc_elem header structure
+ * Initialize a general malloc_elem header structure
*/
void
malloc_elem_init(struct malloc_elem *elem,
@@ -69,7 +69,7 @@ malloc_elem_init(struct malloc_elem *elem,
}
/*
- * initialise a dummy malloc_elem header for the end-of-memseg marker
+ * Initialize a dummy malloc_elem header for the end-of-memseg marker
*/
void
malloc_elem_mkend(struct malloc_elem *elem, struct malloc_elem *prev)
@@ -228,7 +228,7 @@ malloc_elem_alloc(struct malloc_elem *elem, size_t size, unsigned align,
elem->pad = old_elem_size;
/* put a dummy header in padding, to point to real element header */
- if (elem->pad > 0){ /* pad will be at least 64-bytes, as everything
+ if (elem->pad > 0) { /* pad will be at least 64-bytes, as everything
* is cache-line aligned */
new_elem->pad = elem->pad;
new_elem->state = ELEM_PAD;
@@ -314,17 +314,16 @@ malloc_elem_free(struct malloc_elem *elem)
int
malloc_elem_resize(struct malloc_elem *elem, size_t size)
{
- const size_t new_size = size + MALLOC_ELEM_OVERHEAD;
+ const size_t new_size = size + elem->pad + MALLOC_ELEM_OVERHEAD;
/* if we request a smaller size, then always return ok */
- const size_t current_size = elem->size - elem->pad;
- if (current_size >= new_size)
+ if (elem->size >= new_size)
return 0;
struct malloc_elem *next = RTE_PTR_ADD(elem, elem->size);
rte_spinlock_lock(&elem->heap->lock);
if (next ->state != ELEM_FREE)
goto err_return;
- if (current_size + next->size < new_size)
+ if (elem->size + next->size < new_size)
goto err_return;
/* we now know the element fits, so remove from free list,
@@ -333,7 +332,7 @@ malloc_elem_resize(struct malloc_elem *elem, size_t size)
elem_free_list_remove(next);
join_elem(elem, next);
- if (elem->size - new_size >= MIN_DATA_SIZE + MALLOC_ELEM_OVERHEAD){
+ if (elem->size - new_size >= MIN_DATA_SIZE + MALLOC_ELEM_OVERHEAD) {
/* now we have a big block together. Lets cut it down a bit, by splitting */
struct malloc_elem *split_pt = RTE_PTR_ADD(elem, new_size);
split_pt = RTE_PTR_ALIGN_CEIL(split_pt, RTE_CACHE_LINE_SIZE);
diff --git a/lib/librte_eal/common/rte_keepalive.c b/lib/librte_eal/common/rte_keepalive.c
index 9765d1bd..cdd69560 100644
--- a/lib/librte_eal/common/rte_keepalive.c
+++ b/lib/librte_eal/common/rte_keepalive.c
@@ -38,7 +38,6 @@
#include <rte_log.h>
#include <rte_keepalive.h>
#include <rte_malloc.h>
-#include <rte_cycles.h>
struct rte_keepalive {
/** Core Liveness. */
diff --git a/lib/librte_eal/common/rte_malloc.c b/lib/librte_eal/common/rte_malloc.c
index f4a88352..5c0627bf 100644
--- a/lib/librte_eal/common/rte_malloc.c
+++ b/lib/librte_eal/common/rte_malloc.c
@@ -253,6 +253,8 @@ rte_malloc_virt2phy(const void *addr)
{
const struct malloc_elem *elem = malloc_elem_from_data(addr);
if (elem == NULL)
- return 0;
+ return RTE_BAD_PHYS_ADDR;
+ if (elem->ms->phys_addr == RTE_BAD_PHYS_ADDR)
+ return RTE_BAD_PHYS_ADDR;
return elem->ms->phys_addr + ((uintptr_t)addr - (uintptr_t)elem->ms->addr);
}
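Callers should now test for RTE_BAD_PHYS_ADDR rather than 0/NULL; a brief sketch of the updated error convention:

#include <rte_malloc.h>
#include <rte_memory.h>

static int
get_phys(const void *buf, phys_addr_t *pa)
{
	*pa = rte_malloc_virt2phy(buf);
	if (*pa == RTE_BAD_PHYS_ADDR)
		return -1; /* not an rte_malloc'd buffer, or no mapping */
	return 0;
}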
diff --git a/lib/librte_eal/common/rte_service.c b/lib/librte_eal/common/rte_service.c
new file mode 100644
index 00000000..7efb76dc
--- /dev/null
+++ b/lib/librte_eal/common/rte_service.c
@@ -0,0 +1,706 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright(c) 2017 Intel Corporation. All rights reserved.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <stdio.h>
+#include <unistd.h>
+#include <inttypes.h>
+#include <limits.h>
+#include <string.h>
+#include <dirent.h>
+
+#include <rte_service.h>
+#include "include/rte_service_component.h"
+
+#include <rte_eal.h>
+#include <rte_lcore.h>
+#include <rte_common.h>
+#include <rte_debug.h>
+#include <rte_cycles.h>
+#include <rte_atomic.h>
+#include <rte_memory.h>
+#include <rte_malloc.h>
+
+#define RTE_SERVICE_NUM_MAX 64
+
+#define SERVICE_F_REGISTERED (1 << 0)
+#define SERVICE_F_STATS_ENABLED (1 << 1)
+
+/* runstates for services and lcores, denoting if they are active or not */
+#define RUNSTATE_STOPPED 0
+#define RUNSTATE_RUNNING 1
+
+/* internal representation of a service */
+struct rte_service_spec_impl {
+ /* public part of the struct */
+ struct rte_service_spec spec;
+
+ /* atomic lock that when set indicates a service core is currently
+ * running this service callback. When not set, a core may take the
+ * lock and then run the service callback.
+ */
+ rte_atomic32_t execute_lock;
+
+ /* API set/get-able variables */
+ int32_t runstate;
+ uint8_t internal_flags;
+
+ /* per service statistics */
+ uint32_t num_mapped_cores;
+ uint64_t calls;
+ uint64_t cycles_spent;
+} __rte_cache_aligned;
+
+/* the internal values of a service core */
+struct core_state {
+ /* bitmask of service IDs mapped to run on this core */
+ uint64_t service_mask;
+ uint8_t runstate; /* running or stopped */
+ uint8_t is_service_core; /* set if core is currently a service core */
+
+ /* per-service statistics */
+ uint64_t calls_per_service[RTE_SERVICE_NUM_MAX];
+} __rte_cache_aligned;
+
+static uint32_t rte_service_count;
+static struct rte_service_spec_impl *rte_services;
+static struct core_state *lcore_states;
+static uint32_t rte_service_library_initialized;
+
+int32_t rte_service_init(void)
+{
+ if (rte_service_library_initialized) {
+ printf("service library init() called, init flag %d\n",
+ rte_service_library_initialized);
+ return -EALREADY;
+ }
+
+ rte_services = rte_calloc("rte_services", RTE_SERVICE_NUM_MAX,
+ sizeof(struct rte_service_spec_impl),
+ RTE_CACHE_LINE_SIZE);
+ if (!rte_services) {
+ printf("error allocating rte services array\n");
+ return -ENOMEM;
+ }
+
+ lcore_states = rte_calloc("rte_service_core_states", RTE_MAX_LCORE,
+ sizeof(struct core_state), RTE_CACHE_LINE_SIZE);
+ if (!lcore_states) {
+ printf("error allocating core states array\n");
+ return -ENOMEM;
+ }
+
+ int i;
+ int count = 0;
+ struct rte_config *cfg = rte_eal_get_configuration();
+ for (i = 0; i < RTE_MAX_LCORE; i++) {
+ if (lcore_config[i].core_role == ROLE_SERVICE) {
+ if ((unsigned int)i == cfg->master_lcore)
+ continue;
+ rte_service_lcore_add(i);
+ count++;
+ }
+ }
+
+ rte_service_library_initialized = 1;
+ return 0;
+}
+
+/* Returns 1 if the service is registered and has not been unregistered.
+ * Returns 0 if the service was never registered, or has been unregistered.
+ */
+static inline int
+service_valid(uint32_t id)
+{
+ return !!(rte_services[id].internal_flags & SERVICE_F_REGISTERED);
+}
+
+/* Returns 1 if statistics should be collected for the service.
+ * Returns 0 if statistics should not be collected for the service.
+ */
+static inline int
+service_stats_enabled(struct rte_service_spec_impl *impl)
+{
+ return !!(impl->internal_flags & SERVICE_F_STATS_ENABLED);
+}
+
+static inline int
+service_mt_safe(struct rte_service_spec_impl *s)
+{
+ return s->spec.capabilities & RTE_SERVICE_CAP_MT_SAFE;
+}
+
+int32_t rte_service_set_stats_enable(struct rte_service_spec *service,
+ int32_t enabled)
+{
+ struct rte_service_spec_impl *impl =
+ (struct rte_service_spec_impl *)service;
+ if (!impl)
+ return -EINVAL;
+
+ if (enabled)
+ impl->internal_flags |= SERVICE_F_STATS_ENABLED;
+ else
+ impl->internal_flags &= ~(SERVICE_F_STATS_ENABLED);
+
+ return 0;
+}
+
+uint32_t
+rte_service_get_count(void)
+{
+ return rte_service_count;
+}
+
+struct rte_service_spec *
+rte_service_get_by_id(uint32_t id)
+{
+ struct rte_service_spec *service = NULL;
+ if (id < rte_service_count)
+ service = (struct rte_service_spec *)&rte_services[id];
+
+ return service;
+}
+
+struct rte_service_spec *rte_service_get_by_name(const char *name)
+{
+ struct rte_service_spec *service = NULL;
+ int i;
+ for (i = 0; i < RTE_SERVICE_NUM_MAX; i++) {
+ if (service_valid(i) &&
+ strcmp(name, rte_services[i].spec.name) == 0) {
+ service = (struct rte_service_spec *)&rte_services[i];
+ break;
+ }
+ }
+
+ return service;
+}
+
+const char *
+rte_service_get_name(const struct rte_service_spec *service)
+{
+ return service->name;
+}
+
+int32_t
+rte_service_probe_capability(const struct rte_service_spec *service,
+ uint32_t capability)
+{
+ return service->capabilities & capability;
+}
+
+int32_t
+rte_service_is_running(const struct rte_service_spec *spec)
+{
+ const struct rte_service_spec_impl *impl =
+ (const struct rte_service_spec_impl *)spec;
+ if (!impl)
+ return -EINVAL;
+
+ return (impl->runstate == RUNSTATE_RUNNING) &&
+ (impl->num_mapped_cores > 0);
+}
+
+int32_t
+rte_service_register(const struct rte_service_spec *spec)
+{
+ uint32_t i;
+ int32_t free_slot = -1;
+
+ if (spec->callback == NULL || strlen(spec->name) == 0)
+ return -EINVAL;
+
+ for (i = 0; i < RTE_SERVICE_NUM_MAX; i++) {
+ if (!service_valid(i)) {
+ free_slot = i;
+ break;
+ }
+ }
+
+ if ((free_slot < 0) || (i == RTE_SERVICE_NUM_MAX))
+ return -ENOSPC;
+
+ struct rte_service_spec_impl *s = &rte_services[free_slot];
+ s->spec = *spec;
+ s->internal_flags |= SERVICE_F_REGISTERED;
+
+ rte_smp_wmb();
+ rte_service_count++;
+
+ return 0;
+}
+
+int32_t
+rte_service_unregister(struct rte_service_spec *spec)
+{
+ struct rte_service_spec_impl *s = NULL;
+ struct rte_service_spec_impl *spec_impl =
+ (struct rte_service_spec_impl *)spec;
+
+ uint32_t i;
+ uint32_t service_id;
+ for (i = 0; i < RTE_SERVICE_NUM_MAX; i++) {
+ if (&rte_services[i] == spec_impl) {
+ s = spec_impl;
+ service_id = i;
+ break;
+ }
+ }
+
+ if (!s)
+ return -EINVAL;
+
+ rte_service_count--;
+ rte_smp_wmb();
+
+ s->internal_flags &= ~(SERVICE_F_REGISTERED);
+
+ for (i = 0; i < RTE_MAX_LCORE; i++)
+ lcore_states[i].service_mask &= ~(UINT64_C(1) << service_id);
+
+ memset(&rte_services[service_id], 0,
+ sizeof(struct rte_service_spec_impl));
+
+ return 0;
+}
+
+int32_t
+rte_service_start(struct rte_service_spec *service)
+{
+ struct rte_service_spec_impl *s =
+ (struct rte_service_spec_impl *)service;
+ s->runstate = RUNSTATE_RUNNING;
+ rte_smp_wmb();
+ return 0;
+}
+
+int32_t
+rte_service_stop(struct rte_service_spec *service)
+{
+ struct rte_service_spec_impl *s =
+ (struct rte_service_spec_impl *)service;
+ s->runstate = RUNSTATE_STOPPED;
+ rte_smp_wmb();
+ return 0;
+}
+
+static int32_t
+rte_service_runner_func(void *arg)
+{
+ RTE_SET_USED(arg);
+ uint32_t i;
+ const int lcore = rte_lcore_id();
+ struct core_state *cs = &lcore_states[lcore];
+
+ while (lcore_states[lcore].runstate == RUNSTATE_RUNNING) {
+ const uint64_t service_mask = cs->service_mask;
+ for (i = 0; i < rte_service_count; i++) {
+ struct rte_service_spec_impl *s = &rte_services[i];
+ if (s->runstate != RUNSTATE_RUNNING ||
+ !(service_mask & (UINT64_C(1) << i)))
+ continue;
+
+ /* take the atomic lock only when the service is not
+ * multi-thread safe and more than one core is mapped
+ * to it; otherwise no locking is required.
+ */
+ const int need_lock = (service_mt_safe(s) == 0) &&
+ (s->num_mapped_cores > 1);
+ uint32_t *lock = (uint32_t *)&s->execute_lock;
+
+ if (!need_lock || rte_atomic32_cmpset(lock, 0, 1)) {
+ void *userdata = s->spec.callback_userdata;
+
+ if (service_stats_enabled(s)) {
+ uint64_t start = rte_rdtsc();
+ s->spec.callback(userdata);
+ uint64_t end = rte_rdtsc();
+ s->cycles_spent += end - start;
+ cs->calls_per_service[i]++;
+ s->calls++;
+ } else
+ s->spec.callback(userdata);
+
+ if (need_cmpset)
+ rte_atomic32_clear(&s->execute_lock);
+ }
+ }
+
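+ /* pick up runstate and mapping changes made by other cores */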
+ rte_smp_rmb();
+ }
+
+ lcore_config[lcore].state = WAIT;
+
+ return 0;
+}
+
+int32_t
+rte_service_lcore_count(void)
+{
+ int32_t count = 0;
+ uint32_t i;
+ for (i = 0; i < RTE_MAX_LCORE; i++)
+ count += lcore_states[i].is_service_core;
+ return count;
+}
+
+int32_t
+rte_service_lcore_list(uint32_t array[], uint32_t n)
+{
+ uint32_t count = rte_service_lcore_count();
+ if (count > n)
+ return -ENOMEM;
+
+ if (!array)
+ return -EINVAL;
+
+ uint32_t i;
+ uint32_t idx = 0;
+ for (i = 0; i < RTE_MAX_LCORE; i++) {
+ struct core_state *cs = &lcore_states[i];
+ if (cs->is_service_core) {
+ array[idx] = i;
+ idx++;
+ }
+ }
+
+ return count;
+}
+
+int32_t
+rte_service_start_with_defaults(void)
+{
+ /* create a default mapping from cores to services, then start the
+ * services to make them transparent to unaware applications.
+ */
+ uint32_t i;
+ int ret;
+ uint32_t count = rte_service_get_count();
+
+ int32_t lcore_iter = 0;
+ uint32_t ids[RTE_MAX_LCORE];
+ int32_t lcore_count = rte_service_lcore_list(ids, RTE_MAX_LCORE);
+
+ if (lcore_count == 0)
+ return -ENOTSUP;
+
+ for (i = 0; (int)i < lcore_count; i++)
+ rte_service_lcore_start(ids[i]);
+
+ for (i = 0; i < count; i++) {
+ struct rte_service_spec *s = rte_service_get_by_id(i);
+ if (!s)
+ return -EINVAL;
+
+ /* Do a 1:1 core mapping here, with each service assigned a
+ * single core by default. Multiple services multiplex onto a
+ * single core, or map 1:1 if there are as many services as
+ * service cores.
+ */
+ ret = rte_service_enable_on_lcore(s, ids[lcore_iter]);
+ if (ret)
+ return -ENODEV;
+
+ lcore_iter++;
+ if (lcore_iter >= lcore_count)
+ lcore_iter = 0;
+
+ ret = rte_service_start(s);
+ if (ret)
+ return -ENOEXEC;
+ }
+
+ return 0;
+}
+
+static int32_t
+service_update(struct rte_service_spec *service, uint32_t lcore,
+ uint32_t *set, uint32_t *enabled)
+{
+ uint32_t i;
+ int32_t sid = -1;
+
+ for (i = 0; i < RTE_SERVICE_NUM_MAX; i++) {
+ if ((struct rte_service_spec *)&rte_services[i] == service &&
+ service_valid(i)) {
+ sid = i;
+ break;
+ }
+ }
+
+ if (sid == -1 || lcore >= RTE_MAX_LCORE)
+ return -EINVAL;
+
+ if (!lcore_states[lcore].is_service_core)
+ return -EINVAL;
+
+ uint64_t sid_mask = UINT64_C(1) << sid;
+ if (set) {
+ if (*set) {
+ lcore_states[lcore].service_mask |= sid_mask;
+ rte_services[sid].num_mapped_cores++;
+ } else {
+ lcore_states[lcore].service_mask &= ~(sid_mask);
+ rte_services[sid].num_mapped_cores--;
+ }
+ }
+
+ if (enabled)
+ *enabled = !!(lcore_states[lcore].service_mask & sid_mask);
+
+ rte_smp_wmb();
+
+ return 0;
+}
+
+int32_t rte_service_get_enabled_on_lcore(struct rte_service_spec *service,
+ uint32_t lcore)
+{
+ uint32_t enabled;
+ int ret = service_update(service, lcore, 0, &enabled);
+ if (ret == 0)
+ return enabled;
+ return -EINVAL;
+}
+
+int32_t
+rte_service_enable_on_lcore(struct rte_service_spec *service, uint32_t lcore)
+{
+ uint32_t on = 1;
+ return service_update(service, lcore, &on, 0);
+}
+
+int32_t
+rte_service_disable_on_lcore(struct rte_service_spec *service, uint32_t lcore)
+{
+ uint32_t off = 0;
+ return service_update(service, lcore, &off, 0);
+}
+
+int32_t rte_service_lcore_reset_all(void)
+{
+ /* loop over cores, reset all to mask 0 */
+ uint32_t i;
+ for (i = 0; i < RTE_MAX_LCORE; i++) {
+ lcore_states[i].service_mask = 0;
+ lcore_states[i].is_service_core = 0;
+ lcore_states[i].runstate = RUNSTATE_STOPPED;
+ }
+ for (i = 0; i < RTE_SERVICE_NUM_MAX; i++)
+ rte_services[i].num_mapped_cores = 0;
+
+ rte_smp_wmb();
+
+ return 0;
+}
+
+static void
+set_lcore_state(uint32_t lcore, int32_t state)
+{
+ /* mark core state in hugepage backed config */
+ struct rte_config *cfg = rte_eal_get_configuration();
+ cfg->lcore_role[lcore] = state;
+
+ /* mark state in process local lcore_config */
+ lcore_config[lcore].core_role = state;
+
+ /* update per-lcore optimized state tracking */
+ lcore_states[lcore].is_service_core = (state == ROLE_SERVICE);
+}
+
+int32_t
+rte_service_lcore_add(uint32_t lcore)
+{
+ if (lcore >= RTE_MAX_LCORE)
+ return -EINVAL;
+ if (lcore_states[lcore].is_service_core)
+ return -EALREADY;
+
+ set_lcore_state(lcore, ROLE_SERVICE);
+
+ /* ensure that after adding a core the mask and state are defaults */
+ lcore_states[lcore].service_mask = 0;
+ lcore_states[lcore].runstate = RUNSTATE_STOPPED;
+
+ rte_smp_wmb();
+ return 0;
+}
+
+int32_t
+rte_service_lcore_del(uint32_t lcore)
+{
+ if (lcore >= RTE_MAX_LCORE)
+ return -EINVAL;
+
+ struct core_state *cs = &lcore_states[lcore];
+ if (!cs->is_service_core)
+ return -EINVAL;
+
+ if (cs->runstate != RUNSTATE_STOPPED)
+ return -EBUSY;
+
+ set_lcore_state(lcore, ROLE_RTE);
+
+ rte_smp_wmb();
+ return 0;
+}
+
+int32_t
+rte_service_lcore_start(uint32_t lcore)
+{
+ if (lcore >= RTE_MAX_LCORE)
+ return -EINVAL;
+
+ struct core_state *cs = &lcore_states[lcore];
+ if (!cs->is_service_core)
+ return -EINVAL;
+
+ if (cs->runstate == RUNSTATE_RUNNING)
+ return -EALREADY;
+
+ /* Set the core to the run state first, and then launch;
+ * otherwise the launched function would return immediately, as
+ * the runstate is what keeps it in the service poll loop.
+ */
+ lcore_states[lcore].runstate = RUNSTATE_RUNNING;
+
+ int ret = rte_eal_remote_launch(rte_service_runner_func, 0, lcore);
+ /* returns -EBUSY if the core is already launched, 0 on success */
+ return ret;
+}
+
+int32_t
+rte_service_lcore_stop(uint32_t lcore)
+{
+ if (lcore >= RTE_MAX_LCORE)
+ return -EINVAL;
+
+ if (lcore_states[lcore].runstate == RUNSTATE_STOPPED)
+ return -EALREADY;
+
+ uint32_t i;
+ for (i = 0; i < RTE_SERVICE_NUM_MAX; i++) {
+ int32_t enabled = !!(lcore_states[lcore].service_mask &
+ (UINT64_C(1) << i));
+ int32_t service_running = rte_services[i].runstate !=
+ RUNSTATE_STOPPED;
+ int32_t only_core = rte_services[i].num_mapped_cores == 1;
+
+ /* if the core is mapped, and the service is running, and this
+ * is the only core that is mapped, the service would cease to
+ * run if this core stopped, so fail instead.
+ */
+ if (enabled && service_running && only_core)
+ return -EBUSY;
+ }
+
+ lcore_states[lcore].runstate = RUNSTATE_STOPPED;
+
+ return 0;
+}
+
+static void
+rte_service_dump_one(FILE *f, struct rte_service_spec_impl *s,
+ uint64_t all_cycles, uint32_t reset)
+{
+ /* avoid divide by zero */
+ if (all_cycles == 0)
+ all_cycles = 1;
+
+ uint64_t calls = 1;
+ if (s->calls != 0)
+ calls = s->calls;
+
+ fprintf(f, " %s: stats %d\tcalls %"PRIu64"\tcycles %"
+ PRIu64"\tavg: %"PRIu64"\n",
+ s->spec.name, service_stats_enabled(s), s->calls,
+ s->cycles_spent, s->cycles_spent / calls);
+
+ if (reset) {
+ s->cycles_spent = 0;
+ s->calls = 0;
+ }
+}
+
+static void
+service_dump_calls_per_lcore(FILE *f, uint32_t lcore, uint32_t reset)
+{
+ uint32_t i;
+ struct core_state *cs = &lcore_states[lcore];
+
+ fprintf(f, "%02d\t", lcore);
+ for (i = 0; i < RTE_SERVICE_NUM_MAX; i++) {
+ if (!service_valid(i))
+ continue;
+ fprintf(f, "%"PRIu64"\t", cs->calls_per_service[i]);
+ if (reset)
+ cs->calls_per_service[i] = 0;
+ }
+ fprintf(f, "\n");
+}
+
+int32_t rte_service_dump(FILE *f, struct rte_service_spec *service)
+{
+ uint32_t i;
+
+ uint64_t total_cycles = 0;
+ for (i = 0; i < rte_service_count; i++) {
+ if (!service_valid(i))
+ continue;
+ total_cycles += rte_services[i].cycles_spent;
+ }
+
+ if (service) {
+ struct rte_service_spec_impl *s =
+ (struct rte_service_spec_impl *)service;
+ fprintf(f, "Service %s Summary\n", s->spec.name);
+ uint32_t reset = 0;
+ rte_service_dump_one(f, s, total_cycles, reset);
+ return 0;
+ }
+
+ fprintf(f, "Services Summary\n");
+ for (i = 0; i < rte_service_count; i++) {
+ if (!service_valid(i))
+ continue;
+ uint32_t reset = 1;
+ rte_service_dump_one(f, &rte_services[i], total_cycles, reset);
+ }
+
+ fprintf(f, "Service Cores Summary\n");
+ for (i = 0; i < RTE_MAX_LCORE; i++) {
+ if (lcore_config[i].core_role != ROLE_SERVICE)
+ continue;
+
+ uint32_t reset = 0;
+ service_dump_calls_per_lcore(f, i, reset);
+ }
+
+ return 0;
+}
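The block above completes the new rte_service.c. As a usage sketch, a component would register a service, turn an lcore into a service core, map the service to it, and start both. This assumes the two service headers added by this patch set and that rte_service_spec.name is the embedded fixed-size array implied by the strcmp()/memset() logic above; my_service_cb and setup_my_service are hypothetical:

    #include <stdio.h>
    #include <string.h>
    #include <rte_service.h>
    #include <rte_service_component.h>

    static uint32_t poll_count;

    /* service body: must not block; returns 0 on success */
    static int32_t
    my_service_cb(void *userdata)
    {
            uint32_t *count = userdata;
            (*count)++;
            return 0;
    }

    static int
    setup_my_service(uint32_t service_lcore)
    {
            struct rte_service_spec spec;
            struct rte_service_spec *s;

            memset(&spec, 0, sizeof(spec));
            snprintf(spec.name, sizeof(spec.name), "my_service");
            spec.callback = my_service_cb;
            spec.callback_userdata = &poll_count;

            /* register() copies the spec into rte_services[]; fetch that copy */
            if (rte_service_register(&spec) != 0)
                    return -1;
            s = rte_service_get_by_name("my_service");

            if (rte_service_lcore_add(service_lcore) != 0)
                    return -1;
            if (rte_service_enable_on_lcore(s, service_lcore) != 0)
                    return -1;
            rte_service_start(s);
            return rte_service_lcore_start(service_lcore);
    }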
diff --git a/lib/librte_eal/linuxapp/eal/Makefile b/lib/librte_eal/linuxapp/eal/Makefile
index 640afd08..90bca4d6 100644
--- a/lib/librte_eal/linuxapp/eal/Makefile
+++ b/lib/librte_eal/linuxapp/eal/Makefile
@@ -37,7 +37,7 @@ ARCH_DIR ?= $(RTE_ARCH)
EXPORT_MAP := rte_eal_version.map
VPATH += $(RTE_SDK)/lib/librte_eal/common/arch/$(ARCH_DIR)
-LIBABIVER := 4
+LIBABIVER := 5
VPATH += $(RTE_SDK)/lib/librte_eal/common
@@ -50,6 +50,9 @@ LDLIBS += -ldl
LDLIBS += -lpthread
LDLIBS += -lgcc_s
LDLIBS += -lrt
+ifeq ($(CONFIG_RTE_EAL_NUMA_AWARE_HUGEPAGES),y)
+LDLIBS += -lnuma
+endif
# specific to linuxapp exec-env
SRCS-$(CONFIG_RTE_EXEC_ENV_LINUXAPP) := eal.c
@@ -96,6 +99,7 @@ SRCS-$(CONFIG_RTE_EXEC_ENV_LINUXAPP) += rte_malloc.c
SRCS-$(CONFIG_RTE_EXEC_ENV_LINUXAPP) += malloc_elem.c
SRCS-$(CONFIG_RTE_EXEC_ENV_LINUXAPP) += malloc_heap.c
SRCS-$(CONFIG_RTE_EXEC_ENV_LINUXAPP) += rte_keepalive.c
+SRCS-$(CONFIG_RTE_EXEC_ENV_LINUXAPP) += rte_service.c
# from arch dir
SRCS-$(CONFIG_RTE_EXEC_ENV_LINUXAPP) += rte_cpuflags.c
diff --git a/lib/librte_eal/linuxapp/eal/eal.c b/lib/librte_eal/linuxapp/eal/eal.c
index 7c78f2dc..48f12f44 100644
--- a/lib/librte_eal/linuxapp/eal/eal.c
+++ b/lib/librte_eal/linuxapp/eal/eal.c
@@ -46,7 +46,6 @@
#include <stddef.h>
#include <errno.h>
#include <limits.h>
-#include <errno.h>
#include <sys/mman.h>
#include <sys/queue.h>
#include <sys/stat.h>
@@ -64,6 +63,7 @@
#include <rte_errno.h>
#include <rte_per_lcore.h>
#include <rte_lcore.h>
+#include <rte_service_component.h>
#include <rte_log.h>
#include <rte_random.h>
#include <rte_cycles.h>
@@ -74,7 +74,6 @@
#include <rte_pci.h>
#include <rte_dev.h>
#include <rte_devargs.h>
-#include <rte_common.h>
#include <rte_version.h>
#include <rte_atomic.h>
#include <malloc_heap.h>
@@ -890,6 +889,11 @@ rte_eal_init(int argc, char **argv)
return -1;
}
+ if (eal_option_device_parse()) {
+ rte_errno = ENODEV;
+ return -1;
+ }
+
if (rte_bus_scan()) {
rte_eal_init_alert("Cannot scan the buses for devices\n");
rte_errno = ENODEV;
@@ -932,6 +936,14 @@ rte_eal_init(int argc, char **argv)
rte_eal_mp_remote_launch(sync_func, NULL, SKIP_MASTER);
rte_eal_mp_wait_lcore();
+ /* initialize services so that vdevs can register services during bus probe */
+ ret = rte_service_init();
+ if (ret) {
+ rte_eal_init_alert("rte_service_init() failed\n");
+ rte_errno = ENOEXEC;
+ return -1;
+ }
+
/* Probe all the buses and devices/drivers on them */
if (rte_bus_probe()) {
rte_eal_init_alert("Cannot probe devices\n");
@@ -939,6 +951,15 @@ rte_eal_init(int argc, char **argv)
return -1;
}
+ /* initialize default service/lcore mappings and start running. Ignore
+ * -ENOTSUP, as it indicates no service coremask passed to EAL.
+ */
+ ret = rte_service_start_with_defaults();
+ if (ret < 0 && ret != -ENOTSUP) {
+ rte_errno = ENOEXEC;
+ return -1;
+ }
+
rte_eal_mcfg_complete();
return fctret;
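Note the ordering established here: rte_service_init() runs before rte_bus_probe(), so PMDs can register services from their probe functions, and rte_service_start_with_defaults() runs after probing so those services get mapped to the service lcores and started without any application involvement. The service lcores themselves come from the service coremask EAL option added earlier in this patch set (e.g. `-s 0xc` alongside `-l 0-3` to make lcores 2 and 3 service cores; the exact option spelling lives in the eal_common_options.c changes).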
diff --git a/lib/librte_eal/linuxapp/eal/eal_interrupts.c b/lib/librte_eal/linuxapp/eal/eal_interrupts.c
index 2e3bd12a..3e9ac41e 100644
--- a/lib/librte_eal/linuxapp/eal/eal_interrupts.c
+++ b/lib/librte_eal/linuxapp/eal/eal_interrupts.c
@@ -64,6 +64,7 @@
#include <rte_malloc.h>
#include <rte_errno.h>
#include <rte_spinlock.h>
+#include <rte_pause.h>
#include "eal_private.h"
#include "eal_vfio.h"
diff --git a/lib/librte_eal/linuxapp/eal/eal_memory.c b/lib/librte_eal/linuxapp/eal/eal_memory.c
index ebe06833..52791282 100644
--- a/lib/librte_eal/linuxapp/eal/eal_memory.c
+++ b/lib/librte_eal/linuxapp/eal/eal_memory.c
@@ -2,6 +2,7 @@
* BSD LICENSE
*
* Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+ * Copyright(c) 2013 6WIND.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -30,36 +31,6 @@
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
-/* BSD LICENSE
- *
- * Copyright(c) 2013 6WIND.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- *
- * * Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in
- * the documentation and/or other materials provided with the
- * distribution.
- * * Neither the name of 6WIND S.A. nor the names of its
- * contributors may be used to endorse or promote products derived
- * from this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- */
#define _FILE_OFFSET_BITS 64
#include <errno.h>
@@ -70,7 +41,6 @@
#include <stdint.h>
#include <inttypes.h>
#include <string.h>
-#include <stdarg.h>
#include <sys/mman.h>
#include <sys/types.h>
#include <sys/stat.h>
@@ -78,11 +48,14 @@
#include <sys/file.h>
#include <unistd.h>
#include <limits.h>
-#include <errno.h>
#include <sys/ioctl.h>
#include <sys/time.h>
#include <signal.h>
#include <setjmp.h>
+#ifdef RTE_EAL_NUMA_AWARE_HUGEPAGES
+#include <numa.h>
+#include <numaif.h>
+#endif
#include <rte_log.h>
#include <rte_memory.h>
@@ -137,6 +110,13 @@ test_phys_addrs_available(void)
if (rte_xen_dom0_supported())
return;
+ if (!rte_eal_has_hugepages()) {
+ RTE_LOG(ERR, EAL,
+ "Started without hugepages support, physical addresses not available\n");
+ phys_addrs_available = false;
+ return;
+ }
+
physaddr = rte_mem_virt2phy(&tmp);
if (physaddr == RTE_BAD_PHYS_ADDR) {
RTE_LOG(ERR, EAL,
@@ -147,16 +127,6 @@ test_phys_addrs_available(void)
}
}
-/* Lock page in physical memory and prevent from swapping. */
-int
-rte_mem_lock_page(const void *virt)
-{
- unsigned long virtual = (unsigned long)virt;
- int page_size = getpagesize();
- unsigned long aligned = (virtual & ~ (page_size - 1));
- return mlock((void*)aligned, page_size);
-}
-
/*
* Get physical address of any mapped virtual address in the current process.
*/
@@ -387,6 +357,14 @@ static int huge_wrap_sigsetjmp(void)
return sigsetjmp(huge_jmpenv, 1);
}
+#ifdef RTE_EAL_NUMA_AWARE_HUGEPAGES
+/* Callback for numa library. */
+void numa_error(char *where)
+{
+ RTE_LOG(ERR, EAL, "%s failed: %s\n", where, strerror(errno));
+}
+#endif
+
/*
* Mmap all hugepages of hugepage table: it first opens a file in
* hugetlbfs, then mmap() hugepage_sz data in it. If orig is set, the
@@ -395,18 +373,78 @@ static int huge_wrap_sigsetjmp(void)
* map contiguous physical blocks in contiguous virtual blocks.
*/
static unsigned
-map_all_hugepages(struct hugepage_file *hugepg_tbl,
- struct hugepage_info *hpi, int orig)
+map_all_hugepages(struct hugepage_file *hugepg_tbl, struct hugepage_info *hpi,
+ uint64_t *essential_memory __rte_unused, int orig)
{
int fd;
unsigned i;
void *virtaddr;
void *vma_addr = NULL;
size_t vma_len = 0;
+#ifdef RTE_EAL_NUMA_AWARE_HUGEPAGES
+ int node_id = -1;
+ int essential_prev = 0;
+ int oldpolicy;
+ struct bitmask *oldmask = numa_allocate_nodemask();
+ bool have_numa = true;
+ unsigned long maxnode = 0;
+
+ /* Check if kernel supports NUMA. */
+ if (numa_available() != 0) {
+ RTE_LOG(DEBUG, EAL, "NUMA is not supported.\n");
+ have_numa = false;
+ }
+
+ if (orig && have_numa) {
+ RTE_LOG(DEBUG, EAL, "Trying to obtain current memory policy.\n");
+ if (get_mempolicy(&oldpolicy, oldmask->maskp,
+ oldmask->size + 1, 0, 0) < 0) {
+ RTE_LOG(ERR, EAL,
+ "Failed to get current mempolicy: %s. "
+ "Assuming MPOL_DEFAULT.\n", strerror(errno));
+ oldpolicy = MPOL_DEFAULT;
+ }
+ for (i = 0; i < RTE_MAX_NUMA_NODES; i++)
+ if (internal_config.socket_mem[i])
+ maxnode = i + 1;
+ }
+#endif
for (i = 0; i < hpi->num_pages[0]; i++) {
uint64_t hugepage_sz = hpi->hugepage_sz;
+#ifdef RTE_EAL_NUMA_AWARE_HUGEPAGES
+ if (maxnode) {
+ unsigned int j;
+
+ for (j = 0; j < maxnode; j++)
+ if (essential_memory[j])
+ break;
+
+ if (j == maxnode) {
+ node_id = (node_id + 1) % maxnode;
+ while (!internal_config.socket_mem[node_id]) {
+ node_id++;
+ node_id %= maxnode;
+ }
+ essential_prev = 0;
+ } else {
+ node_id = j;
+ essential_prev = essential_memory[j];
+
+ if (essential_memory[j] < hugepage_sz)
+ essential_memory[j] = 0;
+ else
+ essential_memory[j] -= hugepage_sz;
+ }
+
+ RTE_LOG(DEBUG, EAL,
+ "Setting policy MPOL_PREFERRED for socket %d\n",
+ node_id);
+ numa_set_preferred(node_id);
+ }
+#endif
+
if (orig) {
hugepg_tbl[i].file_id = i;
hugepg_tbl[i].size = hugepage_sz;
@@ -461,7 +499,7 @@ map_all_hugepages(struct hugepage_file *hugepg_tbl,
if (fd < 0) {
RTE_LOG(DEBUG, EAL, "%s(): open failed: %s\n", __func__,
strerror(errno));
- return i;
+ goto out;
}
/* map the segment, and populate page tables,
@@ -472,7 +510,7 @@ map_all_hugepages(struct hugepage_file *hugepg_tbl,
RTE_LOG(DEBUG, EAL, "%s(): mmap failed: %s\n", __func__,
strerror(errno));
close(fd);
- return i;
+ goto out;
}
if (orig) {
@@ -497,7 +535,12 @@ map_all_hugepages(struct hugepage_file *hugepg_tbl,
munmap(virtaddr, hugepage_sz);
close(fd);
unlink(hugepg_tbl[i].filepath);
- return i;
+#ifdef RTE_EAL_NUMA_AWARE_HUGEPAGES
+ if (maxnode)
+ essential_memory[node_id] =
+ essential_prev;
+#endif
+ goto out;
}
*(int *)virtaddr = 0;
}
@@ -508,7 +551,7 @@ map_all_hugepages(struct hugepage_file *hugepg_tbl,
RTE_LOG(DEBUG, EAL, "%s(): Locking file failed:%s \n",
__func__, strerror(errno));
close(fd);
- return i;
+ goto out;
}
close(fd);
@@ -517,6 +560,22 @@ map_all_hugepages(struct hugepage_file *hugepg_tbl,
vma_len -= hugepage_sz;
}
+out:
+#ifdef RTE_EAL_NUMA_AWARE_HUGEPAGES
+ if (maxnode) {
+ RTE_LOG(DEBUG, EAL,
+ "Restoring previous memory policy: %d\n", oldpolicy);
+ if (oldpolicy == MPOL_DEFAULT) {
+ numa_set_localalloc();
+ } else if (set_mempolicy(oldpolicy, oldmask->maskp,
+ oldmask->size + 1) < 0) {
+ RTE_LOG(ERR, EAL, "Failed to restore mempolicy: %s\n",
+ strerror(errno));
+ numa_set_localalloc();
+ }
+ }
+ numa_free_cpumask(oldmask);
+#endif
return i;
}
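The allocation loop above brackets each round of mappings with a save/prefer/restore of the calling thread's NUMA memory policy. A standalone sketch of that libnuma pattern, detached from the hugetlbfs details (alloc_on_socket is a hypothetical helper, not part of the patch):

    #include <stddef.h>
    #include <sys/mman.h>
    #include <numa.h>
    #include <numaif.h>

    static void *
    alloc_on_socket(size_t len, int socket)
    {
            struct bitmask *oldmask = numa_allocate_nodemask();
            int oldpolicy;
            void *va;

            /* remember the current policy so it can be restored */
            if (get_mempolicy(&oldpolicy, oldmask->maskp,
                            oldmask->size + 1, NULL, 0) < 0)
                    oldpolicy = MPOL_DEFAULT;

            numa_set_preferred(socket); /* steer the coming page faults */
            va = mmap(NULL, len, PROT_READ | PROT_WRITE,
                            MAP_PRIVATE | MAP_ANONYMOUS | MAP_POPULATE, -1, 0);

            /* restore whatever policy the thread had before */
            if (oldpolicy == MPOL_DEFAULT)
                    numa_set_localalloc();
            else
                    set_mempolicy(oldpolicy, oldmask->maskp,
                                    oldmask->size + 1);
            numa_free_nodemask(oldmask);

            return va == MAP_FAILED ? NULL : va;
    }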
@@ -551,8 +610,8 @@ find_numasocket(struct hugepage_file *hugepg_tbl, struct hugepage_info *hpi)
f = fopen("/proc/self/numa_maps", "r");
if (f == NULL) {
- RTE_LOG(NOTICE, EAL, "cannot open /proc/self/numa_maps,"
- " consider that all memory is in socket_id 0\n");
+ RTE_LOG(NOTICE, EAL, "NUMA support not available,"
+ " consider that all memory is in socket_id 0\n");
return 0;
}
@@ -601,6 +660,11 @@ find_numasocket(struct hugepage_file *hugepg_tbl, struct hugepage_info *hpi)
if (hugepg_tbl[i].orig_va == va) {
hugepg_tbl[i].socket_id = socket_id;
hp_count++;
+#ifdef RTE_EAL_NUMA_AWARE_HUGEPAGES
+ RTE_LOG(DEBUG, EAL,
+ "Hugepage %s is on socket %d\n",
+ hugepg_tbl[i].filepath, socket_id);
+#endif
}
}
}
@@ -995,7 +1059,7 @@ rte_eal_hugepage_init(void)
strerror(errno));
return -1;
}
- mcfg->memseg[0].phys_addr = (phys_addr_t)(uintptr_t)addr;
+ mcfg->memseg[0].phys_addr = RTE_BAD_PHYS_ADDR;
mcfg->memseg[0].addr = addr;
mcfg->memseg[0].hugepage_sz = RTE_PGSIZE_4K;
mcfg->memseg[0].len = internal_config.memory;
@@ -1039,6 +1103,11 @@ rte_eal_hugepage_init(void)
huge_register_sigbus();
+ /* make a copy of socket_mem, needed for balanced allocation. */
+ for (i = 0; i < RTE_MAX_NUMA_NODES; i++)
+ memory[i] = internal_config.socket_mem[i];
+
+
/* map all hugepages and sort them */
for (i = 0; i < (int)internal_config.num_hugepage_sizes; i ++){
unsigned pages_old, pages_new;
@@ -1056,7 +1125,8 @@ rte_eal_hugepage_init(void)
/* map all hugepages available */
pages_old = hpi->num_pages[0];
- pages_new = map_all_hugepages(&tmp_hp[hp_offset], hpi, 1);
+ pages_new = map_all_hugepages(&tmp_hp[hp_offset], hpi,
+ memory, 1);
if (pages_new < pages_old) {
RTE_LOG(DEBUG, EAL,
"%d not %d hugepages of size %u MB allocated\n",
@@ -1099,7 +1169,7 @@ rte_eal_hugepage_init(void)
sizeof(struct hugepage_file), cmp_physaddr);
/* remap all hugepages */
- if (map_all_hugepages(&tmp_hp[hp_offset], hpi, 0) !=
+ if (map_all_hugepages(&tmp_hp[hp_offset], hpi, NULL, 0) !=
hpi->num_pages[0]) {
RTE_LOG(ERR, EAL, "Failed to remap %u MB pages\n",
(unsigned)(hpi->hugepage_sz / 0x100000));
diff --git a/lib/librte_eal/linuxapp/eal/eal_pci.c b/lib/librte_eal/linuxapp/eal/eal_pci.c
index 595622b2..8951ce74 100644
--- a/lib/librte_eal/linuxapp/eal/eal_pci.c
+++ b/lib/librte_eal/linuxapp/eal/eal_pci.c
@@ -310,22 +310,20 @@ pci_scan_one(const char *dirname, const struct rte_pci_addr *addr)
dev->max_vfs = (uint16_t)tmp;
}
- /* get numa node */
+ /* get numa node, default to 0 if not present */
snprintf(filename, sizeof(filename), "%s/numa_node",
dirname);
- if (access(filename, R_OK) != 0) {
- /* if no NUMA support, set default to 0 */
- dev->device.numa_node = 0;
+
+ if (access(filename, F_OK) != -1) {
+ if (eal_parse_sysfs_value(filename, &tmp) == 0)
+ dev->device.numa_node = tmp;
+ else
+ dev->device.numa_node = -1;
} else {
- if (eal_parse_sysfs_value(filename, &tmp) < 0) {
- free(dev);
- return -1;
- }
- dev->device.numa_node = tmp;
+ dev->device.numa_node = 0;
}
- rte_pci_device_name(addr, dev->name, sizeof(dev->name));
- dev->device.name = dev->name;
+ pci_name_set(dev);
/* parse resources */
snprintf(filename, sizeof(filename), "%s/resource", dirname);
@@ -373,6 +371,7 @@ pci_scan_one(const char *dirname, const struct rte_pci_addr *addr)
} else { /* already registered */
dev2->kdrv = dev->kdrv;
dev2->max_vfs = dev->max_vfs;
+ pci_name_set(dev2);
memmove(dev2->mem_resource, dev->mem_resource,
sizeof(dev->mem_resource));
free(dev);
@@ -430,10 +429,10 @@ parse_pci_addr_format(const char *buf, int bufsize, struct rte_pci_addr *addr)
/* now convert to int values */
errno = 0;
- addr->domain = (uint16_t)strtoul(splitaddr.domain, NULL, 16);
- addr->bus = (uint8_t)strtoul(splitaddr.bus, NULL, 16);
- addr->devid = (uint8_t)strtoul(splitaddr.devid, NULL, 16);
- addr->function = (uint8_t)strtoul(splitaddr.function, NULL, 10);
+ addr->domain = strtoul(splitaddr.domain, NULL, 16);
+ addr->bus = strtoul(splitaddr.bus, NULL, 16);
+ addr->devid = strtoul(splitaddr.devid, NULL, 16);
+ addr->function = strtoul(splitaddr.function, NULL, 10);
if (errno != 0)
goto error;
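The change above drops the narrowing casts and relies on the errno protocol around strtoul() to detect bad input. A minimal sketch of that pattern (parse_hex_u32 is a hypothetical helper):

    #include <errno.h>
    #include <stdint.h>
    #include <stdlib.h>

    static int
    parse_hex_u32(const char *s, uint32_t *out)
    {
            char *end;
            unsigned long v;

            errno = 0;
            v = strtoul(s, &end, 16);
            /* reject overflow, empty input, and trailing junk */
            if (errno != 0 || end == s || *end != '\0' || v > UINT32_MAX)
                    return -1;

            *out = (uint32_t)v;
            return 0;
    }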
diff --git a/lib/librte_eal/linuxapp/eal/eal_pci_vfio.c b/lib/librte_eal/linuxapp/eal/eal_pci_vfio.c
index 2be13195..aa9d96ed 100644
--- a/lib/librte_eal/linuxapp/eal/eal_pci_vfio.c
+++ b/lib/librte_eal/linuxapp/eal/eal_pci_vfio.c
@@ -214,7 +214,7 @@ pci_vfio_setup_interrupts(struct rte_pci_device *dev, int vfio_dev_fd)
intr_idx = VFIO_PCI_NUM_IRQS;
/* get interrupt type from internal config (MSI-X by default, can be
- * overriden from the command line
+ * overridden from the command line
*/
switch (internal_config.vfio_intr_mode) {
case RTE_INTR_MODE_MSIX:
diff --git a/lib/librte_eal/linuxapp/eal/eal_thread.c b/lib/librte_eal/linuxapp/eal/eal_thread.c
index 9f88530e..6481eeea 100644
--- a/lib/librte_eal/linuxapp/eal/eal_thread.c
+++ b/lib/librte_eal/linuxapp/eal/eal_thread.c
@@ -49,7 +49,6 @@
#include <rte_memzone.h>
#include <rte_per_lcore.h>
#include <rte_eal.h>
-#include <rte_per_lcore.h>
#include <rte_lcore.h>
#include "eal_private.h"
@@ -184,7 +183,14 @@ eal_thread_loop(__attribute__((unused)) void *arg)
ret = lcore_config[lcore_id].f(fct_arg);
lcore_config[lcore_id].ret = ret;
rte_wmb();
- lcore_config[lcore_id].state = FINISHED;
+
+ /* when a service core returns, it should go directly to WAIT
+ * state, because the application will not lcore_wait() for it.
+ */
+ if (lcore_config[lcore_id].core_role == ROLE_SERVICE)
+ lcore_config[lcore_id].state = WAIT;
+ else
+ lcore_config[lcore_id].state = FINISHED;
}
/* never reached */
diff --git a/lib/librte_eal/linuxapp/eal/eal_xen_memory.c b/lib/librte_eal/linuxapp/eal/eal_xen_memory.c
index bddbdb07..19db1cb5 100644
--- a/lib/librte_eal/linuxapp/eal/eal_xen_memory.c
+++ b/lib/librte_eal/linuxapp/eal/eal_xen_memory.c
@@ -38,7 +38,6 @@
#include <stdint.h>
#include <inttypes.h>
#include <string.h>
-#include <stdarg.h>
#include <sys/mman.h>
#include <sys/types.h>
#include <sys/stat.h>
@@ -46,7 +45,6 @@
#include <sys/file.h>
#include <unistd.h>
#include <limits.h>
-#include <errno.h>
#include <sys/ioctl.h>
#include <sys/time.h>
diff --git a/lib/librte_eal/linuxapp/eal/rte_eal_version.map b/lib/librte_eal/linuxapp/eal/rte_eal_version.map
index 670bab3a..3a8f1540 100644
--- a/lib/librte_eal/linuxapp/eal/rte_eal_version.map
+++ b/lib/librte_eal/linuxapp/eal/rte_eal_version.map
@@ -198,3 +198,47 @@ DPDK_17.05 {
vfio_get_group_no;
} DPDK_17.02;
+
+DPDK_17.08 {
+ global:
+
+ rte_bus_find;
+ rte_bus_find_by_device;
+ rte_bus_find_by_name;
+ rte_log_get_level;
+
+} DPDK_17.05;
+
+EXPERIMENTAL {
+ global:
+
+ rte_eal_devargs_insert;
+ rte_eal_devargs_parse;
+ rte_eal_devargs_remove;
+ rte_eal_hotplug_add;
+ rte_eal_hotplug_remove;
+ rte_service_disable_on_lcore;
+ rte_service_dump;
+ rte_service_enable_on_lcore;
+ rte_service_get_by_id;
+ rte_service_get_by_name;
+ rte_service_get_count;
+ rte_service_get_enabled_on_lcore;
+ rte_service_is_running;
+ rte_service_lcore_add;
+ rte_service_lcore_count;
+ rte_service_lcore_del;
+ rte_service_lcore_list;
+ rte_service_lcore_reset_all;
+ rte_service_lcore_start;
+ rte_service_lcore_stop;
+ rte_service_probe_capability;
+ rte_service_register;
+ rte_service_reset;
+ rte_service_set_stats_enable;
+ rte_service_start;
+ rte_service_start_with_defaults;
+ rte_service_stop;
+ rte_service_unregister;
+
+} DPDK_17.08;
diff --git a/lib/librte_eal/linuxapp/igb_uio/igb_uio.c b/lib/librte_eal/linuxapp/igb_uio/igb_uio.c
index b9d427c5..07a19a31 100644
--- a/lib/librte_eal/linuxapp/igb_uio/igb_uio.c
+++ b/lib/librte_eal/linuxapp/igb_uio/igb_uio.c
@@ -170,6 +170,37 @@ igbuio_pci_irqhandler(int irq, struct uio_info *info)
return IRQ_HANDLED;
}
+/**
+ * This gets called when the UIO device file is opened.
+ */
+static int
+igbuio_pci_open(struct uio_info *info, struct inode *inode)
+{
+ struct rte_uio_pci_dev *udev = info->priv;
+ struct pci_dev *dev = udev->pdev;
+
+ pci_reset_function(dev);
+
+ /* set bus master, which was cleared by the reset function */
+ pci_set_master(dev);
+
+ return 0;
+}
+
+static int
+igbuio_pci_release(struct uio_info *info, struct inode *inode)
+{
+ struct rte_uio_pci_dev *udev = info->priv;
+ struct pci_dev *dev = udev->pdev;
+
+ /* stop the device from further DMA */
+ pci_clear_master(dev);
+
+ pci_reset_function(dev);
+
+ return 0;
+}
+
#ifdef CONFIG_XEN_DOM0
static int
igbuio_dom0_mmap_phys(struct uio_info *info, struct vm_area_struct *vma)
@@ -372,6 +403,8 @@ igbuio_pci_probe(struct pci_dev *dev, const struct pci_device_id *id)
udev->info.version = "0.1";
udev->info.handler = igbuio_pci_irqhandler;
udev->info.irqcontrol = igbuio_pci_irqcontrol;
+ udev->info.open = igbuio_pci_open;
+ udev->info.release = igbuio_pci_release;
#ifdef CONFIG_XEN_DOM0
/* check if the driver run on Xen Dom0 */
if (xen_initial_domain())
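With these hooks in place, a device bound to igb_uio is reset every time its UIO node is opened and quiesced when it is closed, so a crashed application can no longer leave stale DMA running. A sketch of the userspace view (uio_session is hypothetical):

    #include <fcntl.h>
    #include <unistd.h>

    static int
    uio_session(void)
    {
            /* open() -> igbuio_pci_open(): function reset, bus master on */
            int fd = open("/dev/uio0", O_RDWR);
            if (fd < 0)
                    return -1;

            /* ... map BARs and run the PMD ... */

            /* close() -> igbuio_pci_release(): clear bus master, reset */
            close(fd);
            return 0;
    }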
diff --git a/lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_82575.c b/lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_82575.c
index d558af20..1c30d12b 100644
--- a/lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_82575.c
+++ b/lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_82575.c
@@ -1357,7 +1357,7 @@ static s32 e1000_get_pcs_speed_and_duplex_82575(struct e1000_hw *hw,
* @hw: pointer to the HW structure
*
* In the case of serdes shut down sfp and PCS on driver unload
- * when management pass thru is not enabled.
+ * when management pass through is not enabled.
**/
void e1000_shutdown_serdes_link_82575(struct e1000_hw *hw)
{
diff --git a/lib/librte_eal/linuxapp/kni/ethtool/igb/igb_main.c b/lib/librte_eal/linuxapp/kni/ethtool/igb/igb_main.c
index 5f1f3a6b..99338c5c 100644
--- a/lib/librte_eal/linuxapp/kni/ethtool/igb/igb_main.c
+++ b/lib/librte_eal/linuxapp/kni/ethtool/igb/igb_main.c
@@ -1133,7 +1133,7 @@ static int igb_alloc_q_vector(struct igb_adapter *adapter,
/* initialize pointer to rings */
ring = q_vector->ring;
- /* intialize ITR */
+ /* initialize ITR */
if (rxr_count) {
/* rx or rx/tx vector */
if (!adapter->rx_itr_setting || adapter->rx_itr_setting > 3)
diff --git a/lib/librte_eal/linuxapp/kni/ethtool/igb/kcompat.h b/lib/librte_eal/linuxapp/kni/ethtool/igb/kcompat.h
index 4c52da3c..e0a03542 100644
--- a/lib/librte_eal/linuxapp/kni/ethtool/igb/kcompat.h
+++ b/lib/librte_eal/linuxapp/kni/ethtool/igb/kcompat.h
@@ -1165,7 +1165,7 @@ static inline u32 _kc_netif_msg_init(int debug_value, int default_msg_enable_bit
#define pci_register_driver pci_module_init
/*
- * Most of the dma compat code is copied/modifed from the 2.4.37
+ * Most of the dma compat code is copied/modified from the 2.4.37
* /include/linux/libata-compat.h header file
*/
/* These definitions mirror those in pci.h, so they can be used
diff --git a/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_api.c b/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_api.c
index f00fe796..4808d06e 100644
--- a/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_api.c
+++ b/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_api.c
@@ -718,7 +718,7 @@ s32 ixgbe_update_eeprom_checksum(struct ixgbe_hw *hw)
* @vmdq: VMDq pool to assign
*
* Puts an ethernet address into a receive address register, or
- * finds the rar that it is aleady in; adds to the pool list
+ * finds the rar that it is already in; adds to the pool list
**/
s32 ixgbe_insert_mac_addr(struct ixgbe_hw *hw, u8 *addr, u32 vmdq)
{
diff --git a/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_common.c b/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_common.c
index 88b33fa0..2c861de5 100644
--- a/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_common.c
+++ b/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_common.c
@@ -3007,7 +3007,7 @@ u16 ixgbe_get_pcie_msix_count_generic(struct ixgbe_hw *hw)
* @vmdq: VMDq pool to assign
*
* Puts an ethernet address into a receive address register, or
- * finds the rar that it is aleady in; adds to the pool list
+ * finds the rar that it is already in; adds to the pool list
**/
s32 ixgbe_insert_mac_addr_generic(struct ixgbe_hw *hw, u8 *addr, u32 vmdq)
{
diff --git a/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/kcompat.h b/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/kcompat.h
index 4c7a6408..f62a7b56 100644
--- a/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/kcompat.h
+++ b/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/kcompat.h
@@ -1108,7 +1108,7 @@ static inline u32 _kc_netif_msg_init(int debug_value, int default_msg_enable_bit
#define pci_register_driver pci_module_init
/*
- * Most of the dma compat code is copied/modifed from the 2.4.37
+ * Most of the dma compat code is copied/modified from the 2.4.37
* /include/linux/libata-compat.h header file
*/
/* These definitions mirror those in pci.h, so they can be used
diff --git a/lib/librte_efd/rte_efd.c b/lib/librte_efd/rte_efd.c
index f601d62e..4d9a0887 100644
--- a/lib/librte_efd/rte_efd.c
+++ b/lib/librte_efd/rte_efd.c
@@ -53,6 +53,8 @@
#include "rte_efd.h"
#if defined(RTE_ARCH_X86)
#include "rte_efd_x86.h"
+#elif defined(RTE_ARCH_ARM64)
+#include "rte_efd_arm64.h"
#endif
#define EFD_KEY(key_idx, table) (table->keys + ((key_idx) * table->key_len))
@@ -103,6 +105,7 @@ allocated memory
enum efd_lookup_internal_function {
EFD_LOOKUP_SCALAR = 0,
EFD_LOOKUP_AVX2,
+ EFD_LOOKUP_NEON,
EFD_LOOKUP_NUM
};
@@ -674,6 +677,16 @@ rte_efd_create(const char *name, uint32_t max_num_rules, uint32_t key_len,
table->lookup_fn = EFD_LOOKUP_AVX2;
else
#endif
+#if defined(RTE_ARCH_ARM64)
+ /*
+ * For values of 16 bits or fewer, the scalar function performs better
+ * than the vectorised version
+ */
+ if (RTE_EFD_VALUE_NUM_BITS > 16 &&
+ rte_cpu_get_flag_enabled(RTE_CPUFLAG_NEON))
+ table->lookup_fn = EFD_LOOKUP_NEON;
+ else
+#endif
table->lookup_fn = EFD_LOOKUP_SCALAR;
/*
@@ -1271,6 +1284,15 @@ efd_lookup_internal(const struct efd_online_group_entry * const group,
group->lookup_table,
hash_val_a,
hash_val_b);
+ break;
+#endif
+#if defined(RTE_ARCH_ARM64)
+ case EFD_LOOKUP_NEON:
+ return efd_lookup_internal_neon(group->hash_idx,
+ group->lookup_table,
+ hash_val_a,
+ hash_val_b);
+ break;
#endif
case EFD_LOOKUP_SCALAR:
/* Fall-through */
diff --git a/lib/librte_efd/rte_efd_arm64.h b/lib/librte_efd/rte_efd_arm64.h
new file mode 100644
index 00000000..63289ac4
--- /dev/null
+++ b/lib/librte_efd/rte_efd_arm64.h
@@ -0,0 +1,76 @@
+/*
+ * BSD LICENSE
+ *
+ * Copyright (C) Cavium, Inc. 2017.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Cavium, Inc nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/*
+ * rte_efd_arm64.h
+ * This file holds all arm64 specific EFD functions
+ */
+
+#ifndef __RTE_EFD_ARM64_H__
+#define __RTE_EFD_ARM64_H__
+
+#include <rte_vect.h>
+
+static inline efd_value_t
+efd_lookup_internal_neon(const efd_hashfunc_t *group_hash_idx,
+ const efd_lookuptbl_t *group_lookup_table,
+ const uint32_t hash_val_a, const uint32_t hash_val_b)
+{
+ efd_value_t value = 0;
+ uint32_t i = 0;
+ uint32x4_t vhash_val_a = vmovq_n_u32(hash_val_a);
+ uint32x4_t vhash_val_b = vmovq_n_u32(hash_val_b);
+ int32x4_t vshift = {0, 1, 2, 3};
+ uint32x4_t vmask = vdupq_n_u32(0x1);
+ int32x4_t vincr = vdupq_n_s32(4);
+
+ for (; i < RTE_EFD_VALUE_NUM_BITS; i += 4) {
+ uint32x4_t vhash_idx = vshll_n_u16(
+ vld1_u16((uint16_t const *)&group_hash_idx[i]), 0);
+ uint32x4_t vlookup_table = vshll_n_u16(
+ vld1_u16((uint16_t const *)&group_lookup_table[i]), 0);
+ uint32x4_t vhash = vaddq_u32(vhash_val_a,
+ vmulq_u32(vhash_idx, vhash_val_b));
+ int32x4_t vbucket_idx = vnegq_s32(vreinterpretq_s32_u32(
+ vshrq_n_u32(vhash, EFD_LOOKUPTBL_SHIFT)));
+ uint32x4_t vresult = vshlq_u32(vlookup_table, vbucket_idx);
+
+ vresult = vandq_u32(vresult, vmask);
+ vresult = vshlq_u32(vresult, vshift);
+ value |= vaddvq_u32(vresult);
+ vshift = vaddq_s32(vshift, vincr);
+ }
+
+ return value;
+}
+
+#endif /* __RTE_EFD_ARM64_H__ */
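For reference, a scalar sketch of what the NEON loop computes per value bit i, shown only to make the vector code above easier to audit (efd_lookup_scalar_sketch is hypothetical; it is intended to mirror EFD's generic lookup):

    static inline efd_value_t
    efd_lookup_scalar_sketch(const efd_hashfunc_t *group_hash_idx,
                    const efd_lookuptbl_t *group_lookup_table,
                    const uint32_t hash_val_a, const uint32_t hash_val_b)
    {
            efd_value_t value = 0;
            uint32_t i;

            for (i = 0; i < RTE_EFD_VALUE_NUM_BITS; i++) {
                    /* hash = a + idx[i] * b; the top bits pick a bucket */
                    uint32_t hash = hash_val_a +
                            (uint32_t)group_hash_idx[i] * hash_val_b;
                    uint32_t bucket = hash >> EFD_LOOKUPTBL_SHIFT;
                    /* one bit of the value comes from the lookup table */
                    value |= ((group_lookup_table[i] >> bucket) & 0x1) << i;
            }
            return value;
    }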
diff --git a/lib/librte_ether/Makefile b/lib/librte_ether/Makefile
index 93fdde10..db692ae4 100644
--- a/lib/librte_ether/Makefile
+++ b/lib/librte_ether/Makefile
@@ -1,6 +1,6 @@
# BSD LICENSE
#
-# Copyright(c) 2010-2016 Intel Corporation. All rights reserved.
+# Copyright(c) 2010-2017 Intel Corporation. All rights reserved.
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
@@ -45,6 +45,7 @@ LIBABIVER := 6
SRCS-y += rte_ethdev.c
SRCS-y += rte_flow.c
+SRCS-y += rte_tm.c
#
# Export include files
@@ -56,5 +57,7 @@ SYMLINK-y-include += rte_eth_ctrl.h
SYMLINK-y-include += rte_dev_info.h
SYMLINK-y-include += rte_flow.h
SYMLINK-y-include += rte_flow_driver.h
+SYMLINK-y-include += rte_tm.h
+SYMLINK-y-include += rte_tm_driver.h
include $(RTE_SDK)/mk/rte.lib.mk
diff --git a/lib/librte_ether/rte_ethdev.c b/lib/librte_ether/rte_ethdev.c
index 83898a8f..0597641e 100644
--- a/lib/librte_ether/rte_ethdev.c
+++ b/lib/librte_ether/rte_ethdev.c
@@ -1,7 +1,7 @@
/*-
* BSD LICENSE
*
- * Copyright(c) 2010-2016 Intel Corporation. All rights reserved.
+ * Copyright(c) 2010-2017 Intel Corporation. All rights reserved.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -72,7 +72,6 @@ static const char *MZ_RTE_ETH_DEV_DATA = "rte_eth_dev_data";
struct rte_eth_dev rte_eth_devices[RTE_MAX_ETHPORTS];
static struct rte_eth_dev_data *rte_eth_dev_data;
static uint8_t eth_dev_last_created_port;
-static uint8_t nb_ports;
/* spinlock for eth device callbacks */
static rte_spinlock_t rte_eth_dev_cb_lock = RTE_SPINLOCK_INITIALIZER;
@@ -129,6 +128,7 @@ struct rte_eth_dev_callback {
TAILQ_ENTRY(rte_eth_dev_callback) next; /**< Callbacks list */
rte_eth_dev_cb_fn cb_fn; /**< Callback address */
void *cb_arg; /**< Parameter for callback */
+ void *ret_param; /**< Return parameter */
enum rte_eth_event_type event; /**< Interrupt event type */
uint32_t active; /**< Callback is executing */
};
@@ -178,9 +178,11 @@ rte_eth_dev_allocated(const char *name)
unsigned i;
for (i = 0; i < RTE_MAX_ETHPORTS; i++) {
- if ((rte_eth_devices[i].state == RTE_ETH_DEV_ATTACHED) &&
- strcmp(rte_eth_devices[i].data->name, name) == 0)
- return &rte_eth_devices[i];
+ if (rte_eth_devices[i].state == RTE_ETH_DEV_ATTACHED &&
+ rte_eth_devices[i].device) {
+ if (!strcmp(rte_eth_devices[i].device->name, name))
+ return &rte_eth_devices[i];
+ }
}
return NULL;
}
@@ -207,7 +209,6 @@ eth_dev_get(uint8_t port_id)
TAILQ_INIT(&(eth_dev->link_intr_cbs));
eth_dev_last_created_port = port_id;
- nb_ports++;
return eth_dev;
}
@@ -280,7 +281,6 @@ rte_eth_dev_release_port(struct rte_eth_dev *eth_dev)
return -EINVAL;
eth_dev->state = RTE_ETH_DEV_UNUSED;
- nb_ports--;
return 0;
}
@@ -288,7 +288,8 @@ int
rte_eth_dev_is_valid_port(uint8_t port_id)
{
if (port_id >= RTE_MAX_ETHPORTS ||
- rte_eth_devices[port_id].state != RTE_ETH_DEV_ATTACHED)
+ (rte_eth_devices[port_id].state != RTE_ETH_DEV_ATTACHED &&
+ rte_eth_devices[port_id].state != RTE_ETH_DEV_DEFERRED))
return 0;
else
return 1;
@@ -304,13 +305,21 @@ rte_eth_dev_socket_id(uint8_t port_id)
uint8_t
rte_eth_dev_count(void)
{
- return nb_ports;
+ uint8_t p;
+ uint8_t count;
+
+ count = 0;
+
+ RTE_ETH_FOREACH_DEV(p)
+ count++;
+
+ return count;
}
int
rte_eth_dev_get_name_by_port(uint8_t port_id, char *name)
{
- char *tmp;
+ const char *tmp;
RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -EINVAL);
@@ -321,7 +330,7 @@ rte_eth_dev_get_name_by_port(uint8_t port_id, char *name)
/* shouldn't check 'rte_eth_devices[i].data',
* because it might be overwritten by VDEV PMD */
- tmp = rte_eth_dev_data[port_id].name;
+ tmp = rte_eth_devices[port_id].device->name;
strcpy(name, tmp);
return 0;
}
@@ -329,6 +338,7 @@ rte_eth_dev_get_name_by_port(uint8_t port_id, char *name)
int
rte_eth_dev_get_port_by_name(const char *name, uint8_t *port_id)
{
+ int ret;
int i;
if (name == NULL) {
@@ -336,16 +346,14 @@ rte_eth_dev_get_port_by_name(const char *name, uint8_t *port_id)
return -EINVAL;
}
- if (!nb_ports)
- return -ENODEV;
-
- *port_id = RTE_MAX_ETHPORTS;
RTE_ETH_FOREACH_DEV(i) {
- if (!strncmp(name,
- rte_eth_dev_data[i].name, strlen(name))) {
+ if (!rte_eth_devices[i].device)
+ continue;
+ ret = strncmp(name, rte_eth_devices[i].device->name,
+ strlen(name));
+ if (ret == 0) {
*port_id = i;
-
return 0;
}
}
@@ -359,16 +367,6 @@ rte_eth_dev_is_detachable(uint8_t port_id)
RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -EINVAL);
- switch (rte_eth_devices[port_id].data->kdrv) {
- case RTE_KDRV_IGB_UIO:
- case RTE_KDRV_UIO_GENERIC:
- case RTE_KDRV_NIC_UIO:
- case RTE_KDRV_NONE:
- case RTE_KDRV_VFIO:
- break;
- default:
- return -ENOTSUP;
- }
dev_flags = rte_eth_devices[port_id].data->dev_flags;
if ((dev_flags & RTE_ETH_DEV_DETACHABLE) &&
(!(dev_flags & RTE_ETH_DEV_BONDED_SLAVE)))
@@ -438,12 +436,14 @@ rte_eth_dev_detach(uint8_t port_id, char *name)
if (rte_eth_dev_is_detachable(port_id))
goto err;
- snprintf(name, sizeof(rte_eth_devices[port_id].data->name),
- "%s", rte_eth_devices[port_id].data->name);
- ret = rte_eal_dev_detach(name);
+ snprintf(name, RTE_DEV_NAME_MAX_LEN, "%s",
+ rte_eth_devices[port_id].device->name);
+
+ ret = rte_eal_dev_detach(rte_eth_devices[port_id].device);
if (ret < 0)
goto err;
+ rte_eth_devices[port_id].state = RTE_ETH_DEV_UNUSED;
return 0;
err:
@@ -753,13 +753,13 @@ rte_eth_dev_configure(uint8_t port_id, uint16_t nb_rx_q, uint16_t nb_tx_q,
if ((dev_conf->intr_conf.lsc == 1) &&
(!(dev->data->dev_flags & RTE_ETH_DEV_INTR_LSC))) {
RTE_PMD_DEBUG_TRACE("driver %s does not support lsc\n",
- dev->data->drv_name);
+ dev->device->driver->name);
return -EINVAL;
}
if ((dev_conf->intr_conf.rmv == 1) &&
(!(dev->data->dev_flags & RTE_ETH_DEV_INTR_RMV))) {
RTE_PMD_DEBUG_TRACE("driver %s does not support rmv\n",
- dev->data->drv_name);
+ dev->device->driver->name);
return -EINVAL;
}
@@ -1900,7 +1900,7 @@ rte_eth_dev_info_get(uint8_t port_id, struct rte_eth_dev_info *dev_info)
RTE_FUNC_PTR_OR_RET(*dev->dev_ops->dev_infos_get);
(*dev->dev_ops->dev_infos_get)(dev, dev_info);
- dev_info->driver_name = dev->data->drv_name;
+ dev_info->driver_name = dev->device->driver->name;
dev_info->nb_rx_queues = dev->data->nb_rx_queues;
dev_info->nb_tx_queues = dev->data->nb_tx_queues;
}
@@ -1975,6 +1975,7 @@ int
rte_eth_dev_vlan_filter(uint8_t port_id, uint16_t vlan_id, int on)
{
struct rte_eth_dev *dev;
+ int ret;
RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -ENODEV);
dev = &rte_eth_devices[port_id];
@@ -1990,7 +1991,23 @@ rte_eth_dev_vlan_filter(uint8_t port_id, uint16_t vlan_id, int on)
}
RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->vlan_filter_set, -ENOTSUP);
- return (*dev->dev_ops->vlan_filter_set)(dev, vlan_id, on);
+ ret = (*dev->dev_ops->vlan_filter_set)(dev, vlan_id, on);
+ if (ret == 0) {
+ struct rte_vlan_filter_conf *vfc;
+ int vidx;
+ int vbit;
+
+ vfc = &dev->data->vlan_filter_conf;
+ vidx = vlan_id / 64;
+ vbit = vlan_id % 64;
+
+ if (on)
+ vfc->ids[vidx] |= UINT64_C(1) << vbit;
+ else
+ vfc->ids[vidx] &= ~(UINT64_C(1) << vbit);
+ }
+
+ return ret;
}
int
@@ -2351,6 +2368,7 @@ get_mac_addr_index(uint8_t port_id, const struct ether_addr *addr)
struct rte_eth_dev *dev = &rte_eth_devices[port_id];
unsigned i;
+ RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -ENODEV);
rte_eth_dev_info_get(port_id, &dev_info);
for (i = 0; i < dev_info.max_mac_addrs; i++)
@@ -2718,12 +2736,13 @@ rte_eth_dev_callback_unregister(uint8_t port_id,
return ret;
}
-void
+int
_rte_eth_dev_callback_process(struct rte_eth_dev *dev,
- enum rte_eth_event_type event, void *cb_arg)
+ enum rte_eth_event_type event, void *cb_arg, void *ret_param)
{
struct rte_eth_dev_callback *cb_lst;
struct rte_eth_dev_callback dev_cb;
+ int rc = 0;
rte_spinlock_lock(&rte_eth_dev_cb_lock);
TAILQ_FOREACH(cb_lst, &(dev->link_intr_cbs), next) {
@@ -2733,14 +2752,17 @@ _rte_eth_dev_callback_process(struct rte_eth_dev *dev,
cb_lst->active = 1;
if (cb_arg != NULL)
dev_cb.cb_arg = cb_arg;
+ if (ret_param != NULL)
+ dev_cb.ret_param = ret_param;
rte_spinlock_unlock(&rte_eth_dev_cb_lock);
- dev_cb.cb_fn(dev->data->port_id, dev_cb.event,
- dev_cb.cb_arg);
+ rc = dev_cb.cb_fn(dev->data->port_id, dev_cb.event,
+ dev_cb.cb_arg, dev_cb.ret_param);
rte_spinlock_lock(&rte_eth_dev_cb_lock);
cb_lst->active = 0;
}
rte_spinlock_unlock(&rte_eth_dev_cb_lock);
+ return rc;
}
int
@@ -2789,7 +2811,7 @@ rte_eth_dma_zone_reserve(const struct rte_eth_dev *dev, const char *ring_name,
const struct rte_memzone *mz;
snprintf(z_name, sizeof(z_name), "%s_%s_%d_%d",
- dev->data->drv_name, ring_name,
+ dev->device->driver->name, ring_name,
dev->data->port_id, queue_id);
mz = rte_memzone_lookup(z_name);
@@ -2872,128 +2894,6 @@ rte_eth_dev_rx_intr_disable(uint8_t port_id,
return (*dev->dev_ops->rx_queue_intr_disable)(dev, queue_id);
}
-#ifdef RTE_NIC_BYPASS
-int rte_eth_dev_bypass_init(uint8_t port_id)
-{
- struct rte_eth_dev *dev;
-
- RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -ENODEV);
-
- dev = &rte_eth_devices[port_id];
- RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->bypass_init, -ENOTSUP);
- (*dev->dev_ops->bypass_init)(dev);
- return 0;
-}
-
-int
-rte_eth_dev_bypass_state_show(uint8_t port_id, uint32_t *state)
-{
- struct rte_eth_dev *dev;
-
- RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -ENODEV);
-
- dev = &rte_eth_devices[port_id];
- RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->bypass_state_show, -ENOTSUP);
- (*dev->dev_ops->bypass_state_show)(dev, state);
- return 0;
-}
-
-int
-rte_eth_dev_bypass_state_set(uint8_t port_id, uint32_t *new_state)
-{
- struct rte_eth_dev *dev;
-
- RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -ENODEV);
-
- dev = &rte_eth_devices[port_id];
- RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->bypass_state_set, -ENOTSUP);
- (*dev->dev_ops->bypass_state_set)(dev, new_state);
- return 0;
-}
-
-int
-rte_eth_dev_bypass_event_show(uint8_t port_id, uint32_t event, uint32_t *state)
-{
- struct rte_eth_dev *dev;
-
- RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -ENODEV);
-
- dev = &rte_eth_devices[port_id];
- RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->bypass_state_show, -ENOTSUP);
- (*dev->dev_ops->bypass_event_show)(dev, event, state);
- return 0;
-}
-
-int
-rte_eth_dev_bypass_event_store(uint8_t port_id, uint32_t event, uint32_t state)
-{
- struct rte_eth_dev *dev;
-
- RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -ENODEV);
-
- dev = &rte_eth_devices[port_id];
-
- RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->bypass_event_set, -ENOTSUP);
- (*dev->dev_ops->bypass_event_set)(dev, event, state);
- return 0;
-}
-
-int
-rte_eth_dev_wd_timeout_store(uint8_t port_id, uint32_t timeout)
-{
- struct rte_eth_dev *dev;
-
- RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -ENODEV);
-
- dev = &rte_eth_devices[port_id];
-
- RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->bypass_wd_timeout_set, -ENOTSUP);
- (*dev->dev_ops->bypass_wd_timeout_set)(dev, timeout);
- return 0;
-}
-
-int
-rte_eth_dev_bypass_ver_show(uint8_t port_id, uint32_t *ver)
-{
- struct rte_eth_dev *dev;
-
- RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -ENODEV);
-
- dev = &rte_eth_devices[port_id];
-
- RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->bypass_ver_show, -ENOTSUP);
- (*dev->dev_ops->bypass_ver_show)(dev, ver);
- return 0;
-}
-
-int
-rte_eth_dev_bypass_wd_timeout_show(uint8_t port_id, uint32_t *wd_timeout)
-{
- struct rte_eth_dev *dev;
-
- RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -ENODEV);
-
- dev = &rte_eth_devices[port_id];
-
- RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->bypass_wd_timeout_show, -ENOTSUP);
- (*dev->dev_ops->bypass_wd_timeout_show)(dev, wd_timeout);
- return 0;
-}
-
-int
-rte_eth_dev_bypass_wd_reset(uint8_t port_id)
-{
- struct rte_eth_dev *dev;
-
- RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -ENODEV);
-
- dev = &rte_eth_devices[port_id];
-
- RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->bypass_wd_reset, -ENOTSUP);
- (*dev->dev_ops->bypass_wd_reset)(dev);
- return 0;
-}
-#endif
int
rte_eth_dev_filter_supported(uint8_t port_id, enum rte_filter_type filter_type)
@@ -3472,3 +3372,40 @@ rte_eth_dev_l2_tunnel_offload_set(uint8_t port_id,
-ENOTSUP);
return (*dev->dev_ops->l2_tunnel_offload_set)(dev, l2_tunnel, mask, en);
}
+
+static void
+rte_eth_dev_adjust_nb_desc(uint16_t *nb_desc,
+ const struct rte_eth_desc_lim *desc_lim)
+{
+ if (desc_lim->nb_align != 0)
+ *nb_desc = RTE_ALIGN_CEIL(*nb_desc, desc_lim->nb_align);
+
+ if (desc_lim->nb_max != 0)
+ *nb_desc = RTE_MIN(*nb_desc, desc_lim->nb_max);
+
+ *nb_desc = RTE_MAX(*nb_desc, desc_lim->nb_min);
+}
+
+int
+rte_eth_dev_adjust_nb_rx_tx_desc(uint8_t port_id,
+ uint16_t *nb_rx_desc,
+ uint16_t *nb_tx_desc)
+{
+ struct rte_eth_dev *dev;
+ struct rte_eth_dev_info dev_info;
+
+ RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -ENODEV);
+
+ dev = &rte_eth_devices[port_id];
+ RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->dev_infos_get, -ENOTSUP);
+
+ rte_eth_dev_info_get(port_id, &dev_info);
+
+ if (nb_rx_desc != NULL)
+ rte_eth_dev_adjust_nb_desc(nb_rx_desc, &dev_info.rx_desc_lim);
+
+ if (nb_tx_desc != NULL)
+ rte_eth_dev_adjust_nb_desc(nb_tx_desc, &dev_info.tx_desc_lim);
+
+ return 0;
+}
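A typical use of the new helper, clamping requested ring sizes to what the device supports before queue setup; port_id, socket_id and mb_pool are assumed from the surrounding code:

    uint16_t nb_rxd = 4096;
    uint16_t nb_txd = 4096;

    if (rte_eth_dev_adjust_nb_rx_tx_desc(port_id, &nb_rxd, &nb_txd) == 0) {
            /* nb_rxd/nb_txd now satisfy nb_min, nb_max and nb_align */
            rte_eth_rx_queue_setup(port_id, 0, nb_rxd, socket_id,
                            NULL, mb_pool);
            rte_eth_tx_queue_setup(port_id, 0, nb_txd, socket_id, NULL);
    }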
diff --git a/lib/librte_ether/rte_ethdev.h b/lib/librte_ether/rte_ethdev.h
index 0f38b45f..0adf3274 100644
--- a/lib/librte_ether/rte_ethdev.h
+++ b/lib/librte_ether/rte_ethdev.h
@@ -1,7 +1,7 @@
/*-
* BSD LICENSE
*
- * Copyright(c) 2010-2016 Intel Corporation. All rights reserved.
+ * Copyright(c) 2010-2017 Intel Corporation. All rights reserved.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -118,7 +118,7 @@
* - NIC queue statistics mappings
*
* Any other configuration will not be stored and will need to be re-entered
- * after a call to rte_eth_dev_start().
+ * before a call to rte_eth_dev_start().
*
* Finally, a network application can close an Ethernet device by invoking the
* rte_eth_dev_close() function.
@@ -172,8 +172,6 @@ extern "C" {
#include <stdint.h>
-#include <rte_dev.h>
-
/* Use this macro to check if LRO API is supported */
#define RTE_ETHDEV_HAS_LRO_SUPPORT
@@ -374,6 +372,14 @@ enum rte_vlan_type {
};
/**
+ * A structure used to describe a vlan filter.
+ * If the bit corresponding to a VID is set, that VID is enabled.
+ */
+struct rte_vlan_filter_conf {
+ uint64_t ids[64];
+};
+
+/**
* A structure used to configure the Receive Side Scaling (RSS) feature
* of an Ethernet port.
* If not NULL, the *rss_key* pointer of the *rss_conf* structure points
@@ -629,6 +635,24 @@ struct rte_eth_vmdq_dcb_conf {
/**< Selects a queue in a pool */
};
+/**
+ * A structure used to configure the VMDQ feature of an Ethernet port when
+ * not combined with the DCB feature.
+ *
+ * Using this feature, packets are routed to a pool of queues. By default,
+ * the pool selection is based on the MAC address and the vlan id in the
+ * vlan tag, as specified in the pool_map array.
+ * Passing the ETH_VMDQ_ACCEPT_UNTAG in the rx_mode field allows pool
+ * selection using only the MAC address. MAC address to pool mapping is done
+ * using the rte_eth_dev_mac_addr_add function, with the pool parameter
+ * corresponding to the pool id.
+ *
+ * Queue selection within the selected pool will be done using RSS when
+ * it is enabled, or will revert to the first queue of the pool if not.
+ *
+ * A default pool may be used, if desired, to route all traffic which
+ * does not match the vlan filter rules or any pool MAC address.
+ */
struct rte_eth_vmdq_rx_conf {
enum rte_eth_nb_pools nb_queue_pools; /**< VMDq only mode, 8 or 64 pools */
uint8_t enable_default_pool; /**< If non-zero, use a default pool */
@@ -901,6 +925,10 @@ struct rte_eth_conf {
#define DEV_TX_OFFLOAD_IPIP_TNL_TSO 0x00000800 /**< Used for tunneling packet. */
#define DEV_TX_OFFLOAD_GENEVE_TNL_TSO 0x00001000 /**< Used for tunneling packet. */
#define DEV_TX_OFFLOAD_MACSEC_INSERT 0x00002000
+#define DEV_TX_OFFLOAD_MT_LOCKFREE 0x00004000
+/**< Multiple threads can invoke rte_eth_tx_burst() concurrently on the same
+ * tx queue without SW lock.
+ */
struct rte_pci_device;
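A PMD advertises this flag through dev_info.tx_offload_capa; an application should check it before letting several threads share one TX queue. A minimal sketch (txq_is_mt_lockfree is hypothetical):

    #include <stdbool.h>
    #include <rte_ethdev.h>

    static bool
    txq_is_mt_lockfree(uint8_t port_id)
    {
            struct rte_eth_dev_info info;

            rte_eth_dev_info_get(port_id, &info);
            return (info.tx_offload_capa & DEV_TX_OFFLOAD_MT_LOCKFREE) != 0;
    }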
@@ -1048,6 +1076,8 @@ TAILQ_HEAD(rte_eth_dev_cb_list, rte_eth_dev_callback);
} \
} while (0)
+#define RTE_ETH_DEV_TO_PCI(eth_dev) RTE_DEV_TO_PCI((eth_dev)->device)
+
/**
* l2 tunnel configuration.
*/
@@ -1381,59 +1411,6 @@ typedef int (*eth_l2_tunnel_offload_set_t)
uint8_t en);
/**< @internal enable/disable the l2 tunnel offload functions */
-#ifdef RTE_NIC_BYPASS
-
-enum {
- RTE_BYPASS_MODE_NONE,
- RTE_BYPASS_MODE_NORMAL,
- RTE_BYPASS_MODE_BYPASS,
- RTE_BYPASS_MODE_ISOLATE,
- RTE_BYPASS_MODE_NUM,
-};
-
-#define RTE_BYPASS_MODE_VALID(x) \
- ((x) > RTE_BYPASS_MODE_NONE && (x) < RTE_BYPASS_MODE_NUM)
-
-enum {
- RTE_BYPASS_EVENT_NONE,
- RTE_BYPASS_EVENT_START,
- RTE_BYPASS_EVENT_OS_ON = RTE_BYPASS_EVENT_START,
- RTE_BYPASS_EVENT_POWER_ON,
- RTE_BYPASS_EVENT_OS_OFF,
- RTE_BYPASS_EVENT_POWER_OFF,
- RTE_BYPASS_EVENT_TIMEOUT,
- RTE_BYPASS_EVENT_NUM
-};
-
-#define RTE_BYPASS_EVENT_VALID(x) \
- ((x) > RTE_BYPASS_EVENT_NONE && (x) < RTE_BYPASS_MODE_NUM)
-
-enum {
- RTE_BYPASS_TMT_OFF, /* timeout disabled. */
- RTE_BYPASS_TMT_1_5_SEC, /* timeout for 1.5 seconds */
- RTE_BYPASS_TMT_2_SEC, /* timeout for 2 seconds */
- RTE_BYPASS_TMT_3_SEC, /* timeout for 3 seconds */
- RTE_BYPASS_TMT_4_SEC, /* timeout for 4 seconds */
- RTE_BYPASS_TMT_8_SEC, /* timeout for 8 seconds */
- RTE_BYPASS_TMT_16_SEC, /* timeout for 16 seconds */
- RTE_BYPASS_TMT_32_SEC, /* timeout for 32 seconds */
- RTE_BYPASS_TMT_NUM
-};
-
-#define RTE_BYPASS_TMT_VALID(x) \
- ((x) == RTE_BYPASS_TMT_OFF || \
- ((x) > RTE_BYPASS_TMT_OFF && (x) < RTE_BYPASS_TMT_NUM))
-
-typedef void (*bypass_init_t)(struct rte_eth_dev *dev);
-typedef int32_t (*bypass_state_set_t)(struct rte_eth_dev *dev, uint32_t *new_state);
-typedef int32_t (*bypass_state_show_t)(struct rte_eth_dev *dev, uint32_t *state);
-typedef int32_t (*bypass_event_set_t)(struct rte_eth_dev *dev, uint32_t state, uint32_t event);
-typedef int32_t (*bypass_event_show_t)(struct rte_eth_dev *dev, uint32_t event_shift, uint32_t *event);
-typedef int32_t (*bypass_wd_timeout_set_t)(struct rte_eth_dev *dev, uint32_t timeout);
-typedef int32_t (*bypass_wd_timeout_show_t)(struct rte_eth_dev *dev, uint32_t *wd_timeout);
-typedef int32_t (*bypass_ver_show_t)(struct rte_eth_dev *dev, uint32_t *ver);
-typedef int32_t (*bypass_wd_reset_t)(struct rte_eth_dev *dev);
-#endif
typedef int (*eth_filter_ctrl_t)(struct rte_eth_dev *dev,
enum rte_filter_type filter_type,
@@ -1441,6 +1418,9 @@ typedef int (*eth_filter_ctrl_t)(struct rte_eth_dev *dev,
void *arg);
/**< @internal Take operations to assigned filter type on an Ethernet device */
+typedef int (*eth_tm_ops_get_t)(struct rte_eth_dev *dev, void *ops);
+/**< @internal Get Traffic Management (TM) operations on an Ethernet device */
+
typedef int (*eth_get_dcb_info)(struct rte_eth_dev *dev,
struct rte_eth_dcb_info *dcb_info);
/**< @internal Get dcb information on an Ethernet device */
@@ -1460,7 +1440,7 @@ struct eth_dev_ops {
eth_promiscuous_enable_t promiscuous_enable; /**< Promiscuous ON. */
eth_promiscuous_disable_t promiscuous_disable;/**< Promiscuous OFF. */
eth_allmulticast_enable_t allmulticast_enable;/**< RX multicast ON. */
- eth_allmulticast_disable_t allmulticast_disable;/**< RX multicast OF. */
+ eth_allmulticast_disable_t allmulticast_disable;/**< RX multicast OFF. */
eth_mac_addr_remove_t mac_addr_remove; /**< Remove MAC address. */
eth_mac_addr_add_t mac_addr_add; /**< Add a MAC address. */
eth_mac_addr_set_t mac_addr_set; /**< Set a MAC address. */
@@ -1540,18 +1520,6 @@ struct eth_dev_ops {
eth_get_eeprom_t get_eeprom; /**< Get eeprom data. */
eth_set_eeprom_t set_eeprom; /**< Set eeprom. */
- /* bypass control */
-#ifdef RTE_NIC_BYPASS
- bypass_init_t bypass_init;
- bypass_state_set_t bypass_state_set;
- bypass_state_show_t bypass_state_show;
- bypass_event_set_t bypass_event_set;
- bypass_event_show_t bypass_event_show;
- bypass_wd_timeout_set_t bypass_wd_timeout_set;
- bypass_wd_timeout_show_t bypass_wd_timeout_show;
- bypass_ver_show_t bypass_ver_show;
- bypass_wd_reset_t bypass_wd_reset;
-#endif
eth_filter_ctrl_t filter_ctrl; /**< common filter control. */
@@ -1573,6 +1541,9 @@ struct eth_dev_ops {
/**< Get extended device statistic values by ID. */
eth_xstats_get_names_by_id_t xstats_get_names_by_id;
/**< Get name of extended device statistics by ID. */
+
+ eth_tm_ops_get_t tm_ops_get;
+ /**< Get Traffic Management (TM) operations. */
};
/**
@@ -1644,6 +1615,7 @@ struct rte_eth_rxtx_callback {
enum rte_eth_dev_state {
RTE_ETH_DEV_UNUSED = 0,
RTE_ETH_DEV_ATTACHED,
+ RTE_ETH_DEV_DEFERRED,
};
/**
@@ -1687,7 +1659,7 @@ struct rte_eth_dev_sriov {
};
#define RTE_ETH_DEV_SRIOV(dev) ((dev)->data->sriov)
-#define RTE_ETH_NAME_MAX_LEN (32)
+#define RTE_ETH_NAME_MAX_LEN RTE_DEV_NAME_MAX_LEN
/**
* @internal
@@ -1737,7 +1709,8 @@ struct rte_eth_dev_data {
uint32_t dev_flags; /**< Capabilities */
enum rte_kernel_driver kdrv; /**< Kernel driver passthrough */
int numa_node; /**< NUMA node connection */
- const char *drv_name; /**< Driver name */
+ struct rte_vlan_filter_conf vlan_filter_conf;
+ /**< VLAN filter configuration. */
};
/** Device supports hotplug detach */
@@ -1777,13 +1750,12 @@ uint8_t rte_eth_find_next(uint8_t port_id);
/**
* Get the total number of Ethernet devices that have been successfully
- * initialized by the [matching] Ethernet driver during the PCI probing phase.
- * All devices whose port identifier is in the range
- * [0, rte_eth_dev_count() - 1] can be operated on by network applications
- * immediately after invoking rte_eal_init().
- * If the application unplugs a port using hotplug function, The enabled port
- * numbers may be noncontiguous. In the case, the applications need to manage
- * enabled port by using the ``RTE_ETH_FOREACH_DEV()`` macro.
+ * initialized by the matching Ethernet driver during the PCI probing phase
+ * and that are available for applications to use. These devices must be
+ * accessed by using the ``RTE_ETH_FOREACH_DEV()`` macro to deal with
+ * non-contiguous ranges of devices.
+ * These non-contiguous ranges can be created by calls to hotplug functions or
+ * by some PMDs.
*
* @return
* - The total number of usable Ethernet devices.
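Since enabled port numbers may be non-contiguous, iteration over ports should go through the macro rather than a plain index loop. A minimal sketch:

uint8_t pid;

RTE_ETH_FOREACH_DEV(pid) {
	/* pid refers to a valid, attached port on each iteration. */
}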
@@ -1859,7 +1831,8 @@ int rte_eth_dev_attach(const char *devargs, uint8_t *port_id);
* @param port_id
* The port identifier of the device to detach.
* @param devname
- * A pointer to a device name actually detached.
+ * A pointer to a buffer that will be filled with the device name.
+ * This buffer must be at least RTE_DEV_NAME_MAX_LEN long.
* @return
* 0 on success and devname is filled, negative on error
*/
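A sketch of this contract, with the buffer sized as documented (port 0 and a successful detach assumed):

char devname[RTE_DEV_NAME_MAX_LEN];

if (rte_eth_dev_detach(0, devname) == 0)
	printf("detached %s\n", devname);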
@@ -2358,7 +2331,7 @@ rte_eth_xstats_get_names_by_id(uint8_t port_id,
* @param port_id
* The port identifier of the Ethernet device.
* @param ids
- * A pointer to an ids array passed by application. This tells wich
+ * A pointer to an ids array passed by application. This tells which
* statistics values function should retrieve. This parameter
* can be set to NULL if n is 0. In this case function will retrieve
 * all available statistics.
@@ -2997,6 +2970,10 @@ static inline int rte_eth_tx_descriptor_status(uint8_t port_id,
* rte_eth_tx_burst() function must [attempt to] free the *rte_mbuf* buffers
* of those packets whose transmission was effectively completed.
*
+ * If the PMD is DEV_TX_OFFLOAD_MT_LOCKFREE capable, multiple threads can
+ * invoke this function concurrently on the same Tx queue without an SW lock.
+ * @see rte_eth_dev_info_get, struct rte_eth_txconf::txq_flags
+ *
* @param port_id
* The port identifier of the Ethernet device.
* @param queue_id
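A sketch of the capability check for the lock-free Tx path described above (port_id assumed valid):

struct rte_eth_dev_info info;

rte_eth_dev_info_get(port_id, &info);
if (info.tx_offload_capa & DEV_TX_OFFLOAD_MT_LOCKFREE) {
	/* Several threads may call rte_eth_tx_burst() on the same
	 * Tx queue without taking a software lock. */
}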
@@ -3266,7 +3243,7 @@ rte_eth_tx_buffer_flush(uint8_t port_id, uint16_t queue_id,
* causing N packets to be sent, and the error callback to be called for
* the rest.
*/
-static inline uint16_t __attribute__((always_inline))
+static __rte_always_inline uint16_t
rte_eth_tx_buffer(uint8_t port_id, uint16_t queue_id,
struct rte_eth_dev_tx_buffer *buffer, struct rte_mbuf *tx_pkt)
{
@@ -3401,8 +3378,8 @@ enum rte_eth_event_type {
RTE_ETH_EVENT_MAX /**< max value of this enum */
};
-typedef void (*rte_eth_dev_cb_fn)(uint8_t port_id, \
- enum rte_eth_event_type event, void *cb_arg);
+typedef int (*rte_eth_dev_cb_fn)(uint8_t port_id,
+ enum rte_eth_event_type event, void *cb_arg, void *ret_param);
/**< user application callback to be registered for interrupts */
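With the new prototype, a callback returns int and receives a ret_param; a sketch of a conforming handler (the body is hypothetical):

static int
on_port_event(uint8_t port_id, enum rte_eth_event_type event,
	      void *cb_arg, void *ret_param)
{
	RTE_SET_USED(port_id);
	RTE_SET_USED(cb_arg);
	RTE_SET_USED(ret_param);
	if (event == RTE_ETH_EVENT_INTR_RESET)
		return 0; /* allow the reset to proceed */
	return 0;
}

/* Registration keeps the same entry point:
 * rte_eth_dev_callback_register(port_id, RTE_ETH_EVENT_INTR_RESET,
 *                               on_port_event, NULL);
 */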
@@ -3419,11 +3396,6 @@ typedef void (*rte_eth_dev_cb_fn)(uint8_t port_id, \
* @param cb_arg
* Pointer to the parameters for the registered callback.
*
- * The user data is overwritten in the case of RTE_ETH_EVENT_VF_MBOX.
- * This even occurs when a message from the VF is received by the PF.
- * The user data is overwritten with struct rte_pmd_ixgbe_mb_event_param.
- * This struct is defined in rte_pmd_ixgbe.h.
- *
* @return
* - On success, zero.
* - On failure, a negative value.
@@ -3463,15 +3435,17 @@ int rte_eth_dev_callback_unregister(uint8_t port_id,
* @param event
* Eth device interrupt event type.
* @param cb_arg
- * Update callback parameter to pass data back to user application.
+ * Callback parameter.
+ * @param ret_param
+ * To pass data back to user application.
* This allows the user application to decide if a particular function
* is permitted or not.
*
* @return
- * void
+ * int
*/
-void _rte_eth_dev_callback_process(struct rte_eth_dev *dev,
- enum rte_eth_event_type event, void *cb_arg);
+int _rte_eth_dev_callback_process(struct rte_eth_dev *dev,
+ enum rte_eth_event_type event, void *cb_arg, void *ret_param);
/**
* When there is no rx packet coming in Rx Queue for a long time, we can
@@ -3827,171 +3801,6 @@ int rte_eth_mirror_rule_reset(uint8_t port_id,
int rte_eth_set_queue_rate_limit(uint8_t port_id, uint16_t queue_idx,
uint16_t tx_rate);
-/**
- * Initialize bypass logic. This function needs to be called before
- * executing any other bypass API.
- *
- * @param port
- * The port identifier of the Ethernet device.
- * @return
- * - (0) if successful.
- * - (-ENOTSUP) if hardware doesn't support.
- * - (-EINVAL) if bad parameter.
- */
-int rte_eth_dev_bypass_init(uint8_t port);
-
-/**
- * Return bypass state.
- *
- * @param port
- * The port identifier of the Ethernet device.
- * @param state
- * The return bypass state.
- * - (1) Normal mode
- * - (2) Bypass mode
- * - (3) Isolate mode
- * @return
- * - (0) if successful.
- * - (-ENOTSUP) if hardware doesn't support.
- * - (-EINVAL) if bad parameter.
- */
-int rte_eth_dev_bypass_state_show(uint8_t port, uint32_t *state);
-
-/**
- * Set bypass state
- *
- * @param port
- * The port identifier of the Ethernet device.
- * @param new_state
- * The current bypass state.
- * - (1) Normal mode
- * - (2) Bypass mode
- * - (3) Isolate mode
- * @return
- * - (0) if successful.
- * - (-ENOTSUP) if hardware doesn't support.
- * - (-EINVAL) if bad parameter.
- */
-int rte_eth_dev_bypass_state_set(uint8_t port, uint32_t *new_state);
-
-/**
- * Return bypass state when given event occurs.
- *
- * @param port
- * The port identifier of the Ethernet device.
- * @param event
- * The bypass event
- * - (1) Main power on (power button is pushed)
- * - (2) Auxiliary power on (power supply is being plugged)
- * - (3) Main power off (system shutdown and power supply is left plugged in)
- * - (4) Auxiliary power off (power supply is being unplugged)
- * - (5) Display or set the watchdog timer
- * @param state
- * The bypass state when given event occurred.
- * - (1) Normal mode
- * - (2) Bypass mode
- * - (3) Isolate mode
- * @return
- * - (0) if successful.
- * - (-ENOTSUP) if hardware doesn't support.
- * - (-EINVAL) if bad parameter.
- */
-int rte_eth_dev_bypass_event_show(uint8_t port, uint32_t event, uint32_t *state);
-
-/**
- * Set bypass state when given event occurs.
- *
- * @param port
- * The port identifier of the Ethernet device.
- * @param event
- * The bypass event
- * - (1) Main power on (power button is pushed)
- * - (2) Auxiliary power on (power supply is being plugged)
- * - (3) Main power off (system shutdown and power supply is left plugged in)
- * - (4) Auxiliary power off (power supply is being unplugged)
- * - (5) Display or set the watchdog timer
- * @param state
- * The assigned state when given event occurs.
- * - (1) Normal mode
- * - (2) Bypass mode
- * - (3) Isolate mode
- * @return
- * - (0) if successful.
- * - (-ENOTSUP) if hardware doesn't support.
- * - (-EINVAL) if bad parameter.
- */
-int rte_eth_dev_bypass_event_store(uint8_t port, uint32_t event, uint32_t state);
-
-/**
- * Set bypass watchdog timeout count.
- *
- * @param port
- * The port identifier of the Ethernet device.
- * @param timeout
- * The timeout to be set.
- * - (0) 0 seconds (timer is off)
- * - (1) 1.5 seconds
- * - (2) 2 seconds
- * - (3) 3 seconds
- * - (4) 4 seconds
- * - (5) 8 seconds
- * - (6) 16 seconds
- * - (7) 32 seconds
- * @return
- * - (0) if successful.
- * - (-ENOTSUP) if hardware doesn't support.
- * - (-EINVAL) if bad parameter.
- */
-int rte_eth_dev_wd_timeout_store(uint8_t port, uint32_t timeout);
-
-/**
- * Get bypass firmware version.
- *
- * @param port
- * The port identifier of the Ethernet device.
- * @param ver
- * The firmware version
- * @return
- * - (0) if successful.
- * - (-ENOTSUP) if hardware doesn't support.
- * - (-EINVAL) if bad parameter.
- */
-int rte_eth_dev_bypass_ver_show(uint8_t port, uint32_t *ver);
-
-/**
- * Return bypass watchdog timeout in seconds
- *
- * @param port
- * The port identifier of the Ethernet device.
- * @param wd_timeout
- * The return watchdog timeout. "0" represents timer expired
- * - (0) 0 seconds (timer is off)
- * - (1) 1.5 seconds
- * - (2) 2 seconds
- * - (3) 3 seconds
- * - (4) 4 seconds
- * - (5) 8 seconds
- * - (6) 16 seconds
- * - (7) 32 seconds
- * @return
- * - (0) if successful.
- * - (-ENOTSUP) if hardware doesn't support.
- * - (-EINVAL) if bad parameter.
- */
-int rte_eth_dev_bypass_wd_timeout_show(uint8_t port, uint32_t *wd_timeout);
-
-/**
- * Reset bypass watchdog timer
- *
- * @param port
- * The port identifier of the Ethernet device.
- * @return
- * - (0) if successful.
- * - (-ENOTSUP) if hardware doesn't support.
- * - (-EINVAL) if bad parameter.
- */
-int rte_eth_dev_bypass_wd_reset(uint8_t port);
-
/**
* Configuration of Receive Side Scaling hash computation of Ethernet device.
*
@@ -4587,7 +4396,7 @@ rte_eth_dev_l2_tunnel_offload_set(uint8_t port_id,
* @param port_id
* pointer to port identifier of the device
* @return
-* - (0) if successful.
+* - (0) if successful and port_id is filled.
* - (-ENODEV or -EINVAL) on failure.
*/
int
@@ -4607,6 +4416,26 @@ rte_eth_dev_get_port_by_name(const char *name, uint8_t *port_id);
int
rte_eth_dev_get_name_by_port(uint8_t port_id, char *name);
+/**
+ * Check that the numbers of Rx and Tx descriptors satisfy the descriptor
+ * limits from the Ethernet device information; otherwise adjust them to
+ * the boundaries.
+ *
+ * @param port_id
+ * The port identifier of the Ethernet device.
+ * @param nb_rx_desc
+ * A pointer to a uint16_t where the number of receive
+ * descriptors is stored.
+ * @param nb_tx_desc
+ * A pointer to a uint16_t where the number of transmit
+ * descriptors is stored.
+ * @return
+ * - (0) if successful.
+ * - (-ENOTSUP, -ENODEV or -EINVAL) on failure.
+ */
+int rte_eth_dev_adjust_nb_rx_tx_desc(uint8_t port_id,
+ uint16_t *nb_rx_desc,
+ uint16_t *nb_tx_desc);
+
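Typical use is to clamp the requested ring sizes before queue setup; a sketch:

uint16_t nb_rxd = 1024;
uint16_t nb_txd = 1024;

if (rte_eth_dev_adjust_nb_rx_tx_desc(port_id, &nb_rxd, &nb_txd) == 0) {
	/* nb_rxd and nb_txd now fit the PMD limits and can be passed
	 * to rte_eth_rx_queue_setup()/rte_eth_tx_queue_setup(). */
}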
#ifdef __cplusplus
}
#endif
diff --git a/lib/librte_ether/rte_ethdev_pci.h b/lib/librte_ether/rte_ethdev_pci.h
index d3bc03cf..56b10721 100644
--- a/lib/librte_ether/rte_ethdev_pci.h
+++ b/lib/librte_ether/rte_ethdev_pci.h
@@ -45,9 +45,6 @@
* The *eth_dev* pointer is the address of the *rte_eth_dev* structure.
* @param pci_dev
* The *pci_dev* pointer is the address of the *rte_pci_device* structure.
- *
- * @return
- * - 0 on success, negative on error
*/
static inline void
rte_eth_copy_pci_info(struct rte_eth_dev *eth_dev,
@@ -69,7 +66,6 @@ rte_eth_copy_pci_info(struct rte_eth_dev *eth_dev,
eth_dev->data->kdrv = pci_dev->kdrv;
eth_dev->data->numa_node = pci_dev->device.numa_node;
- eth_dev->data->drv_name = pci_dev->driver->driver.name;
}
/**
@@ -118,7 +114,6 @@ rte_eth_dev_pci_allocate(struct rte_pci_device *dev, size_t private_data_size)
}
eth_dev->device = &dev->device;
- eth_dev->intr_handle = &dev->intr_handle;
rte_eth_copy_pci_info(eth_dev, dev);
return eth_dev;
}
@@ -134,6 +129,12 @@ rte_eth_dev_pci_release(struct rte_eth_dev *eth_dev)
eth_dev->data->dev_private = NULL;
+ /*
+ * A secondary process will check the name when attaching.
+ * Clear this field to avoid attaching to a released port.
+ */
+ eth_dev->data->name[0] = '\0';
+
eth_dev->device = NULL;
eth_dev->intr_handle = NULL;
}
diff --git a/lib/librte_ether/rte_ethdev_vdev.h b/lib/librte_ether/rte_ethdev_vdev.h
index fa2cb61e..4d2c3e2b 100644
--- a/lib/librte_ether/rte_ethdev_vdev.h
+++ b/lib/librte_ether/rte_ethdev_vdev.h
@@ -77,7 +77,6 @@ rte_eth_vdev_allocate(struct rte_vdev_device *dev, size_t private_data_size)
eth_dev->data->kdrv = RTE_KDRV_NONE;
eth_dev->data->numa_node = dev->device.numa_node;
- eth_dev->data->drv_name = dev->device.driver->name;
return eth_dev;
}
diff --git a/lib/librte_ether/rte_ether_version.map b/lib/librte_ether/rte_ether_version.map
index d6726bb1..42837285 100644
--- a/lib/librte_ether/rte_ether_version.map
+++ b/lib/librte_ether/rte_ether_version.map
@@ -1,7 +1,6 @@
DPDK_2.2 {
global:
- _rte_eth_dev_callback_process;
rte_eth_add_rx_callback;
rte_eth_add_tx_callback;
rte_eth_allmulticast_disable;
@@ -10,14 +9,6 @@ DPDK_2.2 {
rte_eth_dev_allocate;
rte_eth_dev_allocated;
rte_eth_dev_attach;
- rte_eth_dev_bypass_event_show;
- rte_eth_dev_bypass_event_store;
- rte_eth_dev_bypass_init;
- rte_eth_dev_bypass_state_set;
- rte_eth_dev_bypass_state_show;
- rte_eth_dev_bypass_ver_show;
- rte_eth_dev_bypass_wd_reset;
- rte_eth_dev_bypass_wd_timeout_show;
rte_eth_dev_callback_register;
rte_eth_dev_callback_unregister;
rte_eth_dev_close;
@@ -70,7 +61,6 @@ DPDK_2.2 {
rte_eth_dev_uc_all_hash_table_set;
rte_eth_dev_uc_hash_table_set;
rte_eth_dev_vlan_filter;
- rte_eth_dev_wd_timeout_store;
rte_eth_dma_zone_reserve;
rte_eth_led_off;
rte_eth_led_on;
@@ -151,8 +141,49 @@ DPDK_17.05 {
rte_eth_dev_attach_secondary;
rte_eth_find_next;
+ rte_eth_tx_done_cleanup;
rte_eth_xstats_get_by_id;
rte_eth_xstats_get_id_by_name;
rte_eth_xstats_get_names_by_id;
} DPDK_17.02;
+
+DPDK_17.08 {
+ global:
+
+ _rte_eth_dev_callback_process;
+ rte_eth_dev_adjust_nb_rx_tx_desc;
+ rte_flow_copy;
+ rte_flow_isolate;
+ rte_tm_capabilities_get;
+ rte_tm_get_leaf_nodes;
+ rte_tm_hierarchy_commit;
+ rte_tm_level_capabilities_get;
+ rte_tm_mark_ip_dscp;
+ rte_tm_mark_ip_ecn;
+ rte_tm_mark_vlan_dei;
+ rte_tm_node_add;
+ rte_tm_node_capabilities_get;
+ rte_tm_node_cman_update;
+ rte_tm_node_delete;
+ rte_tm_node_parent_update;
+ rte_tm_node_resume;
+ rte_tm_node_shaper_update;
+ rte_tm_node_shared_shaper_update;
+ rte_tm_node_shared_wred_context_update;
+ rte_tm_node_stats_read;
+ rte_tm_node_stats_update;
+ rte_tm_node_suspend;
+ rte_tm_node_type_get;
+ rte_tm_node_wfq_weight_mode_update;
+ rte_tm_node_wred_context_update;
+ rte_tm_shaper_profile_add;
+ rte_tm_shaper_profile_delete;
+ rte_tm_shared_shaper_add_update;
+ rte_tm_shared_shaper_delete;
+ rte_tm_shared_wred_context_add_update;
+ rte_tm_shared_wred_context_delete;
+ rte_tm_wred_profile_add;
+ rte_tm_wred_profile_delete;
+
+} DPDK_17.05;
diff --git a/lib/librte_ether/rte_flow.c b/lib/librte_ether/rte_flow.c
index aaa70d68..2001fbbf 100644
--- a/lib/librte_ether/rte_flow.c
+++ b/lib/librte_ether/rte_flow.c
@@ -31,14 +31,81 @@
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
+#include <errno.h>
+#include <stddef.h>
#include <stdint.h>
+#include <string.h>
+#include <rte_common.h>
#include <rte_errno.h>
#include <rte_branch_prediction.h>
#include "rte_ethdev.h"
#include "rte_flow_driver.h"
#include "rte_flow.h"
+/**
+ * Flow elements description tables.
+ */
+struct rte_flow_desc_data {
+ const char *name;
+ size_t size;
+};
+
+/** Generate flow_item[] entry. */
+#define MK_FLOW_ITEM(t, s) \
+ [RTE_FLOW_ITEM_TYPE_ ## t] = { \
+ .name = # t, \
+ .size = s, \
+ }
+
+/** Information about known flow pattern items. */
+static const struct rte_flow_desc_data rte_flow_desc_item[] = {
+ MK_FLOW_ITEM(END, 0),
+ MK_FLOW_ITEM(VOID, 0),
+ MK_FLOW_ITEM(INVERT, 0),
+ MK_FLOW_ITEM(ANY, sizeof(struct rte_flow_item_any)),
+ MK_FLOW_ITEM(PF, 0),
+ MK_FLOW_ITEM(VF, sizeof(struct rte_flow_item_vf)),
+ MK_FLOW_ITEM(PORT, sizeof(struct rte_flow_item_port)),
+ MK_FLOW_ITEM(RAW, sizeof(struct rte_flow_item_raw)), /* +pattern[] */
+ MK_FLOW_ITEM(ETH, sizeof(struct rte_flow_item_eth)),
+ MK_FLOW_ITEM(VLAN, sizeof(struct rte_flow_item_vlan)),
+ MK_FLOW_ITEM(IPV4, sizeof(struct rte_flow_item_ipv4)),
+ MK_FLOW_ITEM(IPV6, sizeof(struct rte_flow_item_ipv6)),
+ MK_FLOW_ITEM(ICMP, sizeof(struct rte_flow_item_icmp)),
+ MK_FLOW_ITEM(UDP, sizeof(struct rte_flow_item_udp)),
+ MK_FLOW_ITEM(TCP, sizeof(struct rte_flow_item_tcp)),
+ MK_FLOW_ITEM(SCTP, sizeof(struct rte_flow_item_sctp)),
+ MK_FLOW_ITEM(VXLAN, sizeof(struct rte_flow_item_vxlan)),
+ MK_FLOW_ITEM(MPLS, sizeof(struct rte_flow_item_mpls)),
+ MK_FLOW_ITEM(GRE, sizeof(struct rte_flow_item_gre)),
+ MK_FLOW_ITEM(E_TAG, sizeof(struct rte_flow_item_e_tag)),
+ MK_FLOW_ITEM(NVGRE, sizeof(struct rte_flow_item_nvgre)),
+};
+
+/** Generate flow_action[] entry. */
+#define MK_FLOW_ACTION(t, s) \
+ [RTE_FLOW_ACTION_TYPE_ ## t] = { \
+ .name = # t, \
+ .size = s, \
+ }
+
+/** Information about known flow actions. */
+static const struct rte_flow_desc_data rte_flow_desc_action[] = {
+ MK_FLOW_ACTION(END, 0),
+ MK_FLOW_ACTION(VOID, 0),
+ MK_FLOW_ACTION(PASSTHRU, 0),
+ MK_FLOW_ACTION(MARK, sizeof(struct rte_flow_action_mark)),
+ MK_FLOW_ACTION(FLAG, 0),
+ MK_FLOW_ACTION(QUEUE, sizeof(struct rte_flow_action_queue)),
+ MK_FLOW_ACTION(DROP, 0),
+ MK_FLOW_ACTION(COUNT, 0),
+ MK_FLOW_ACTION(DUP, sizeof(struct rte_flow_action_dup)),
+ MK_FLOW_ACTION(RSS, sizeof(struct rte_flow_action_rss)), /* +queue[] */
+ MK_FLOW_ACTION(PF, 0),
+ MK_FLOW_ACTION(VF, sizeof(struct rte_flow_action_vf)),
+};
+
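Both tables use C99 designated array initializers keyed by the enum value, so looking up a descriptor is a direct index:

size_t sz = rte_flow_desc_item[RTE_FLOW_ITEM_TYPE_UDP].size;
/* sz == sizeof(struct rte_flow_item_udp) */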
/* Get generic flow operations structure from a port. */
const struct rte_flow_ops *
rte_flow_ops_get(uint8_t port_id, struct rte_flow_error *error)
@@ -157,3 +224,185 @@ rte_flow_query(uint8_t port_id,
RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
NULL, rte_strerror(ENOSYS));
}
+
+/* Restrict ingress traffic to the defined flow rules. */
+int
+rte_flow_isolate(uint8_t port_id,
+ int set,
+ struct rte_flow_error *error)
+{
+ struct rte_eth_dev *dev = &rte_eth_devices[port_id];
+ const struct rte_flow_ops *ops = rte_flow_ops_get(port_id, error);
+
+ if (!ops)
+ return -rte_errno;
+ if (likely(!!ops->isolate))
+ return ops->isolate(dev, set, error);
+ return -rte_flow_error_set(error, ENOSYS,
+ RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
+ NULL, rte_strerror(ENOSYS));
+}
+
+/** Compute storage space needed by item specification. */
+static void
+flow_item_spec_size(const struct rte_flow_item *item,
+ size_t *size, size_t *pad)
+{
+ if (!item->spec) {
+ *size = 0;
+ goto empty;
+ }
+ switch (item->type) {
+ union {
+ const struct rte_flow_item_raw *raw;
+ } spec;
+
+ /* Not a fall-through */
+ case RTE_FLOW_ITEM_TYPE_RAW:
+ spec.raw = item->spec;
+ *size = offsetof(struct rte_flow_item_raw, pattern) +
+ spec.raw->length * sizeof(*spec.raw->pattern);
+ break;
+ default:
+ *size = rte_flow_desc_item[item->type].size;
+ break;
+ }
+empty:
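+	/* Round each copied object up to an 8-byte (sizeof(double))
+	 * boundary so subsequent spec/last/mask copies stay aligned. */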
+ *pad = RTE_ALIGN_CEIL(*size, sizeof(double)) - *size;
+}
+
+/** Compute storage space needed by action configuration. */
+static void
+flow_action_conf_size(const struct rte_flow_action *action,
+ size_t *size, size_t *pad)
+{
+ if (!action->conf) {
+ *size = 0;
+ goto empty;
+ }
+ switch (action->type) {
+ union {
+ const struct rte_flow_action_rss *rss;
+ } conf;
+
+ /* Not a fall-through. */
+ case RTE_FLOW_ACTION_TYPE_RSS:
+ conf.rss = action->conf;
+ *size = offsetof(struct rte_flow_action_rss, queue) +
+ conf.rss->num * sizeof(*conf.rss->queue);
+ break;
+ default:
+ *size = rte_flow_desc_action[action->type].size;
+ break;
+ }
+empty:
+ *pad = RTE_ALIGN_CEIL(*size, sizeof(double)) - *size;
+}
+
+/** Store a full rte_flow description. */
+size_t
+rte_flow_copy(struct rte_flow_desc *desc, size_t len,
+ const struct rte_flow_attr *attr,
+ const struct rte_flow_item *items,
+ const struct rte_flow_action *actions)
+{
+ struct rte_flow_desc *fd = NULL;
+ size_t tmp;
+ size_t pad;
+ size_t off1 = 0;
+ size_t off2 = 0;
+ size_t size = 0;
+
+store:
+ if (items) {
+ const struct rte_flow_item *item;
+
+ item = items;
+ if (fd)
+ fd->items = (void *)&fd->data[off1];
+ do {
+ struct rte_flow_item *dst = NULL;
+
+ if ((size_t)item->type >=
+ RTE_DIM(rte_flow_desc_item) ||
+ !rte_flow_desc_item[item->type].name) {
+ rte_errno = ENOTSUP;
+ return 0;
+ }
+ if (fd)
+ dst = memcpy(fd->data + off1, item,
+ sizeof(*item));
+ off1 += sizeof(*item);
+ flow_item_spec_size(item, &tmp, &pad);
+ if (item->spec) {
+ if (fd)
+ dst->spec = memcpy(fd->data + off2,
+ item->spec, tmp);
+ off2 += tmp + pad;
+ }
+ if (item->last) {
+ if (fd)
+ dst->last = memcpy(fd->data + off2,
+ item->last, tmp);
+ off2 += tmp + pad;
+ }
+ if (item->mask) {
+ if (fd)
+ dst->mask = memcpy(fd->data + off2,
+ item->mask, tmp);
+ off2 += tmp + pad;
+ }
+ off2 = RTE_ALIGN_CEIL(off2, sizeof(double));
+ } while ((item++)->type != RTE_FLOW_ITEM_TYPE_END);
+ off1 = RTE_ALIGN_CEIL(off1, sizeof(double));
+ }
+ if (actions) {
+ const struct rte_flow_action *action;
+
+ action = actions;
+ if (fd)
+ fd->actions = (void *)&fd->data[off1];
+ do {
+ struct rte_flow_action *dst = NULL;
+
+ if ((size_t)action->type >=
+ RTE_DIM(rte_flow_desc_action) ||
+ !rte_flow_desc_action[action->type].name) {
+ rte_errno = ENOTSUP;
+ return 0;
+ }
+ if (fd)
+ dst = memcpy(fd->data + off1, action,
+ sizeof(*action));
+ off1 += sizeof(*action);
+ flow_action_conf_size(action, &tmp, &pad);
+ if (action->conf) {
+ if (fd)
+ dst->conf = memcpy(fd->data + off2,
+ action->conf, tmp);
+ off2 += tmp + pad;
+ }
+ off2 = RTE_ALIGN_CEIL(off2, sizeof(double));
+ } while ((action++)->type != RTE_FLOW_ACTION_TYPE_END);
+ }
+ if (fd != NULL)
+ return size;
+ off1 = RTE_ALIGN_CEIL(off1, sizeof(double));
+ tmp = RTE_ALIGN_CEIL(offsetof(struct rte_flow_desc, data),
+ sizeof(double));
+ size = tmp + off1 + off2;
+ if (size > len)
+ return size;
+ fd = desc;
+ if (fd != NULL) {
+ *fd = (const struct rte_flow_desc) {
+ .size = size,
+ .attr = *attr,
+ };
+ tmp -= offsetof(struct rte_flow_desc, data);
+ off2 = tmp + off1;
+ off1 = tmp;
+ goto store;
+ }
+ return 0;
+}
diff --git a/lib/librte_ether/rte_flow.h b/lib/librte_ether/rte_flow.h
index c47edbc9..bba6169f 100644
--- a/lib/librte_ether/rte_flow.h
+++ b/lib/librte_ether/rte_flow.h
@@ -297,6 +297,18 @@ enum rte_flow_item_type {
* See struct rte_flow_item_gre.
*/
RTE_FLOW_ITEM_TYPE_GRE,
+
+ /**
+ * [META]
+ *
+ * Fuzzy pattern match, expected to be faster than the default.
+ *
+ * This is for devices that support a fuzzy matching option.
+ * Fuzzy matching is usually fast, but at the cost of accuracy.
+ *
+ * See struct rte_flow_item_fuzzy.
+ */
+ RTE_FLOW_ITEM_TYPE_FUZZY,
};
/**
@@ -429,7 +441,7 @@ static const struct rte_flow_item_raw rte_flow_item_raw_mask = {
struct rte_flow_item_eth {
struct ether_addr dst; /**< Destination MAC. */
struct ether_addr src; /**< Source MAC. */
- uint16_t type; /**< EtherType. */
+ rte_be16_t type; /**< EtherType. */
};
/** Default mask for RTE_FLOW_ITEM_TYPE_ETH. */
@@ -437,7 +449,7 @@ struct rte_flow_item_eth {
static const struct rte_flow_item_eth rte_flow_item_eth_mask = {
.dst.addr_bytes = "\xff\xff\xff\xff\xff\xff",
.src.addr_bytes = "\xff\xff\xff\xff\xff\xff",
- .type = 0x0000,
+ .type = RTE_BE16(0x0000),
};
#endif
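With the EtherType field now typed rte_be16_t, constant values should be given in big-endian form via RTE_BE16; e.g. a spec matching IPv4 (a sketch):

struct rte_flow_item_eth eth_spec = {
	.type = RTE_BE16(0x0800), /* IPv4 EtherType */
};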
@@ -450,15 +462,15 @@ static const struct rte_flow_item_eth rte_flow_item_eth_mask = {
* RTE_FLOW_ITEM_TYPE_VLAN.
*/
struct rte_flow_item_vlan {
- uint16_t tpid; /**< Tag protocol identifier. */
- uint16_t tci; /**< Tag control information. */
+ rte_be16_t tpid; /**< Tag protocol identifier. */
+ rte_be16_t tci; /**< Tag control information. */
};
/** Default mask for RTE_FLOW_ITEM_TYPE_VLAN. */
#ifndef __cplusplus
static const struct rte_flow_item_vlan rte_flow_item_vlan_mask = {
- .tpid = 0x0000,
- .tci = 0xffff,
+ .tpid = RTE_BE16(0x0000),
+ .tci = RTE_BE16(0xffff),
};
#endif
@@ -477,8 +489,8 @@ struct rte_flow_item_ipv4 {
#ifndef __cplusplus
static const struct rte_flow_item_ipv4 rte_flow_item_ipv4_mask = {
.hdr = {
- .src_addr = 0xffffffff,
- .dst_addr = 0xffffffff,
+ .src_addr = RTE_BE32(0xffffffff),
+ .dst_addr = RTE_BE32(0xffffffff),
},
};
#endif
@@ -540,8 +552,8 @@ struct rte_flow_item_udp {
#ifndef __cplusplus
static const struct rte_flow_item_udp rte_flow_item_udp_mask = {
.hdr = {
- .src_port = 0xffff,
- .dst_port = 0xffff,
+ .src_port = RTE_BE16(0xffff),
+ .dst_port = RTE_BE16(0xffff),
},
};
#endif
@@ -559,8 +571,8 @@ struct rte_flow_item_tcp {
#ifndef __cplusplus
static const struct rte_flow_item_tcp rte_flow_item_tcp_mask = {
.hdr = {
- .src_port = 0xffff,
- .dst_port = 0xffff,
+ .src_port = RTE_BE16(0xffff),
+ .dst_port = RTE_BE16(0xffff),
},
};
#endif
@@ -578,8 +590,8 @@ struct rte_flow_item_sctp {
#ifndef __cplusplus
static const struct rte_flow_item_sctp rte_flow_item_sctp_mask = {
.hdr = {
- .src_port = 0xffff,
- .dst_port = 0xffff,
+ .src_port = RTE_BE16(0xffff),
+ .dst_port = RTE_BE16(0xffff),
},
};
#endif
@@ -609,14 +621,14 @@ static const struct rte_flow_item_vxlan rte_flow_item_vxlan_mask = {
* Matches a E-tag header.
*/
struct rte_flow_item_e_tag {
- uint16_t tpid; /**< Tag protocol identifier (0x893F). */
+ rte_be16_t tpid; /**< Tag protocol identifier (0x893F). */
/**
* E-Tag control information (E-TCI).
* E-PCP (3b), E-DEI (1b), ingress E-CID base (12b).
*/
- uint16_t epcp_edei_in_ecid_b;
+ rte_be16_t epcp_edei_in_ecid_b;
/** Reserved (2b), GRP (2b), E-CID base (12b). */
- uint16_t rsvd_grp_ecid_b;
+ rte_be16_t rsvd_grp_ecid_b;
uint8_t in_ecid_e; /**< Ingress E-CID ext. */
uint8_t ecid_e; /**< E-CID ext. */
};
@@ -624,13 +636,7 @@ struct rte_flow_item_e_tag {
/** Default mask for RTE_FLOW_ITEM_TYPE_E_TAG. */
#ifndef __cplusplus
static const struct rte_flow_item_e_tag rte_flow_item_e_tag_mask = {
-#if RTE_BYTE_ORDER == RTE_BIG_ENDIAN
- .rsvd_grp_ecid_b = 0x3fff,
-#elif RTE_BYTE_ORDER == RTE_LITTLE_ENDIAN
- .rsvd_grp_ecid_b = 0xff3f,
-#else
-#error Unsupported endianness.
-#endif
+ .rsvd_grp_ecid_b = RTE_BE16(0x3fff),
};
#endif
@@ -646,8 +652,8 @@ struct rte_flow_item_nvgre {
*
* c_k_s_rsvd0_ver must have value 0x2000 according to RFC 7637.
*/
- uint16_t c_k_s_rsvd0_ver;
- uint16_t protocol; /**< Protocol type (0x6558). */
+ rte_be16_t c_k_s_rsvd0_ver;
+ rte_be16_t protocol; /**< Protocol type (0x6558). */
uint8_t tni[3]; /**< Virtual subnet ID. */
uint8_t flow_id; /**< Flow ID. */
};
@@ -689,14 +695,42 @@ struct rte_flow_item_gre {
* Checksum (1b), reserved 0 (12b), version (3b).
* Refer to RFC 2784.
*/
- uint16_t c_rsvd0_ver;
- uint16_t protocol; /**< Protocol type. */
+ rte_be16_t c_rsvd0_ver;
+ rte_be16_t protocol; /**< Protocol type. */
};
/** Default mask for RTE_FLOW_ITEM_TYPE_GRE. */
#ifndef __cplusplus
static const struct rte_flow_item_gre rte_flow_item_gre_mask = {
- .protocol = 0xffff,
+ .protocol = RTE_BE16(0xffff),
+};
+#endif
+
+/**
+ * RTE_FLOW_ITEM_TYPE_FUZZY
+ *
+ * Fuzzy pattern match, expected to be faster than the default.
+ *
+ * This is for devices that support a fuzzy match option.
+ * A fuzzy match is usually fast, but at the cost of accuracy;
+ * e.g. signature matching only matches a pattern's hash value,
+ * so two different patterns may share the same hash value.
+ *
+ * The matching accuracy level can be configured through the threshold.
+ * A driver can divide the threshold range and map it to the different
+ * accuracy levels the device supports.
+ *
+ * Threshold 0 means perfect match (no fuzziness), while threshold
+ * 0xffffffff means fuzziest match.
+ */
+struct rte_flow_item_fuzzy {
+ uint32_t thresh; /**< Accuracy threshold. */
+};
+
+/** Default mask for RTE_FLOW_ITEM_TYPE_FUZZY. */
+#ifndef __cplusplus
+static const struct rte_flow_item_fuzzy rte_flow_item_fuzzy_mask = {
+ .thresh = 0xffffffff,
};
#endif
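A pattern using the new meta item might be sketched as follows (a threshold of 0 requests a perfect match):

struct rte_flow_item_fuzzy fuzzy = { .thresh = 0 };
struct rte_flow_item pattern[] = {
	{ .type = RTE_FLOW_ITEM_TYPE_FUZZY, .spec = &fuzzy },
	{ .type = RTE_FLOW_ITEM_TYPE_ETH },
	{ .type = RTE_FLOW_ITEM_TYPE_END },
};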
@@ -1191,6 +1225,90 @@ rte_flow_query(uint8_t port_id,
void *data,
struct rte_flow_error *error);
+/**
+ * Restrict ingress traffic to the defined flow rules.
+ *
+ * Isolated mode guarantees that all ingress traffic comes from defined flow
+ * rules only (current and future).
+ *
+ * Besides making ingress more deterministic, it allows PMDs to safely reuse
+ * resources otherwise assigned to handle the remaining traffic, such as
+ * global RSS configuration settings, VLAN filters, MAC address entries,
+ * legacy filter API rules and so on in order to expand the set of possible
+ * flow rule types.
+ *
+ * Calling this function as soon as possible after device initialization,
+ * ideally before the first call to rte_eth_dev_configure(), is recommended
+ * to avoid possible failures due to conflicting settings.
+ *
+ * Once effective, leaving isolated mode may not be possible depending on
+ * PMD implementation.
+ *
+ * Additionally, the following functionality has no effect on the underlying
+ * port and may return errors such as ENOTSUP ("not supported"):
+ *
+ * - Toggling promiscuous mode.
+ * - Toggling allmulticast mode.
+ * - Configuring MAC addresses.
+ * - Configuring multicast addresses.
+ * - Configuring VLAN filters.
+ * - Configuring Rx filters through the legacy API (e.g. FDIR).
+ * - Configuring global RSS settings.
+ *
+ * @param port_id
+ * Port identifier of Ethernet device.
+ * @param set
+ * Nonzero to enter isolated mode, attempt to leave it otherwise.
+ * @param[out] error
+ * Perform verbose error reporting if not NULL. PMDs initialize this
+ * structure in case of error only.
+ *
+ * @return
+ * 0 on success, a negative errno value otherwise and rte_errno is set.
+ */
+int
+rte_flow_isolate(uint8_t port_id, int set, struct rte_flow_error *error);
+
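As recommended above, isolation is best requested before the port is configured; a sketch with verbose error reporting:

struct rte_flow_error err;

if (rte_flow_isolate(port_id, 1, &err) != 0)
	printf("isolate: %s\n", err.message ? err.message : "unknown");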
+/**
+ * Generic flow representation.
+ *
+ * This form is sufficient to describe an rte_flow independently from any
+ * PMD implementation and allows for replayability and identification.
+ */
+struct rte_flow_desc {
+ size_t size; /**< Allocated space including data[]. */
+ struct rte_flow_attr attr; /**< Attributes. */
+ struct rte_flow_item *items; /**< Items. */
+ struct rte_flow_action *actions; /**< Actions. */
+ uint8_t data[]; /**< Storage for items/actions. */
+};
+
+/**
+ * Copy an rte_flow rule description.
+ *
+ * @param[out] fd
+ * Flow rule description.
+ * @param[in] len
+ * Total size of allocated data for the flow description.
+ * @param[in] attr
+ * Flow rule attributes.
+ * @param[in] items
+ * Pattern specification (list terminated by the END pattern item).
+ * @param[in] actions
+ * Associated actions (list terminated by the END action).
+ *
+ * @return
+ * If len is greater than or equal to the size of the flow, the total size
+ * of the flow description and its data.
+ * If len is lower than the size of the flow, the number of bytes that would
+ * have been written to fd had it been sufficient. Nothing is written.
+ */
+size_t
+rte_flow_copy(struct rte_flow_desc *fd, size_t len,
+ const struct rte_flow_attr *attr,
+ const struct rte_flow_item *items,
+ const struct rte_flow_action *actions);
+
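The return convention supports the usual two-pass pattern: query the size first, then copy (attr, items and actions are assumed to describe a valid rule; allocation checks abbreviated):

size_t size = rte_flow_copy(NULL, 0, &attr, items, actions);
struct rte_flow_desc *fd = malloc(size);

if (fd != NULL && rte_flow_copy(fd, size, &attr, items, actions) == size) {
	/* fd is now a self-contained copy of the rule. */
}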
#ifdef __cplusplus
}
#endif
diff --git a/lib/librte_ether/rte_flow_driver.h b/lib/librte_ether/rte_flow_driver.h
index da5749d5..4d95391d 100644
--- a/lib/librte_ether/rte_flow_driver.h
+++ b/lib/librte_ether/rte_flow_driver.h
@@ -120,6 +120,11 @@ struct rte_flow_ops {
enum rte_flow_action_type,
void *,
struct rte_flow_error *);
+ /** See rte_flow_isolate(). */
+ int (*isolate)
+ (struct rte_eth_dev *,
+ int,
+ struct rte_flow_error *);
};
/**
diff --git a/lib/librte_ether/rte_tm.c b/lib/librte_ether/rte_tm.c
new file mode 100644
index 00000000..71679650
--- /dev/null
+++ b/lib/librte_ether/rte_tm.c
@@ -0,0 +1,438 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright(c) 2017 Intel Corporation. All rights reserved.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <stdint.h>
+
+#include <rte_errno.h>
+#include "rte_ethdev.h"
+#include "rte_tm_driver.h"
+#include "rte_tm.h"
+
+/* Get generic traffic manager operations structure from a port. */
+const struct rte_tm_ops *
+rte_tm_ops_get(uint8_t port_id, struct rte_tm_error *error)
+{
+ struct rte_eth_dev *dev = &rte_eth_devices[port_id];
+ const struct rte_tm_ops *ops;
+
+ if (!rte_eth_dev_is_valid_port(port_id)) {
+ rte_tm_error_set(error,
+ ENODEV,
+ RTE_TM_ERROR_TYPE_UNSPECIFIED,
+ NULL,
+ rte_strerror(ENODEV));
+ return NULL;
+ }
+
+ if ((dev->dev_ops->tm_ops_get == NULL) ||
+ (dev->dev_ops->tm_ops_get(dev, &ops) != 0) ||
+ (ops == NULL)) {
+ rte_tm_error_set(error,
+ ENOSYS,
+ RTE_TM_ERROR_TYPE_UNSPECIFIED,
+ NULL,
+ rte_strerror(ENOSYS));
+ return NULL;
+ }
+
+ return ops;
+}
+
+#define RTE_TM_FUNC(port_id, func) \
+({ \
+ const struct rte_tm_ops *ops = \
+ rte_tm_ops_get(port_id, error); \
+ if (ops == NULL) \
+ return -rte_errno; \
+ \
+ if (ops->func == NULL) \
+ return -rte_tm_error_set(error, \
+ ENOSYS, \
+ RTE_TM_ERROR_TYPE_UNSPECIFIED, \
+ NULL, \
+ rte_strerror(ENOSYS)); \
+ \
+ ops->func; \
+})
+
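+/*
+ * Note: RTE_TM_FUNC() is a GCC statement expression; the return statements
+ * inside it exit the calling API function, so each wrapper below reduces to
+ * a validated indirect call into the PMD's rte_tm_ops.
+ */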
+/* Get number of leaf nodes */
+int
+rte_tm_get_number_of_leaf_nodes(uint8_t port_id,
+ uint32_t *n_leaf_nodes,
+ struct rte_tm_error *error)
+{
+ struct rte_eth_dev *dev = &rte_eth_devices[port_id];
+ const struct rte_tm_ops *ops =
+ rte_tm_ops_get(port_id, error);
+
+ if (ops == NULL)
+ return -rte_errno;
+
+ if (n_leaf_nodes == NULL) {
+ rte_tm_error_set(error,
+ EINVAL,
+ RTE_TM_ERROR_TYPE_UNSPECIFIED,
+ NULL,
+ rte_strerror(EINVAL));
+ return -rte_errno;
+ }
+
+ *n_leaf_nodes = dev->data->nb_tx_queues;
+ return 0;
+}
+
+/* Check node type (leaf or non-leaf) */
+int
+rte_tm_node_type_get(uint8_t port_id,
+ uint32_t node_id,
+ int *is_leaf,
+ struct rte_tm_error *error)
+{
+ struct rte_eth_dev *dev = &rte_eth_devices[port_id];
+ return RTE_TM_FUNC(port_id, node_type_get)(dev,
+ node_id, is_leaf, error);
+}
+
+/* Get capabilities */
+int rte_tm_capabilities_get(uint8_t port_id,
+ struct rte_tm_capabilities *cap,
+ struct rte_tm_error *error)
+{
+ struct rte_eth_dev *dev = &rte_eth_devices[port_id];
+ return RTE_TM_FUNC(port_id, capabilities_get)(dev,
+ cap, error);
+}
+
+/* Get level capabilities */
+int rte_tm_level_capabilities_get(uint8_t port_id,
+ uint32_t level_id,
+ struct rte_tm_level_capabilities *cap,
+ struct rte_tm_error *error)
+{
+ struct rte_eth_dev *dev = &rte_eth_devices[port_id];
+ return RTE_TM_FUNC(port_id, level_capabilities_get)(dev,
+ level_id, cap, error);
+}
+
+/* Get node capabilities */
+int rte_tm_node_capabilities_get(uint8_t port_id,
+ uint32_t node_id,
+ struct rte_tm_node_capabilities *cap,
+ struct rte_tm_error *error)
+{
+ struct rte_eth_dev *dev = &rte_eth_devices[port_id];
+ return RTE_TM_FUNC(port_id, node_capabilities_get)(dev,
+ node_id, cap, error);
+}
+
+/* Add WRED profile */
+int rte_tm_wred_profile_add(uint8_t port_id,
+ uint32_t wred_profile_id,
+ struct rte_tm_wred_params *profile,
+ struct rte_tm_error *error)
+{
+ struct rte_eth_dev *dev = &rte_eth_devices[port_id];
+ return RTE_TM_FUNC(port_id, wred_profile_add)(dev,
+ wred_profile_id, profile, error);
+}
+
+/* Delete WRED profile */
+int rte_tm_wred_profile_delete(uint8_t port_id,
+ uint32_t wred_profile_id,
+ struct rte_tm_error *error)
+{
+ struct rte_eth_dev *dev = &rte_eth_devices[port_id];
+ return RTE_TM_FUNC(port_id, wred_profile_delete)(dev,
+ wred_profile_id, error);
+}
+
+/* Add/update shared WRED context */
+int rte_tm_shared_wred_context_add_update(uint8_t port_id,
+ uint32_t shared_wred_context_id,
+ uint32_t wred_profile_id,
+ struct rte_tm_error *error)
+{
+ struct rte_eth_dev *dev = &rte_eth_devices[port_id];
+ return RTE_TM_FUNC(port_id, shared_wred_context_add_update)(dev,
+ shared_wred_context_id, wred_profile_id, error);
+}
+
+/* Delete shared WRED context */
+int rte_tm_shared_wred_context_delete(uint8_t port_id,
+ uint32_t shared_wred_context_id,
+ struct rte_tm_error *error)
+{
+ struct rte_eth_dev *dev = &rte_eth_devices[port_id];
+ return RTE_TM_FUNC(port_id, shared_wred_context_delete)(dev,
+ shared_wred_context_id, error);
+}
+
+/* Add shaper profile */
+int rte_tm_shaper_profile_add(uint8_t port_id,
+ uint32_t shaper_profile_id,
+ struct rte_tm_shaper_params *profile,
+ struct rte_tm_error *error)
+{
+ struct rte_eth_dev *dev = &rte_eth_devices[port_id];
+ return RTE_TM_FUNC(port_id, shaper_profile_add)(dev,
+ shaper_profile_id, profile, error);
+}
+
+/* Delete shaper profile */
+int rte_tm_shaper_profile_delete(uint8_t port_id,
+ uint32_t shaper_profile_id,
+ struct rte_tm_error *error)
+{
+ struct rte_eth_dev *dev = &rte_eth_devices[port_id];
+ return RTE_TM_FUNC(port_id, shaper_profile_delete)(dev,
+ shaper_profile_id, error);
+}
+
+/* Add shared shaper */
+int rte_tm_shared_shaper_add_update(uint8_t port_id,
+ uint32_t shared_shaper_id,
+ uint32_t shaper_profile_id,
+ struct rte_tm_error *error)
+{
+ struct rte_eth_dev *dev = &rte_eth_devices[port_id];
+ return RTE_TM_FUNC(port_id, shared_shaper_add_update)(dev,
+ shared_shaper_id, shaper_profile_id, error);
+}
+
+/* Delete shared shaper */
+int rte_tm_shared_shaper_delete(uint8_t port_id,
+ uint32_t shared_shaper_id,
+ struct rte_tm_error *error)
+{
+ struct rte_eth_dev *dev = &rte_eth_devices[port_id];
+ return RTE_TM_FUNC(port_id, shared_shaper_delete)(dev,
+ shared_shaper_id, error);
+}
+
+/* Add node to port traffic manager hierarchy */
+int rte_tm_node_add(uint8_t port_id,
+ uint32_t node_id,
+ uint32_t parent_node_id,
+ uint32_t priority,
+ uint32_t weight,
+ uint32_t level_id,
+ struct rte_tm_node_params *params,
+ struct rte_tm_error *error)
+{
+ struct rte_eth_dev *dev = &rte_eth_devices[port_id];
+ return RTE_TM_FUNC(port_id, node_add)(dev,
+ node_id, parent_node_id, priority, weight, level_id,
+ params, error);
+}
+
+/* Delete node from traffic manager hierarchy */
+int rte_tm_node_delete(uint8_t port_id,
+ uint32_t node_id,
+ struct rte_tm_error *error)
+{
+ struct rte_eth_dev *dev = &rte_eth_devices[port_id];
+ return RTE_TM_FUNC(port_id, node_delete)(dev,
+ node_id, error);
+}
+
+/* Suspend node */
+int rte_tm_node_suspend(uint8_t port_id,
+ uint32_t node_id,
+ struct rte_tm_error *error)
+{
+ struct rte_eth_dev *dev = &rte_eth_devices[port_id];
+ return RTE_TM_FUNC(port_id, node_suspend)(dev,
+ node_id, error);
+}
+
+/* Resume node */
+int rte_tm_node_resume(uint8_t port_id,
+ uint32_t node_id,
+ struct rte_tm_error *error)
+{
+ struct rte_eth_dev *dev = &rte_eth_devices[port_id];
+ return RTE_TM_FUNC(port_id, node_resume)(dev,
+ node_id, error);
+}
+
+/* Commit the initial port traffic manager hierarchy */
+int rte_tm_hierarchy_commit(uint8_t port_id,
+ int clear_on_fail,
+ struct rte_tm_error *error)
+{
+ struct rte_eth_dev *dev = &rte_eth_devices[port_id];
+ return RTE_TM_FUNC(port_id, hierarchy_commit)(dev,
+ clear_on_fail, error);
+}
+
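These wrappers compose into the usual bring-up sequence: add nodes top-down, then freeze the hierarchy. A sketch with hypothetical node IDs (node params zeroed apart from the shaper profile; error handling elided):

struct rte_tm_error err;
struct rte_tm_node_params np = {
	.shaper_profile_id = RTE_TM_SHAPER_PROFILE_ID_NONE,
};

rte_tm_node_add(port_id, 0, RTE_TM_NODE_ID_NULL, 0, 1,
		RTE_TM_NODE_LEVEL_ID_ANY, &np, &err);	/* root */
rte_tm_node_add(port_id, 1, 0, 0, 1,
		RTE_TM_NODE_LEVEL_ID_ANY, &np, &err);	/* leaf */
rte_tm_hierarchy_commit(port_id, 1, &err);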
+/* Update node parent */
+int rte_tm_node_parent_update(uint8_t port_id,
+ uint32_t node_id,
+ uint32_t parent_node_id,
+ uint32_t priority,
+ uint32_t weight,
+ struct rte_tm_error *error)
+{
+ struct rte_eth_dev *dev = &rte_eth_devices[port_id];
+ return RTE_TM_FUNC(port_id, node_parent_update)(dev,
+ node_id, parent_node_id, priority, weight, error);
+}
+
+/* Update node private shaper */
+int rte_tm_node_shaper_update(uint8_t port_id,
+ uint32_t node_id,
+ uint32_t shaper_profile_id,
+ struct rte_tm_error *error)
+{
+ struct rte_eth_dev *dev = &rte_eth_devices[port_id];
+ return RTE_TM_FUNC(port_id, node_shaper_update)(dev,
+ node_id, shaper_profile_id, error);
+}
+
+/* Update node shared shapers */
+int rte_tm_node_shared_shaper_update(uint8_t port_id,
+ uint32_t node_id,
+ uint32_t shared_shaper_id,
+ int add,
+ struct rte_tm_error *error)
+{
+ struct rte_eth_dev *dev = &rte_eth_devices[port_id];
+ return RTE_TM_FUNC(port_id, node_shared_shaper_update)(dev,
+ node_id, shared_shaper_id, add, error);
+}
+
+/* Update node stats */
+int rte_tm_node_stats_update(uint8_t port_id,
+ uint32_t node_id,
+ uint64_t stats_mask,
+ struct rte_tm_error *error)
+{
+ struct rte_eth_dev *dev = &rte_eth_devices[port_id];
+ return RTE_TM_FUNC(port_id, node_stats_update)(dev,
+ node_id, stats_mask, error);
+}
+
+/* Update WFQ weight mode */
+int rte_tm_node_wfq_weight_mode_update(uint8_t port_id,
+ uint32_t node_id,
+ int *wfq_weight_mode,
+ uint32_t n_sp_priorities,
+ struct rte_tm_error *error)
+{
+ struct rte_eth_dev *dev = &rte_eth_devices[port_id];
+ return RTE_TM_FUNC(port_id, node_wfq_weight_mode_update)(dev,
+ node_id, wfq_weight_mode, n_sp_priorities, error);
+}
+
+/* Update node congestion management mode */
+int rte_tm_node_cman_update(uint8_t port_id,
+ uint32_t node_id,
+ enum rte_tm_cman_mode cman,
+ struct rte_tm_error *error)
+{
+ struct rte_eth_dev *dev = &rte_eth_devices[port_id];
+ return RTE_TM_FUNC(port_id, node_cman_update)(dev,
+ node_id, cman, error);
+}
+
+/* Update node private WRED context */
+int rte_tm_node_wred_context_update(uint8_t port_id,
+ uint32_t node_id,
+ uint32_t wred_profile_id,
+ struct rte_tm_error *error)
+{
+ struct rte_eth_dev *dev = &rte_eth_devices[port_id];
+ return RTE_TM_FUNC(port_id, node_wred_context_update)(dev,
+ node_id, wred_profile_id, error);
+}
+
+/* Update node shared WRED context */
+int rte_tm_node_shared_wred_context_update(uint8_t port_id,
+ uint32_t node_id,
+ uint32_t shared_wred_context_id,
+ int add,
+ struct rte_tm_error *error)
+{
+ struct rte_eth_dev *dev = &rte_eth_devices[port_id];
+ return RTE_TM_FUNC(port_id, node_shared_wred_context_update)(dev,
+ node_id, shared_wred_context_id, add, error);
+}
+
+/* Read and/or clear stats counters for specific node */
+int rte_tm_node_stats_read(uint8_t port_id,
+ uint32_t node_id,
+ struct rte_tm_node_stats *stats,
+ uint64_t *stats_mask,
+ int clear,
+ struct rte_tm_error *error)
+{
+ struct rte_eth_dev *dev = &rte_eth_devices[port_id];
+ return RTE_TM_FUNC(port_id, node_stats_read)(dev,
+ node_id, stats, stats_mask, clear, error);
+}
+
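Reading and clearing counters through the wrapper (a sketch; node_id is hypothetical and <inttypes.h> is assumed for PRIu64):

struct rte_tm_node_stats stats;
uint64_t mask = 0;
struct rte_tm_error err;

if (rte_tm_node_stats_read(port_id, node_id, &stats, &mask, 1, &err) == 0)
	printf("pkts=%" PRIu64 " bytes=%" PRIu64 "\n",
	       stats.n_pkts, stats.n_bytes);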
+/* Packet marking - VLAN DEI */
+int rte_tm_mark_vlan_dei(uint8_t port_id,
+ int mark_green,
+ int mark_yellow,
+ int mark_red,
+ struct rte_tm_error *error)
+{
+ struct rte_eth_dev *dev = &rte_eth_devices[port_id];
+ return RTE_TM_FUNC(port_id, mark_vlan_dei)(dev,
+ mark_green, mark_yellow, mark_red, error);
+}
+
+/* Packet marking - IPv4/IPv6 ECN */
+int rte_tm_mark_ip_ecn(uint8_t port_id,
+ int mark_green,
+ int mark_yellow,
+ int mark_red,
+ struct rte_tm_error *error)
+{
+ struct rte_eth_dev *dev = &rte_eth_devices[port_id];
+ return RTE_TM_FUNC(port_id, mark_ip_ecn)(dev,
+ mark_green, mark_yellow, mark_red, error);
+}
+
+/* Packet marking - IPv4/IPv6 DSCP */
+int rte_tm_mark_ip_dscp(uint8_t port_id,
+ int mark_green,
+ int mark_yellow,
+ int mark_red,
+ struct rte_tm_error *error)
+{
+ struct rte_eth_dev *dev = &rte_eth_devices[port_id];
+ return RTE_TM_FUNC(port_id, mark_ip_dscp)(dev,
+ mark_green, mark_yellow, mark_red, error);
+}
diff --git a/lib/librte_ether/rte_tm.h b/lib/librte_ether/rte_tm.h
new file mode 100644
index 00000000..ebbfa1ee
--- /dev/null
+++ b/lib/librte_ether/rte_tm.h
@@ -0,0 +1,1912 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright(c) 2017 Intel Corporation.
+ * Copyright(c) 2017 Cavium.
+ * Copyright(c) 2017 NXP.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef __INCLUDE_RTE_TM_H__
+#define __INCLUDE_RTE_TM_H__
+
+/**
+ * @file
+ * RTE Generic Traffic Manager API
+ *
+ * This interface provides the ability to configure the traffic manager in a
+ * generic way. It includes features such as: hierarchical scheduling,
+ * traffic shaping, congestion management, packet marking, etc.
+ *
+ * @warning
+ * @b EXPERIMENTAL: this API may change without prior notice
+ */
+
+#include <stdint.h>
+
+#include <rte_common.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/**
+ * Ethernet framing overhead.
+ *
+ * Overhead fields per Ethernet frame:
+ * 1. Preamble: 7 bytes;
+ * 2. Start of Frame Delimiter (SFD): 1 byte;
+ * 3. Inter-Frame Gap (IFG): 12 bytes.
+ *
+ * One of the typical values for the *pkt_length_adjust* field of the shaper
+ * profile.
+ *
+ * @see struct rte_tm_shaper_params
+ */
+#define RTE_TM_ETH_FRAMING_OVERHEAD 20
+
+/**
+ * Ethernet framing overhead including the Frame Check Sequence (FCS) field.
+ * Useful when FCS is generated and added at the end of the Ethernet frame on
+ * TX side without any SW intervention.
+ *
+ * One of the typical values for the pkt_length_adjust field of the shaper
+ * profile.
+ *
+ * @see struct rte_tm_shaper_params
+ */
+#define RTE_TM_ETH_FRAMING_OVERHEAD_FCS 24
+
+/**
+ * Invalid WRED profile ID.
+ *
+ * @see struct rte_tm_node_params
+ * @see rte_tm_node_add()
+ * @see rte_tm_node_wred_context_update()
+ */
+#define RTE_TM_WRED_PROFILE_ID_NONE UINT32_MAX
+
+/**
+ * Invalid shaper profile ID.
+ *
+ * @see struct rte_tm_node_params
+ * @see rte_tm_node_add()
+ * @see rte_tm_node_shaper_update()
+ */
+#define RTE_TM_SHAPER_PROFILE_ID_NONE UINT32_MAX
+
+/**
+ * Node ID for the parent of the root node.
+ *
+ * @see rte_tm_node_add()
+ */
+#define RTE_TM_NODE_ID_NULL UINT32_MAX
+
+/**
+ * Node level ID used to disable level ID checking.
+ *
+ * @see rte_tm_node_add()
+ */
+#define RTE_TM_NODE_LEVEL_ID_ANY UINT32_MAX
+
+/**
+ * Color
+ */
+enum rte_tm_color {
+ RTE_TM_GREEN = 0, /**< Green */
+ RTE_TM_YELLOW, /**< Yellow */
+ RTE_TM_RED, /**< Red */
+ RTE_TM_COLORS /**< Number of colors */
+};
+
+/**
+ * Node statistics counter type
+ */
+enum rte_tm_stats_type {
+ /** Number of packets scheduled from current node. */
+ RTE_TM_STATS_N_PKTS = 1 << 0,
+
+ /** Number of bytes scheduled from current node. */
+ RTE_TM_STATS_N_BYTES = 1 << 1,
+
+ /** Number of green packets dropped by current leaf node. */
+ RTE_TM_STATS_N_PKTS_GREEN_DROPPED = 1 << 2,
+
+ /** Number of yellow packets dropped by current leaf node. */
+ RTE_TM_STATS_N_PKTS_YELLOW_DROPPED = 1 << 3,
+
+ /** Number of red packets dropped by current leaf node. */
+ RTE_TM_STATS_N_PKTS_RED_DROPPED = 1 << 4,
+
+ /** Number of green bytes dropped by current leaf node. */
+ RTE_TM_STATS_N_BYTES_GREEN_DROPPED = 1 << 5,
+
+ /** Number of yellow bytes dropped by current leaf node. */
+ RTE_TM_STATS_N_BYTES_YELLOW_DROPPED = 1 << 6,
+
+ /** Number of red bytes dropped by current leaf node. */
+ RTE_TM_STATS_N_BYTES_RED_DROPPED = 1 << 7,
+
+ /** Number of packets currently waiting in the packet queue of current
+ * leaf node.
+ */
+ RTE_TM_STATS_N_PKTS_QUEUED = 1 << 8,
+
+ /** Number of bytes currently waiting in the packet queue of current
+ * leaf node.
+ */
+ RTE_TM_STATS_N_BYTES_QUEUED = 1 << 9,
+};
+
+/**
+ * Node statistics counters
+ */
+struct rte_tm_node_stats {
+ /** Number of packets scheduled from current node. */
+ uint64_t n_pkts;
+
+ /** Number of bytes scheduled from current node. */
+ uint64_t n_bytes;
+
+ /** Statistics counters for leaf nodes only. */
+ struct {
+ /** Number of packets dropped by current leaf node per each
+ * color.
+ */
+ uint64_t n_pkts_dropped[RTE_TM_COLORS];
+
+ /** Number of bytes dropped by current leaf node per each
+ * color.
+ */
+ uint64_t n_bytes_dropped[RTE_TM_COLORS];
+
+ /** Number of packets currently waiting in the packet queue of
+ * current leaf node.
+ */
+ uint64_t n_pkts_queued;
+
+ /** Number of bytes currently waiting in the packet queue of
+ * current leaf node.
+ */
+ uint64_t n_bytes_queued;
+ } leaf;
+};
+
+/**
+ * Traffic manager dynamic updates
+ */
+enum rte_tm_dynamic_update_type {
+ /** Dynamic parent node update. The new parent node is located on same
+ * hierarchy level as the former parent node. Consequently, the node
+ * whose parent is changed preserves its hierarchy level.
+ */
+ RTE_TM_UPDATE_NODE_PARENT_KEEP_LEVEL = 1 << 0,
+
+ /** Dynamic parent node update. The new parent node is located on
+ * different hierarchy level than the former parent node. Consequently,
+ * the node whose parent is changed also changes its hierarchy level.
+ */
+ RTE_TM_UPDATE_NODE_PARENT_CHANGE_LEVEL = 1 << 1,
+
+ /** Dynamic node add/delete. */
+ RTE_TM_UPDATE_NODE_ADD_DELETE = 1 << 2,
+
+ /** Suspend/resume nodes. */
+ RTE_TM_UPDATE_NODE_SUSPEND_RESUME = 1 << 3,
+
+ /** Dynamic switch between byte-based and packet-based WFQ weights. */
+ RTE_TM_UPDATE_NODE_WFQ_WEIGHT_MODE = 1 << 4,
+
+ /** Dynamic update on number of SP priorities. */
+ RTE_TM_UPDATE_NODE_N_SP_PRIORITIES = 1 << 5,
+
+ /** Dynamic update of congestion management mode for leaf nodes. */
+ RTE_TM_UPDATE_NODE_CMAN = 1 << 6,
+
+ /** Dynamic update of the set of enabled stats counter types. */
+ RTE_TM_UPDATE_NODE_STATS = 1 << 7,
+};
+
+/**
+ * Traffic manager capabilities
+ */
+struct rte_tm_capabilities {
+ /** Maximum number of nodes. */
+ uint32_t n_nodes_max;
+
+ /** Maximum number of levels (i.e. number of nodes connecting the root
+ * node with any leaf node, including the root and the leaf).
+ */
+ uint32_t n_levels_max;
+
+ /** When non-zero, this flag indicates that all the non-leaf nodes
+ * (with the exception of the root node) have identical capability set.
+ */
+ int non_leaf_nodes_identical;
+
+ /** When non-zero, this flag indicates that all the leaf nodes have
+ * identical capability set.
+ */
+ int leaf_nodes_identical;
+
+ /** Maximum number of shapers, either private or shared. In case the
+ * implementation does not share any resources between private and
+ * shared shapers, it is typically equal to the sum of
+ * *shaper_private_n_max* and *shaper_shared_n_max*. The
+ * value of zero indicates that traffic shaping is not supported.
+ */
+ uint32_t shaper_n_max;
+
+ /** Maximum number of private shapers. Indicates the maximum number of
+ * nodes that can concurrently have their private shaper enabled. The
+ * value of zero indicates that private shapers are not supported.
+ */
+ uint32_t shaper_private_n_max;
+
+ /** Maximum number of private shapers that support dual rate shaping.
+ * Indicates the maximum number of nodes that can concurrently have
+ * their private shaper enabled with dual rate support. Only valid when
+ * private shapers are supported. The value of zero indicates that dual
+ * rate shaping is not available for private shapers. The maximum value
+ * is *shaper_private_n_max*.
+ */
+ int shaper_private_dual_rate_n_max;
+
+ /** Minimum committed/peak rate (bytes per second) for any private
+ * shaper. Valid only when private shapers are supported.
+ */
+ uint64_t shaper_private_rate_min;
+
+ /** Maximum committed/peak rate (bytes per second) for any private
+ * shaper. Valid only when private shapers are supported.
+ */
+ uint64_t shaper_private_rate_max;
+
+ /** Maximum number of shared shapers. The value of zero indicates that
+ * shared shapers are not supported.
+ */
+ uint32_t shaper_shared_n_max;
+
+ /** Maximum number of nodes that can share the same shared shaper.
+ * Only valid when shared shapers are supported.
+ */
+ uint32_t shaper_shared_n_nodes_per_shaper_max;
+
+ /** Maximum number of shared shapers a node can be part of. This
+ * parameter indicates that there is at least one node that can be
+ * configured with this many shared shapers, which might not be true for
+ * all the nodes. Only valid when shared shapers are supported, in which
+ * case it ranges from 1 to *shaper_shared_n_max*.
+ */
+ uint32_t shaper_shared_n_shapers_per_node_max;
+
+ /** Maximum number of shared shapers that can be configured with dual
+ * rate shaping. The value of zero indicates that dual rate shaping
+ * support is not available for shared shapers.
+ */
+ uint32_t shaper_shared_dual_rate_n_max;
+
+ /** Minimum committed/peak rate (bytes per second) for any shared
+ * shaper. Only valid when shared shapers are supported.
+ */
+ uint64_t shaper_shared_rate_min;
+
+ /** Maximum committed/peak rate (bytes per second) for any shared
+ * shaper. Only valid when shared shapers are supported.
+ */
+ uint64_t shaper_shared_rate_max;
+
+ /** Minimum value allowed for packet length adjustment for any private
+ * or shared shaper.
+ */
+ int shaper_pkt_length_adjust_min;
+
+ /** Maximum value allowed for packet length adjustment for any private
+ * or shared shaper.
+ */
+ int shaper_pkt_length_adjust_max;
+
+ /** Maximum number of children nodes. This parameter indicates that
+ * there is at least one non-leaf node that can be configured with this
+ * many children nodes, which might not be true for all the non-leaf
+ * nodes.
+ */
+ uint32_t sched_n_children_max;
+
+ /** Maximum number of supported priority levels. This parameter
+ * indicates that there is at least one non-leaf node that can be
+ * configured with this many priority levels for managing its children
+ * nodes, which might not be true for all the non-leaf nodes. The value
+ * of zero is invalid. The value of 1 indicates that only priority 0 is
+ * supported, which essentially means that the Strict Priority (SP)
+ * algorithm is not supported.
+ */
+ uint32_t sched_sp_n_priorities_max;
+
+ /** Maximum number of sibling nodes that can have the same priority at
+ * any given time, i.e. maximum size of the WFQ sibling node group. This
+ * parameter indicates there is at least one non-leaf node that meets
+ * this condition, which might not be true for all the non-leaf nodes.
+ * The value of zero is invalid. The value of 1 indicates that the WFQ
+ * algorithm is not supported. The maximum value is
+ * *sched_n_children_max*.
+ */
+ uint32_t sched_wfq_n_children_per_group_max;
+
+ /** Maximum number of priority levels that can have more than one child
+ * node at any given time, i.e. maximum number of WFQ sibling node
+ * groups that have two or more members. This parameter indicates there
+ * is at least one non-leaf node that meets this condition, which might
+ * not be true for all the non-leaf nodes. The value of zero states that
+ * the WFQ algorithm is not supported. The value of 1 indicates that
+ * (*sched_sp_n_priorities_max* - 1) priority levels have at most one
+ * child node, so there can be only one priority level with two or
+ * more sibling nodes making up a WFQ group. The maximum value is:
+ * min(floor(*sched_n_children_max* / 2), *sched_sp_n_priorities_max*).
+ */
+ uint32_t sched_wfq_n_groups_max;
+
+ /** Maximum WFQ weight. The value of 1 indicates that all sibling nodes
+ * with same priority have the same WFQ weight, so WFQ is reduced to FQ.
+ */
+ uint32_t sched_wfq_weight_max;
+
+ /** Head drop algorithm support. When non-zero, this parameter
+ * indicates that there is at least one leaf node that supports the head
+ * drop algorithm, which might not be true for all the leaf nodes.
+ */
+ int cman_head_drop_supported;
+
+ /** Maximum number of WRED contexts, either private or shared. In case
+ * the implementation does not share any resources between private and
+ * shared WRED contexts, it is typically equal to the sum of
+ * *cman_wred_context_private_n_max* and
+ * *cman_wred_context_shared_n_max*. The value of zero indicates that
+ * WRED is not supported.
+ */
+ uint32_t cman_wred_context_n_max;
+
+ /** Maximum number of private WRED contexts. Indicates the maximum
+ * number of leaf nodes that can concurrently have their private WRED
+ * context enabled. The value of zero indicates that private WRED
+ * contexts are not supported.
+ */
+ uint32_t cman_wred_context_private_n_max;
+
+ /** Maximum number of shared WRED contexts. The value of zero
+ * indicates that shared WRED contexts are not supported.
+ */
+ uint32_t cman_wred_context_shared_n_max;
+
+ /** Maximum number of leaf nodes that can share the same WRED context.
+ * Only valid when shared WRED contexts are supported.
+ */
+ uint32_t cman_wred_context_shared_n_nodes_per_context_max;
+
+ /** Maximum number of shared WRED contexts a leaf node can be part of.
+ * This parameter indicates that there is at least one leaf node that
+ * can be configured with this many shared WRED contexts, which might
+ * not be true for all the leaf nodes. Only valid when shared WRED
+ * contexts are supported, in which case it ranges from 1 to
+ * *cman_wred_context_shared_n_max*.
+ */
+ uint32_t cman_wred_context_shared_n_contexts_per_node_max;
+
+ /** Support for VLAN DEI packet marking (per color). */
+ int mark_vlan_dei_supported[RTE_TM_COLORS];
+
+ /** Support for IPv4/IPv6 ECN marking of TCP packets (per color). */
+ int mark_ip_ecn_tcp_supported[RTE_TM_COLORS];
+
+ /** Support for IPv4/IPv6 ECN marking of SCTP packets (per color). */
+ int mark_ip_ecn_sctp_supported[RTE_TM_COLORS];
+
+ /** Support for IPv4/IPv6 DSCP packet marking (per color). */
+ int mark_ip_dscp_supported[RTE_TM_COLORS];
+
+ /** Set of supported dynamic update operations.
+ * @see enum rte_tm_dynamic_update_type
+ */
+ uint64_t dynamic_update_mask;
+
+ /** Set of supported statistics counter types.
+ * @see enum rte_tm_stats_type
+ */
+ uint64_t stats_mask;
+};
+
+/**
+ * Traffic manager level capabilities
+ */
+struct rte_tm_level_capabilities {
+ /** Maximum number of nodes for the current hierarchy level. */
+ uint32_t n_nodes_max;
+
+ /** Maximum number of non-leaf nodes for the current hierarchy level.
+ * The value of 0 indicates that current level only supports leaf
+ * nodes. The maximum value is *n_nodes_max*.
+ */
+ uint32_t n_nodes_nonleaf_max;
+
+ /** Maximum number of leaf nodes for the current hierarchy level. The
+ * value of 0 indicates that current level only supports non-leaf
+ * nodes. The maximum value is *n_nodes_max*.
+ */
+ uint32_t n_nodes_leaf_max;
+
+ /** When non-zero, this flag indicates that all the non-leaf nodes on
+ * this level have identical capability set. Valid only when
+ * *n_nodes_nonleaf_max* is non-zero.
+ */
+ int non_leaf_nodes_identical;
+
+ /** When non-zero, this flag indicates that all the leaf nodes on this
+ * level have identical capability set. Valid only when
+ * *n_nodes_leaf_max* is non-zero.
+ */
+ int leaf_nodes_identical;
+
+ RTE_STD_C11
+ union {
+ /** Items valid only for the non-leaf nodes on this level. */
+ struct {
+ /** Private shaper support. When non-zero, it indicates
+ * there is at least one non-leaf node on this level
+ * with private shaper support, which may not be the
+ * case for all the non-leaf nodes on this level.
+ */
+ int shaper_private_supported;
+
+ /** Dual rate support for private shaper. Valid only
+ * when private shaper is supported for the non-leaf
+ * nodes on the current level. When non-zero, it
+ * indicates there is at least one non-leaf node on this
+ * level with dual rate private shaper support, which
+ * may not be the case for all the non-leaf nodes on
+ * this level.
+ */
+ int shaper_private_dual_rate_supported;
+
+ /** Minimum committed/peak rate (bytes per second) for
+ * private shapers of the non-leaf nodes of this level.
+ * Valid only when private shaper is supported on this
+ * level.
+ */
+ uint64_t shaper_private_rate_min;
+
+ /** Maximum committed/peak rate (bytes per second) for
+ * private shapers of the non-leaf nodes on this level.
+ * Valid only when private shaper is supported on this
+ * level.
+ */
+ uint64_t shaper_private_rate_max;
+
+ /** Maximum number of shared shapers that any non-leaf
+ * node on this level can be part of. The value of zero
+ * indicates that shared shapers are not supported by
+ * the non-leaf nodes on this level. When non-zero, it
+ * indicates there is at least one non-leaf node on this
+ * level that meets this condition, which may not be the
+ * case for all the non-leaf nodes on this level.
+ */
+ uint32_t shaper_shared_n_max;
+
+ /** Maximum number of children nodes. This parameter
+ * indicates that there is at least one non-leaf node on
+ * this level that can be configured with this many
+ * children nodes, which might not be true for all the
+ * non-leaf nodes on this level.
+ */
+ uint32_t sched_n_children_max;
+
+ /** Maximum number of supported priority levels. This
+ * parameter indicates that there is at least one
+ * non-leaf node on this level that can be configured
+ * with this many priority levels for managing its
+ * children nodes, which might not be true for all the
+ * non-leaf nodes on this level. The value of zero is
+ * invalid. The value of 1 indicates that only priority
+ * 0 is supported, which essentially means that Strict
+ * Priority (SP) algorithm is not supported on this
+ * level.
+ */
+ uint32_t sched_sp_n_priorities_max;
+
+ /** Maximum number of sibling nodes that can have the
+ * same priority at any given time, i.e. maximum size of
+ * the WFQ sibling node group. This parameter indicates
+ * there is at least one non-leaf node on this level
+ * that meets this condition, which may not be true for
+ * all the non-leaf nodes on this level. The value of
+ * zero is invalid. The value of 1 indicates that WFQ
+ * algorithm is not supported on this level. The maximum
+ * value is *sched_n_children_max*.
+ */
+ uint32_t sched_wfq_n_children_per_group_max;
+
+ /** Maximum number of priority levels that can have
+ * more than one child node at any given time, i.e.
+ * maximum number of WFQ sibling node groups that
+ * have two or more members. This parameter indicates
+ * there is at least one non-leaf node on this level
+ * that meets this condition, which might not be true
+ * for all the non-leaf nodes. The value of zero states
+ * that WFQ algorithm is not supported on this level.
+ * The value of 1 indicates that
+ * (*sched_sp_n_priorities_max* - 1) priority levels on
+ * this level have at most one child node, so there can
+ * be only one priority level with two or more sibling
+ * nodes making up a WFQ group on this level. The
+ * maximum value is:
+ * min(floor(*sched_n_children_max* / 2),
+ * *sched_sp_n_priorities_max*).
+ */
+ uint32_t sched_wfq_n_groups_max;
+
+ /** Maximum WFQ weight. The value of 1 indicates that
+ * all sibling nodes on this level with same priority
+ * have the same WFQ weight, so on this level WFQ is
+ * reduced to FQ.
+ */
+ uint32_t sched_wfq_weight_max;
+
+ /** Mask of statistics counter types supported by the
+ * non-leaf nodes on this level. Every supported
+ * statistics counter type is supported by at least one
+ * non-leaf node on this level, which may not be true
+ * for all the non-leaf nodes on this level.
+ * @see enum rte_tm_stats_type
+ */
+ uint64_t stats_mask;
+ } nonleaf;
+
+ /** Items valid only for the leaf nodes on this level. */
+ struct {
+ /** Private shaper support. When non-zero, it indicates
+ * there is at least one leaf node on this level with
+ * private shaper support, which may not be the case for
+ * all the leaf nodes on this level.
+ */
+ int shaper_private_supported;
+
+ /** Dual rate support for private shaper. Valid only
+ * when private shaper is supported for the leaf nodes
+ * on this level. When non-zero, it indicates there is
+ * at least one leaf node on this level with dual rate
+ * private shaper support, which may not be the case for
+ * all the leaf nodes on this level.
+ */
+ int shaper_private_dual_rate_supported;
+
+ /** Minimum committed/peak rate (bytes per second) for
+ * private shapers of the leaf nodes of this level.
+ * Valid only when private shaper is supported for the
+ * leaf nodes on this level.
+ */
+ uint64_t shaper_private_rate_min;
+
+ /** Maximum committed/peak rate (bytes per second) for
+ * private shapers of the leaf nodes on this level.
+ * Valid only when private shaper is supported for the
+ * leaf nodes on this level.
+ */
+ uint64_t shaper_private_rate_max;
+
+ /** Maximum number of shared shapers that any leaf node
+ * on this level can be part of. The value of zero
+ * indicates that shared shapers are not supported by
+ * the leaf nodes on this level. When non-zero, it
+ * indicates there is at least one leaf node on this
+ * level that meets this condition, which may not be the
+ * case for all the leaf nodes on this level.
+ */
+ uint32_t shaper_shared_n_max;
+
+ /** Head drop algorithm support. When non-zero, this
+ * parameter indicates that there is at least one leaf
+ * node on this level that supports the head drop
+ * algorithm, which might not be true for all the leaf
+ * nodes on this level.
+ */
+ int cman_head_drop_supported;
+
+ /** Private WRED context support. When non-zero, it
+ * indicates there is at least one node on this level
+ * with private WRED context support, which may not be
+ * true for all the leaf nodes on this level.
+ */
+ int cman_wred_context_private_supported;
+
+ /** Maximum number of shared WRED contexts that any
+ * leaf node on this level can be part of. The value of
+ * zero indicates that shared WRED contexts are not
+ * supported by the leaf nodes on this level. When
+ * non-zero, it indicates there is at least one leaf
+ * node on this level that meets this condition, which
+ * may not be the case for all the leaf nodes on this
+ * level.
+ */
+ uint32_t cman_wred_context_shared_n_max;
+
+ /** Mask of statistics counter types supported by the
+ * leaf nodes on this level. Every supported statistics
+ * counter type is supported by at least one leaf node
+ * on this level, which may not be true for all the leaf
+ * nodes on this level.
+ * @see enum rte_tm_stats_type
+ */
+ uint64_t stats_mask;
+ } leaf;
+ };
+};
+
+/**
+ * Traffic manager node capabilities
+ */
+struct rte_tm_node_capabilities {
+ /** Private shaper support for the current node. */
+ int shaper_private_supported;
+
+ /** Dual rate shaping support for private shaper of current node.
+ * Valid only when private shaper is supported by the current node.
+ */
+ int shaper_private_dual_rate_supported;
+
+ /** Minimum committed/peak rate (bytes per second) for private
+ * shaper of current node. Valid only when private shaper is supported
+ * by the current node.
+ */
+ uint64_t shaper_private_rate_min;
+
+ /** Maximum committed/peak rate (bytes per second) for private
+ * shaper of current node. Valid only when private shaper is supported
+ * by the current node.
+ */
+ uint64_t shaper_private_rate_max;
+
+ /** Maximum number of shared shapers the current node can be part of.
+ * The value of zero indicates that shared shapers are not supported by
+ * the current node.
+ */
+ uint32_t shaper_shared_n_max;
+
+ RTE_STD_C11
+ union {
+ /** Items valid only for non-leaf nodes. */
+ struct {
+ /** Maximum number of children nodes. */
+ uint32_t sched_n_children_max;
+
+ /** Maximum number of supported priority levels. The
+ * value of zero is invalid. The value of 1 indicates
+ * that only priority 0 is supported, which essentially
+ * means that Strict Priority (SP) algorithm is not
+ * supported.
+ */
+ uint32_t sched_sp_n_priorities_max;
+
+ /** Maximum number of sibling nodes that can have the
+ * same priority at any given time, i.e. maximum size
+ * of the WFQ sibling node group. The value of zero
+ * is invalid. The value of 1 indicates that WFQ
+ * algorithm is not supported. The maximum value is
+ * *sched_n_children_max*.
+ */
+ uint32_t sched_wfq_n_children_per_group_max;
+
+ /** Maximum number of priority levels that can have
+ * more than one child node at any given time, i.e.
+ * maximum number of WFQ sibling node groups that have
+ * two or more members. The value of zero states that
+ * WFQ algorithm is not supported. The value of 1
+ * indicates that (*sched_sp_n_priorities_max* - 1)
+ * priority levels have at most one child node, so there
+ * can be only one priority level with two or more
+ * sibling nodes making up a WFQ group. The maximum
+ * value is: min(floor(*sched_n_children_max* / 2),
+ * *sched_sp_n_priorities_max*).
+ */
+ uint32_t sched_wfq_n_groups_max;
+
+ /** Maximum WFQ weight. The value of 1 indicates that
+ * all sibling nodes with same priority have the same
+ * WFQ weight, so WFQ is reduced to FQ.
+ */
+ uint32_t sched_wfq_weight_max;
+ } nonleaf;
+
+ /** Items valid only for leaf nodes. */
+ struct {
+ /** Head drop algorithm support for current node. */
+ int cman_head_drop_supported;
+
+ /** Private WRED context support for current node. */
+ int cman_wred_context_private_supported;
+
+ /** Maximum number of shared WRED contexts the current
+ * node can be part of. The value of zero indicates that
+ * shared WRED contexts are not supported by the current
+ * node.
+ */
+ uint32_t cman_wred_context_shared_n_max;
+ } leaf;
+ };
+
+ /** Mask of statistics counter types supported by the current node.
+ * @see enum rte_tm_stats_type
+ */
+ uint64_t stats_mask;
+};
+
+/**
+ * Congestion management (CMAN) mode
+ *
+ * This is used for controlling the admission of packets into a packet queue or
+ * group of packet queues on congestion. When a new packet is written to a
+ * queue that is already full, the *tail drop* algorithm drops the new packet
+ * while leaving the queue unmodified, as opposed to the *head drop* algorithm,
+ * which drops the packet at the head of the queue (the oldest packet waiting
+ * in the queue) and admits the new packet at the tail of the queue.
+ *
+ * The *Random Early Detection (RED)* algorithm works by proactively dropping
+ * more and more input packets as the queue occupancy builds up. When the queue
+ * is full or almost full, RED effectively works as *tail drop*. The *Weighted
+ * RED* algorithm uses a separate set of RED thresholds for each packet color.
+ */
+enum rte_tm_cman_mode {
+ RTE_TM_CMAN_TAIL_DROP = 0, /**< Tail drop */
+ RTE_TM_CMAN_HEAD_DROP, /**< Head drop */
+ RTE_TM_CMAN_WRED, /**< Weighted Random Early Detection (WRED) */
+};
+
+/**
+ * Random Early Detection (RED) profile
+ */
+struct rte_tm_red_params {
+ /** Minimum queue threshold */
+ uint16_t min_th;
+
+ /** Maximum queue threshold */
+ uint16_t max_th;
+
+ /** Inverse of packet marking probability maximum value (maxp), i.e.
+ * maxp_inv = 1 / maxp
+ */
+ uint16_t maxp_inv;
+
+ /** Negated log2 of queue weight (wq), i.e. wq = 1 / (2 ^ wq_log2) */
+ uint16_t wq_log2;
+};
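+
+/* Editor's illustrative sketch, not part of the original header: how the
+ * inverse/log2 encodings above map to conventional RED settings. The
+ * threshold values are example numbers, not recommendations. Assuming a
+ * target maximum marking probability maxp = 1/10 and an averaging weight
+ * wq = 1/512:
+ *
+ *    struct rte_tm_red_params red = {
+ *        .min_th = 32,    // no drops below an average queue size of 32
+ *        .max_th = 128,   // all packets dropped above 128
+ *        .maxp_inv = 10,  // maxp = 1 / maxp_inv = 0.1 at the max_th point
+ *        .wq_log2 = 9,    // wq = 1 / (2 ^ 9) = 1/512
+ *    };
+ */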
+
+/**
+ * Weighted RED (WRED) profile
+ *
+ * Multiple WRED contexts can share the same WRED profile. Each leaf node with
+ * WRED enabled as its congestion management mode has zero or one private WRED
+ * context (only one leaf node using it) and/or zero, one or several shared
+ * WRED contexts (multiple leaf nodes use the same WRED context). A private
+ * WRED context is used to perform congestion management for a single leaf
+ * node, while a shared WRED context is used to perform congestion management
+ * for a group of leaf nodes.
+ */
+struct rte_tm_wred_params {
+ /** One set of RED parameters per packet color */
+ struct rte_tm_red_params red_params[RTE_TM_COLORS];
+};
+
+/**
+ * Token bucket
+ */
+struct rte_tm_token_bucket {
+ /** Token bucket rate (bytes per second) */
+ uint64_t rate;
+
+ /** Token bucket size (bytes), a.k.a. max burst size */
+ uint64_t size;
+};
+
+/**
+ * Shaper (rate limiter) profile
+ *
+ * Multiple shaper instances can share the same shaper profile. Each node has
+ * zero or one private shaper (only one node using it) and/or zero, one or
+ * several shared shapers (multiple nodes use the same shaper instance).
+ * A private shaper is used to perform traffic shaping for a single node, while
+ * a shared shaper is used to perform traffic shaping for a group of nodes.
+ *
+ * Single rate shapers use a single token bucket. A single rate shaper can be
+ * configured by setting the rate of the committed bucket to zero, which
+ * effectively disables this bucket. The peak bucket is used to limit the rate
+ * and the burst size for the current shaper.
+ *
+ * Dual rate shapers use both the committed and the peak token buckets. The
+ * rate of the peak bucket has to be bigger than zero, as well as greater than
+ * or equal to the rate of the committed bucket.
+ */
+struct rte_tm_shaper_params {
+ /** Committed token bucket */
+ struct rte_tm_token_bucket committed;
+
+ /** Peak token bucket */
+ struct rte_tm_token_bucket peak;
+
+ /** Signed value to be added to the length of each packet for the
+ * purpose of shaping. Can be used to correct the packet length with
+ * the framing overhead bytes that are also consumed on the wire (e.g.
+ * RTE_TM_ETH_FRAMING_OVERHEAD_FCS).
+ */
+ int32_t pkt_length_adjust;
+};
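+
+/* Editor's illustrative sketch, not part of the original header: a dual rate
+ * shaper profile with 1 Mbps committed and 2 Mbps peak rates (the rate fields
+ * are in bytes per second), and the single rate variant obtained by disabling
+ * the committed bucket, as described above. The burst sizes are arbitrary
+ * example values.
+ *
+ *    struct rte_tm_shaper_params dual_rate = {
+ *        .committed = { .rate = 125000, .size = 4096 }, // 1 Mbps, 4 KB burst
+ *        .peak      = { .rate = 250000, .size = 4096 }, // 2 Mbps, 4 KB burst
+ *        .pkt_length_adjust = RTE_TM_ETH_FRAMING_OVERHEAD_FCS,
+ *    };
+ *
+ *    struct rte_tm_shaper_params single_rate = dual_rate;
+ *    single_rate.committed.rate = 0; // zero rate disables committed bucket
+ */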
+
+/**
+ * Node parameters
+ *
+ * Each non-leaf node has multiple inputs (its children nodes) and a single
+ * output (which is input to its parent node). It arbitrates its inputs using
+ * Strict Priority (SP) and Weighted Fair Queuing (WFQ) algorithms to schedule
+ * input packets to its output while observing its shaping (rate limiting)
+ * constraints.
+ *
+ * Algorithms such as Weighted Round Robin (WRR), Byte-level WRR, Deficit WRR
+ * (DWRR), etc. are considered approximations of the WFQ ideal and are
+ * assimilated to WFQ, although an implementation-dependent trade-off in
+ * accuracy, performance and resource usage might exist.
+ *
+ * Children nodes with different priorities are scheduled using the SP algorithm
+ * based on their priority, with zero (0) as the highest priority. Children with
+ * the same priority are scheduled using the WFQ algorithm according to their
+ * weights. The WFQ weight of a given child node is relative to the sum of the
+ * weights of all its sibling nodes that have the same priority, with one (1) as
+ * the lowest weight. For each SP priority, the WFQ weight mode can be set as
+ * either byte-based or packet-based.
+ *
+ * Each leaf node sits on top of a TX queue of the current Ethernet port. Hence,
+ * the leaf nodes are predefined, with their node IDs set to 0 .. (N-1), where N
+ * is the number of TX queues configured for the current Ethernet port. The
+ * non-leaf nodes have their IDs generated by the application.
+ */
+struct rte_tm_node_params {
+ /** Shaper profile for the private shaper. The absence of the private
+ * shaper for the current node is indicated by setting this parameter
+ * to RTE_TM_SHAPER_PROFILE_ID_NONE.
+ */
+ uint32_t shaper_profile_id;
+
+ /** User allocated array of valid shared shaper IDs. */
+ uint32_t *shared_shaper_id;
+
+ /** Number of shared shaper IDs in the *shared_shaper_id* array. */
+ uint32_t n_shared_shapers;
+
+ RTE_STD_C11
+ union {
+ /** Parameters only valid for non-leaf nodes. */
+ struct {
+ /** WFQ weight mode for each SP priority. When NULL, it
+ * indicates that WFQ is to be used for all priorities.
+ * When non-NULL, it points to a pre-allocated array of
+ * *n_sp_priorities* values, with non-zero value for
+ * byte-mode and zero for packet-mode.
+ */
+ int *wfq_weight_mode;
+
+ /** Number of SP priorities. */
+ uint32_t n_sp_priorities;
+ } nonleaf;
+
+ /** Parameters only valid for leaf nodes. */
+ struct {
+ /** Congestion management mode */
+ enum rte_tm_cman_mode cman;
+
+ /** WRED parameters (only valid when *cman* is set to
+ * WRED).
+ */
+ struct {
+ /** WRED profile for private WRED context. The
+ * absence of a private WRED context for the
+ * current leaf node is indicated by value
+ * RTE_TM_WRED_PROFILE_ID_NONE.
+ */
+ uint32_t wred_profile_id;
+
+ /** User allocated array of shared WRED context
+ * IDs. When set to NULL, it indicates that the
+ * current leaf node should not currently be
+ * part of any shared WRED contexts.
+ */
+ uint32_t *shared_wred_context_id;
+
+ /** Number of elements in the
+ * *shared_wred_context_id* array. Only valid
+ * when *shared_wred_context_id* is non-NULL,
+ * in which case it should be non-zero.
+ */
+ uint32_t n_shared_wred_contexts;
+ } wred;
+ } leaf;
+ };
+
+ /** Mask of statistics counter types to be enabled for this node. This
+ * needs to be a subset of the statistics counter types available for
+ * the current node. Any statistics counter type not included in this
+ * set is to be disabled for the current node.
+ * @see enum rte_tm_stats_type
+ */
+ uint64_t stats_mask;
+};
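+
+/* Editor's illustrative sketch, not part of the original header: leaf node
+ * parameters selecting WRED as the congestion management mode, with a private
+ * WRED context only. The WRED profile ID 0 is an assumption; any profile
+ * previously added with rte_tm_wred_profile_add() works.
+ *
+ *    struct rte_tm_node_params leaf_params = {
+ *        .shaper_profile_id = RTE_TM_SHAPER_PROFILE_ID_NONE, // no private shaper
+ *        .shared_shaper_id = NULL,
+ *        .n_shared_shapers = 0,
+ *        .leaf = {
+ *            .cman = RTE_TM_CMAN_WRED,
+ *            .wred = {
+ *                .wred_profile_id = 0,           // private WRED context
+ *                .shared_wred_context_id = NULL, // no shared WRED contexts
+ *                .n_shared_wred_contexts = 0,
+ *            },
+ *        },
+ *        .stats_mask = 0, // no counters enabled
+ *    };
+ */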
+
+/**
+ * Verbose error types.
+ *
+ * Most of them provide the type of the object referenced by struct
+ * rte_tm_error::cause.
+ */
+enum rte_tm_error_type {
+ RTE_TM_ERROR_TYPE_NONE, /**< No error. */
+ RTE_TM_ERROR_TYPE_UNSPECIFIED, /**< Cause unspecified. */
+ RTE_TM_ERROR_TYPE_CAPABILITIES,
+ RTE_TM_ERROR_TYPE_LEVEL_ID,
+ RTE_TM_ERROR_TYPE_WRED_PROFILE,
+ RTE_TM_ERROR_TYPE_WRED_PROFILE_GREEN,
+ RTE_TM_ERROR_TYPE_WRED_PROFILE_YELLOW,
+ RTE_TM_ERROR_TYPE_WRED_PROFILE_RED,
+ RTE_TM_ERROR_TYPE_WRED_PROFILE_ID,
+ RTE_TM_ERROR_TYPE_SHARED_WRED_CONTEXT_ID,
+ RTE_TM_ERROR_TYPE_SHAPER_PROFILE,
+ RTE_TM_ERROR_TYPE_SHAPER_PROFILE_COMMITTED_RATE,
+ RTE_TM_ERROR_TYPE_SHAPER_PROFILE_COMMITTED_SIZE,
+ RTE_TM_ERROR_TYPE_SHAPER_PROFILE_PEAK_RATE,
+ RTE_TM_ERROR_TYPE_SHAPER_PROFILE_PEAK_SIZE,
+ RTE_TM_ERROR_TYPE_SHAPER_PROFILE_PKT_ADJUST_LEN,
+ RTE_TM_ERROR_TYPE_SHAPER_PROFILE_ID,
+ RTE_TM_ERROR_TYPE_SHARED_SHAPER_ID,
+ RTE_TM_ERROR_TYPE_NODE_PARENT_NODE_ID,
+ RTE_TM_ERROR_TYPE_NODE_PRIORITY,
+ RTE_TM_ERROR_TYPE_NODE_WEIGHT,
+ RTE_TM_ERROR_TYPE_NODE_PARAMS,
+ RTE_TM_ERROR_TYPE_NODE_PARAMS_SHAPER_PROFILE_ID,
+ RTE_TM_ERROR_TYPE_NODE_PARAMS_SHARED_SHAPER_ID,
+ RTE_TM_ERROR_TYPE_NODE_PARAMS_N_SHARED_SHAPERS,
+ RTE_TM_ERROR_TYPE_NODE_PARAMS_WFQ_WEIGHT_MODE,
+ RTE_TM_ERROR_TYPE_NODE_PARAMS_N_SP_PRIORITIES,
+ RTE_TM_ERROR_TYPE_NODE_PARAMS_CMAN,
+ RTE_TM_ERROR_TYPE_NODE_PARAMS_WRED_PROFILE_ID,
+ RTE_TM_ERROR_TYPE_NODE_PARAMS_SHARED_WRED_CONTEXT_ID,
+ RTE_TM_ERROR_TYPE_NODE_PARAMS_N_SHARED_WRED_CONTEXTS,
+ RTE_TM_ERROR_TYPE_NODE_PARAMS_STATS,
+ RTE_TM_ERROR_TYPE_NODE_ID,
+};
+
+/**
+ * Verbose error structure definition.
+ *
+ * This object is normally allocated by applications and set by PMDs. The
+ * message points to a constant string which does not need to be freed by
+ * the application; however, its pointer can be considered valid only as long
+ * as its associated DPDK port remains configured. Closing the underlying
+ * device or unloading the PMD invalidates it.
+ *
+ * Both cause and message may be NULL regardless of the error type.
+ */
+struct rte_tm_error {
+ enum rte_tm_error_type type; /**< Cause field and error type. */
+ const void *cause; /**< Object responsible for the error. */
+ const char *message; /**< Human-readable error message. */
+};
+
+/**
+ * Traffic manager get number of leaf nodes
+ *
+ * Each leaf node sits on top of a TX queue of the current Ethernet port.
+ * Therefore, the set of leaf nodes is predefined, their number is always equal
+ * to N (where N is the number of TX queues configured for the current port)
+ * and their IDs are 0 .. (N-1).
+ *
+ * @param[in] port_id
+ * The port identifier of the Ethernet device.
+ * @param[out] n_leaf_nodes
+ * Number of leaf nodes for the current port.
+ * @param[out] error
+ * Error details. Filled in only on error, when not NULL.
+ * @return
+ * 0 on success, non-zero error code otherwise.
+ */
+int
+rte_tm_get_number_of_leaf_nodes(uint8_t port_id,
+ uint32_t *n_leaf_nodes,
+ struct rte_tm_error *error);
+
+/**
+ * Traffic manager node ID validate and type (i.e. leaf or non-leaf) get
+ *
+ * The leaf nodes have predefined IDs in the range of 0 .. (N-1), where N is
+ * the number of TX queues of the current Ethernet port. The non-leaf nodes
+ * have their IDs generated by the application outside of the above range,
+ * which is reserved for leaf nodes.
+ *
+ * @param[in] port_id
+ * The port identifier of the Ethernet device.
+ * @param[in] node_id
+ * Node ID value. Needs to be valid.
+ * @param[out] is_leaf
+ * Set to non-zero value when node is leaf and to zero otherwise (non-leaf).
+ * @param[out] error
+ * Error details. Filled in only on error, when not NULL.
+ * @return
+ * 0 on success, non-zero error code otherwise.
+ */
+int
+rte_tm_node_type_get(uint8_t port_id,
+ uint32_t node_id,
+ int *is_leaf,
+ struct rte_tm_error *error);
+
+/**
+ * Traffic manager capabilities get
+ *
+ * @param[in] port_id
+ * The port identifier of the Ethernet device.
+ * @param[out] cap
+ * Traffic manager capabilities. Needs to be pre-allocated and valid.
+ * @param[out] error
+ * Error details. Filled in only on error, when not NULL.
+ * @return
+ * 0 on success, non-zero error code otherwise.
+ */
+int
+rte_tm_capabilities_get(uint8_t port_id,
+ struct rte_tm_capabilities *cap,
+ struct rte_tm_error *error);
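+
+/* Editor's illustrative sketch, not part of the original header: query the
+ * capabilities before building a hierarchy, e.g. to check whether WRED and
+ * dynamic node add/delete are available. Assumes a valid *port_id* in scope.
+ *
+ *    struct rte_tm_capabilities cap;
+ *    struct rte_tm_error err;
+ *
+ *    if (rte_tm_capabilities_get(port_id, &cap, &err) != 0)
+ *        printf("TM not supported: %s\n", err.message ? err.message : "?");
+ *    else if (cap.cman_wred_context_n_max == 0)
+ *        printf("WRED not available on this port\n");
+ *    else if (!(cap.dynamic_update_mask & RTE_TM_UPDATE_NODE_ADD_DELETE))
+ *        printf("hierarchy is frozen after commit\n");
+ */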
+
+/**
+ * Traffic manager level capabilities get
+ *
+ * @param[in] port_id
+ * The port identifier of the Ethernet device.
+ * @param[in] level_id
+ * The hierarchy level identifier. The value of 0 identifies the level of the
+ * root node.
+ * @param[out] cap
+ * Traffic manager level capabilities. Needs to be pre-allocated and valid.
+ * @param[out] error
+ * Error details. Filled in only on error, when not NULL.
+ * @return
+ * 0 on success, non-zero error code otherwise.
+ */
+int
+rte_tm_level_capabilities_get(uint8_t port_id,
+ uint32_t level_id,
+ struct rte_tm_level_capabilities *cap,
+ struct rte_tm_error *error);
+
+/**
+ * Traffic manager node capabilities get
+ *
+ * @param[in] port_id
+ * The port identifier of the Ethernet device.
+ * @param[in] node_id
+ * Node ID. Needs to be valid.
+ * @param[out] cap
+ * Traffic manager node capabilities. Needs to be pre-allocated and valid.
+ * @param[out] error
+ * Error details. Filled in only on error, when not NULL.
+ * @return
+ * 0 on success, non-zero error code otherwise.
+ */
+int
+rte_tm_node_capabilities_get(uint8_t port_id,
+ uint32_t node_id,
+ struct rte_tm_node_capabilities *cap,
+ struct rte_tm_error *error);
+
+/**
+ * Traffic manager WRED profile add
+ *
+ * Create a new WRED profile with ID set to *wred_profile_id*. The new profile
+ * is used to create one or several WRED contexts.
+ *
+ * @param[in] port_id
+ * The port identifier of the Ethernet device.
+ * @param[in] wred_profile_id
+ * WRED profile ID for the new profile. Needs to be unused.
+ * @param[in] profile
+ * WRED profile parameters. Needs to be pre-allocated and valid.
+ * @param[out] error
+ * Error details. Filled in only on error, when not NULL.
+ * @return
+ * 0 on success, non-zero error code otherwise.
+ *
+ * @see struct rte_tm_capabilities::cman_wred_context_n_max
+ */
+int
+rte_tm_wred_profile_add(uint8_t port_id,
+ uint32_t wred_profile_id,
+ struct rte_tm_wred_params *profile,
+ struct rte_tm_error *error);
+
+/**
+ * Traffic manager WRED profile delete
+ *
+ * Delete an existing WRED profile. This operation fails when there is
+ * currently at least one user (i.e. WRED context) of this WRED profile.
+ *
+ * @param[in] port_id
+ * The port identifier of the Ethernet device.
+ * @param[in] wred_profile_id
+ *   WRED profile ID. Needs to be valid.
+ * @param[out] error
+ * Error details. Filled in only on error, when not NULL.
+ * @return
+ * 0 on success, non-zero error code otherwise.
+ *
+ * @see struct rte_tm_capabilities::cman_wred_context_n_max
+ */
+int
+rte_tm_wred_profile_delete(uint8_t port_id,
+ uint32_t wred_profile_id,
+ struct rte_tm_error *error);
+
+/**
+ * Traffic manager shared WRED context add or update
+ *
+ * When *shared_wred_context_id* is not a valid shared WRED context ID, a new
+ * WRED context with this ID is created using the WRED profile identified by
+ * *wred_profile_id*.
+ *
+ * When *shared_wred_context_id* is valid, this WRED context is no longer using
+ * the profile previously assigned to it and is updated to use the profile
+ * identified by *wred_profile_id*.
+ *
+ * A valid shared WRED context can be assigned to several hierarchy leaf nodes
+ * configured to use WRED as the congestion management mode.
+ *
+ * @param[in] port_id
+ * The port identifier of the Ethernet device.
+ * @param[in] shared_wred_context_id
+ * Shared WRED context ID
+ * @param[in] wred_profile_id
+ *   WRED profile ID. Needs to be valid.
+ * @param[out] error
+ * Error details. Filled in only on error, when not NULL.
+ * @return
+ * 0 on success, non-zero error code otherwise.
+ *
+ * @see struct rte_tm_capabilities::cman_wred_context_shared_n_max
+ */
+int
+rte_tm_shared_wred_context_add_update(uint8_t port_id,
+ uint32_t shared_wred_context_id,
+ uint32_t wred_profile_id,
+ struct rte_tm_error *error);
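+
+/* Editor's illustrative sketch, not part of the original header: the same
+ * call both creates and updates a shared WRED context. Context ID 0 and the
+ * two profile ID variables are assumptions made for the example.
+ *
+ *    struct rte_tm_error err;
+ *    uint32_t wred_profile_a = 0; // previously added WRED profiles
+ *    uint32_t wred_profile_b = 1;
+ *
+ *    // First call: context 0 does not exist yet, so it is created
+ *    rte_tm_shared_wred_context_add_update(port_id, 0, wred_profile_a, &err);
+ *
+ *    // Second call: context 0 now exists, so it is switched to profile b
+ *    rte_tm_shared_wred_context_add_update(port_id, 0, wred_profile_b, &err);
+ */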
+
+/**
+ * Traffic manager shared WRED context delete
+ *
+ * Delete an existing shared WRED context. This operation fails when there is
+ * currently at least one user (i.e. hierarchy leaf node) of this shared WRED
+ * context.
+ *
+ * @param[in] port_id
+ * The port identifier of the Ethernet device.
+ * @param[in] shared_wred_context_id
+ *   Shared WRED context ID. Needs to be valid.
+ * @param[out] error
+ * Error details. Filled in only on error, when not NULL.
+ * @return
+ * 0 on success, non-zero error code otherwise.
+ *
+ * @see struct rte_tm_capabilities::cman_wred_context_shared_n_max
+ */
+int
+rte_tm_shared_wred_context_delete(uint8_t port_id,
+ uint32_t shared_wred_context_id,
+ struct rte_tm_error *error);
+
+/**
+ * Traffic manager shaper profile add
+ *
+ * Create a new shaper profile with ID set to *shaper_profile_id*. The new
+ * shaper profile is used to create one or several shapers.
+ *
+ * @param[in] port_id
+ * The port identifier of the Ethernet device.
+ * @param[in] shaper_profile_id
+ * Shaper profile ID for the new profile. Needs to be unused.
+ * @param[in] profile
+ * Shaper profile parameters. Needs to be pre-allocated and valid.
+ * @param[out] error
+ * Error details. Filled in only on error, when not NULL.
+ * @return
+ * 0 on success, non-zero error code otherwise.
+ *
+ * @see struct rte_tm_capabilities::shaper_n_max
+ */
+int
+rte_tm_shaper_profile_add(uint8_t port_id,
+ uint32_t shaper_profile_id,
+ struct rte_tm_shaper_params *profile,
+ struct rte_tm_error *error);
+
+/**
+ * Traffic manager shaper profile delete
+ *
+ * Delete an existing shaper profile. This operation fails when there is
+ * currently at least one user (i.e. shaper) of this shaper profile.
+ *
+ * @param[in] port_id
+ * The port identifier of the Ethernet device.
+ * @param[in] shaper_profile_id
+ *   Shaper profile ID. Needs to be valid.
+ * @param[out] error
+ * Error details. Filled in only on error, when not NULL.
+ * @return
+ * 0 on success, non-zero error code otherwise.
+ *
+ * @see struct rte_tm_capabilities::shaper_n_max
+ */
+int
+rte_tm_shaper_profile_delete(uint8_t port_id,
+ uint32_t shaper_profile_id,
+ struct rte_tm_error *error);
+
+/**
+ * Traffic manager shared shaper add or update
+ *
+ * When *shared_shaper_id* is not a valid shared shaper ID, a new shared shaper
+ * with this ID is created using the shaper profile identified by
+ * *shaper_profile_id*.
+ *
+ * When *shared_shaper_id* is a valid shared shaper ID, this shared shaper is
+ * no longer using the shaper profile previously assigned to it and is updated
+ * to use the shaper profile identified by *shaper_profile_id*.
+ *
+ * @param[in] port_id
+ * The port identifier of the Ethernet device.
+ * @param[in] shared_shaper_id
+ * Shared shaper ID
+ * @param[in] shaper_profile_id
+ *   Shaper profile ID. Needs to be valid.
+ * @param[out] error
+ * Error details. Filled in only on error, when not NULL.
+ * @return
+ * 0 on success, non-zero error code otherwise.
+ *
+ * @see struct rte_tm_capabilities::shaper_shared_n_max
+ */
+int
+rte_tm_shared_shaper_add_update(uint8_t port_id,
+ uint32_t shared_shaper_id,
+ uint32_t shaper_profile_id,
+ struct rte_tm_error *error);
+
+/**
+ * Traffic manager shared shaper delete
+ *
+ * Delete an existing shared shaper. This operation fails when there is
+ * currently at least one user (i.e. hierarchy node) of this shared shaper.
+ *
+ * @param[in] port_id
+ * The port identifier of the Ethernet device.
+ * @param[in] shared_shaper_id
+ *   Shared shaper ID. Needs to be valid.
+ * @param[out] error
+ * Error details. Filled in only on error, when not NULL.
+ * @return
+ * 0 on success, non-zero error code otherwise.
+ *
+ * @see struct rte_tm_capabilities::shaper_shared_n_max
+ */
+int
+rte_tm_shared_shaper_delete(uint8_t port_id,
+ uint32_t shared_shaper_id,
+ struct rte_tm_error *error);
+
+/**
+ * Traffic manager node add
+ *
+ * Create a new node and connect it as a child of an existing node. The new
+ * node is further identified by *node_id*, which needs to be unused by any of
+ * the existing nodes. The parent node is identified by *parent_node_id*, which
+ * needs to be the valid ID of an existing non-leaf node. The parent node is
+ * going to use the provided SP *priority* and WFQ *weight* to schedule its new
+ * child node.
+ *
+ * This function has to be called for both leaf and non-leaf nodes. In the case
+ * of leaf nodes (i.e. *node_id* is within the range of 0 .. (N-1), with N as
+ * the number of configured TX queues of the current port), the leaf node is
+ * configured rather than created (as the set of leaf nodes is predefined) and
+ * it is also connected as a child of an existing node.
+ *
+ * The first node that is added becomes the root node and all the nodes that
+ * are subsequently added have to be added as descendants of the root node. The
+ * parent of the root node has to be specified as RTE_TM_NODE_ID_NULL and there
+ * can only be one node with this parent ID (i.e. the root node). Further
+ * restrictions apply to the root node: it needs to be non-leaf, its private
+ * shaper profile needs to be valid and single rate, and it cannot use any
+ * shared shapers.
+ *
+ * When called before rte_tm_hierarchy_commit() invocation, this function is
+ * typically used to define the initial start-up hierarchy for the port.
+ * Provided that dynamic hierarchy updates are supported by the current port (as
+ * advertised in the port capability set), this function can also be called
+ * after the rte_tm_hierarchy_commit() invocation.
+ *
+ * @param[in] port_id
+ * The port identifier of the Ethernet device.
+ * @param[in] node_id
+ * Node ID. Needs to be unused by any of the existing nodes.
+ * @param[in] parent_node_id
+ *   Parent node ID. Needs to be valid.
+ * @param[in] priority
+ * Node priority. The highest node priority is zero. Used by the SP algorithm
+ * running on the parent of the current node for scheduling this child node.
+ * @param[in] weight
+ * Node weight. The node weight is relative to the weight sum of all siblings
+ * that have the same priority. The lowest weight is one. Used by the WFQ
+ * algorithm running on the parent of the current node for scheduling this
+ * child node.
+ * @param[in] level_id
+ * Level ID that should be met by this node. The hierarchy level of the
+ * current node is already fully specified through its parent node (i.e. the
+ * level of this node is equal to the level of its parent node plus one),
+ * therefore the reason for providing this parameter is to enable the
+ * application to perform step-by-step checking of the node level during
+ * successive invocations of this function. When not desired, this check can
+ * be disabled by assigning value RTE_TM_NODE_LEVEL_ID_ANY to this parameter.
+ * @param[in] params
+ * Node parameters. Needs to be pre-allocated and valid.
+ * @param[out] error
+ * Error details. Filled in only on error, when not NULL.
+ * @return
+ * 0 on success, non-zero error code otherwise.
+ *
+ * @see rte_tm_hierarchy_commit()
+ * @see RTE_TM_UPDATE_NODE_ADD_DELETE
+ * @see RTE_TM_NODE_LEVEL_ID_ANY
+ * @see struct rte_tm_capabilities
+ */
+int
+rte_tm_node_add(uint8_t port_id,
+ uint32_t node_id,
+ uint32_t parent_node_id,
+ uint32_t priority,
+ uint32_t weight,
+ uint32_t level_id,
+ struct rte_tm_node_params *params,
+ struct rte_tm_error *error);
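+
+/* Editor's illustrative sketch, not part of the original header: a minimal
+ * two-level start-up hierarchy with one non-leaf root feeding leaf node 0
+ * (TX queue 0). Root node ID 100 and shaper profile ID 0 are assumptions;
+ * the profile must be single rate, as required for the root node.
+ *
+ *    struct rte_tm_error err;
+ *    struct rte_tm_node_params root_params = {
+ *        .shaper_profile_id = 0, // valid single rate profile (root rule)
+ *        .nonleaf = { .wfq_weight_mode = NULL, .n_sp_priorities = 1 },
+ *    };
+ *    struct rte_tm_node_params leaf_params = {
+ *        .shaper_profile_id = RTE_TM_SHAPER_PROFILE_ID_NONE,
+ *        .leaf = { .cman = RTE_TM_CMAN_TAIL_DROP },
+ *    };
+ *
+ *    // Root node: added first, parent is RTE_TM_NODE_ID_NULL
+ *    rte_tm_node_add(port_id, 100, RTE_TM_NODE_ID_NULL, 0, 1,
+ *        RTE_TM_NODE_LEVEL_ID_ANY, &root_params, &err);
+ *
+ *    // Leaf node 0 (TX queue 0) as child of the root, priority 0, weight 1
+ *    rte_tm_node_add(port_id, 0, 100, 0, 1,
+ *        RTE_TM_NODE_LEVEL_ID_ANY, &leaf_params, &err);
+ */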
+
+/**
+ * Traffic manager node delete
+ *
+ * Delete an existing node. This operation fails when this node currently has
+ * at least one user (i.e. child node).
+ *
+ * When called before rte_tm_hierarchy_commit() invocation, this function is
+ * typically used to define the initial start-up hierarchy for the port.
+ * Provided that dynamic hierarchy updates are supported by the current port (as
+ * advertised in the port capability set), this function can also be called
+ * after the rte_tm_hierarchy_commit() invocation.
+ *
+ * @param[in] port_id
+ * The port identifier of the Ethernet device.
+ * @param[in] node_id
+ * Node ID. Needs to be valid.
+ * @param[out] error
+ * Error details. Filled in only on error, when not NULL.
+ * @return
+ * 0 on success, non-zero error code otherwise.
+ *
+ * @see RTE_TM_UPDATE_NODE_ADD_DELETE
+ */
+int
+rte_tm_node_delete(uint8_t port_id,
+ uint32_t node_id,
+ struct rte_tm_error *error);
+
+/**
+ * Traffic manager node suspend
+ *
+ * Suspend an existing node. While the node is in suspended state, no packet is
+ * scheduled from this node and its descendants. The node exits the suspended
+ * state through the node resume operation.
+ *
+ * @param[in] port_id
+ * The port identifier of the Ethernet device.
+ * @param[in] node_id
+ * Node ID. Needs to be valid.
+ * @param[out] error
+ * Error details. Filled in only on error, when not NULL.
+ * @return
+ * 0 on success, non-zero error code otherwise.
+ *
+ * @see rte_tm_node_resume()
+ * @see RTE_TM_UPDATE_NODE_SUSPEND_RESUME
+ */
+int
+rte_tm_node_suspend(uint8_t port_id,
+ uint32_t node_id,
+ struct rte_tm_error *error);
+
+/**
+ * Traffic manager node resume
+ *
+ * Resume an existing node that is currently in suspended state. The node
+ * entered the suspended state as result of a previous node suspend operation.
+ *
+ * @param[in] port_id
+ * The port identifier of the Ethernet device.
+ * @param[in] node_id
+ * Node ID. Needs to be valid.
+ * @param[out] error
+ * Error details. Filled in only on error, when not NULL.
+ * @return
+ * 0 on success, non-zero error code otherwise.
+ *
+ * @see rte_tm_node_suspend()
+ * @see RTE_TM_UPDATE_NODE_SUSPEND_RESUME
+ */
+int
+rte_tm_node_resume(uint8_t port_id,
+ uint32_t node_id,
+ struct rte_tm_error *error);
+
+/**
+ * Traffic manager hierarchy commit
+ *
+ * This function is called during the port initialization phase (before the
+ * Ethernet port is started) to freeze the start-up hierarchy.
+ *
+ * This function typically performs the following steps:
+ * a) It validates the start-up hierarchy that was previously defined for the
+ * current port through successive rte_tm_node_add() invocations;
+ * b) Assuming successful validation, it performs all the necessary port
+ * specific configuration operations to install the specified hierarchy on
+ * the current port, with immediate effect once the port is started.
+ *
+ * This function fails when the currently configured hierarchy is not supported
+ * by the Ethernet port, in which case the user can abort or try out another
+ * hierarchy configuration (e.g. a hierarchy with fewer leaf nodes), which can
+ * be built from scratch (when *clear_on_fail* is enabled) or by modifying the
+ * existing hierarchy configuration (when *clear_on_fail* is disabled).
+ *
+ * Note that this function can still fail due to other causes (e.g. not enough
+ * memory available in the system), even though the specified hierarchy is
+ * supported in principle by the current port.
+ *
+ * @param[in] port_id
+ * The port identifier of the Ethernet device.
+ * @param[in] clear_on_fail
+ * On function call failure, hierarchy is cleared when this parameter is
+ * non-zero and preserved when this parameter is equal to zero.
+ * @param[out] error
+ * Error details. Filled in only on error, when not NULL.
+ * @return
+ * 0 on success, non-zero error code otherwise.
+ *
+ * @see rte_tm_node_add()
+ * @see rte_tm_node_delete()
+ */
+int
+rte_tm_hierarchy_commit(uint8_t port_id,
+ int clear_on_fail,
+ struct rte_tm_error *error);
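+
+/* Editor's illustrative sketch, not part of the original header: freeze the
+ * start-up hierarchy before starting the port. With *clear_on_fail* set to
+ * zero, a failed commit preserves the hierarchy so it can be amended and the
+ * commit retried instead of being rebuilt from scratch.
+ *
+ *    struct rte_tm_error err;
+ *
+ *    if (rte_tm_hierarchy_commit(port_id, 0, &err) != 0) {
+ *        // hierarchy preserved: modify it (e.g. delete some leaf nodes)
+ *        // via rte_tm_node_delete() / rte_tm_node_add(), then retry
+ *    }
+ *    rte_eth_dev_start(port_id);
+ */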
+
+/**
+ * Traffic manager node parent update
+ *
+ * Restriction for root node: its parent cannot be changed.
+ *
+ * This function can only be called after the rte_tm_hierarchy_commit()
+ * invocation. Its success depends on the port support for this operation, as
+ * advertised through the port capability set.
+ *
+ * @param[in] port_id
+ * The port identifier of the Ethernet device.
+ * @param[in] node_id
+ * Node ID. Needs to be valid.
+ * @param[in] parent_node_id
+ * Node ID for the new parent. Needs to be valid.
+ * @param[in] priority
+ * Node priority. The highest node priority is zero. Used by the SP algorithm
+ * running on the parent of the current node for scheduling this child node.
+ * @param[in] weight
+ * Node weight. The node weight is relative to the weight sum of all siblings
+ *   that have the same priority. The lowest weight is one. Used by the WFQ
+ * algorithm running on the parent of the current node for scheduling this
+ * child node.
+ * @param[out] error
+ * Error details. Filled in only on error, when not NULL.
+ * @return
+ * 0 on success, non-zero error code otherwise.
+ *
+ * @see RTE_TM_UPDATE_NODE_PARENT_KEEP_LEVEL
+ * @see RTE_TM_UPDATE_NODE_PARENT_CHANGE_LEVEL
+ */
+int
+rte_tm_node_parent_update(uint8_t port_id,
+ uint32_t node_id,
+ uint32_t parent_node_id,
+ uint32_t priority,
+ uint32_t weight,
+ struct rte_tm_error *error);
+
+/**
+ * Traffic manager node private shaper update
+ *
+ * Restriction for the root node: its private shaper profile needs to be valid
+ * and single rate.
+ *
+ * @param[in] port_id
+ * The port identifier of the Ethernet device.
+ * @param[in] node_id
+ * Node ID. Needs to be valid.
+ * @param[in] shaper_profile_id
+ * Shaper profile ID for the private shaper of the current node. Needs to be
+ * either valid shaper profile ID or RTE_TM_SHAPER_PROFILE_ID_NONE, with
+ * the latter disabling the private shaper of the current node.
+ * @param[out] error
+ * Error details. Filled in only on error, when not NULL.
+ * @return
+ * 0 on success, non-zero error code otherwise.
+ *
+ * @see struct rte_tm_capabilities::shaper_private_n_max
+ */
+int
+rte_tm_node_shaper_update(uint8_t port_id,
+ uint32_t node_id,
+ uint32_t shaper_profile_id,
+ struct rte_tm_error *error);
+
+/**
+ * Traffic manager node shared shapers update
+ *
+ * Restriction for the root node: it cannot use any shared shapers.
+ *
+ * @param[in] port_id
+ * The port identifier of the Ethernet device.
+ * @param[in] node_id
+ * Node ID. Needs to be valid.
+ * @param[in] shared_shaper_id
+ * Shared shaper ID. Needs to be valid.
+ * @param[in] add
+ * Set to non-zero value to add this shared shaper to current node or to zero
+ * to delete this shared shaper from current node.
+ * @param[out] error
+ * Error details. Filled in only on error, when not NULL.
+ * @return
+ * 0 on success, non-zero error code otherwise.
+ *
+ * @see struct rte_tm_capabilities::shaper_shared_n_max
+ */
+int
+rte_tm_node_shared_shaper_update(uint8_t port_id,
+ uint32_t node_id,
+ uint32_t shared_shaper_id,
+ int add,
+ struct rte_tm_error *error);
+
+/**
+ * Traffic manager node enabled statistics counters update
+ *
+ * @param[in] port_id
+ * The port identifier of the Ethernet device.
+ * @param[in] node_id
+ * Node ID. Needs to be valid.
+ * @param[in] stats_mask
+ * Mask of statistics counter types to be enabled for the current node. This
+ * needs to be a subset of the statistics counter types available for the
+ * current node. Any statistics counter type not included in this set is to
+ * be disabled for the current node.
+ * @param[out] error
+ * Error details. Filled in only on error, when not NULL.
+ * @return
+ * 0 on success, non-zero error code otherwise.
+ *
+ * @see enum rte_tm_stats_type
+ * @see RTE_TM_UPDATE_NODE_STATS
+ */
+int
+rte_tm_node_stats_update(uint8_t port_id,
+ uint32_t node_id,
+ uint64_t stats_mask,
+ struct rte_tm_error *error);
+
+/**
+ * Traffic manager node WFQ weight mode update
+ *
+ * @param[in] port_id
+ * The port identifier of the Ethernet device.
+ * @param[in] node_id
+ * Node ID. Needs to be valid leaf node ID.
+ * @param[in] wfq_weight_mode
+ * WFQ weight mode for each SP priority. When NULL, it indicates that WFQ is
+ * to be used for all priorities. When non-NULL, it points to a pre-allocated
+ * array of *n_sp_priorities* values, with non-zero value for byte-mode and
+ * zero for packet-mode.
+ * @param[in] n_sp_priorities
+ * Number of SP priorities.
+ * @param[out] error
+ * Error details. Filled in only on error, when not NULL.
+ * @return
+ * 0 on success, non-zero error code otherwise.
+ *
+ * @see RTE_TM_UPDATE_NODE_WFQ_WEIGHT_MODE
+ * @see RTE_TM_UPDATE_NODE_N_SP_PRIORITIES
+ */
+int
+rte_tm_node_wfq_weight_mode_update(uint8_t port_id,
+ uint32_t node_id,
+ int *wfq_weight_mode,
+ uint32_t n_sp_priorities,
+ struct rte_tm_error *error);
+
+/**
+ * Traffic manager node congestion management mode update
+ *
+ * @param[in] port_id
+ * The port identifier of the Ethernet device.
+ * @param[in] node_id
+ * Node ID. Needs to be valid leaf node ID.
+ * @param[in] cman
+ * Congestion management mode.
+ * @param[out] error
+ * Error details. Filled in only on error, when not NULL.
+ * @return
+ * 0 on success, non-zero error code otherwise.
+ *
+ * @see RTE_TM_UPDATE_NODE_CMAN
+ */
+int
+rte_tm_node_cman_update(uint8_t port_id,
+ uint32_t node_id,
+ enum rte_tm_cman_mode cman,
+ struct rte_tm_error *error);
+
+/**
+ * Traffic manager node private WRED context update
+ *
+ * @param[in] port_id
+ * The port identifier of the Ethernet device.
+ * @param[in] node_id
+ * Node ID. Needs to be valid leaf node ID.
+ * @param[in] wred_profile_id
+ * WRED profile ID for the private WRED context of the current node. Needs to
+ * be either valid WRED profile ID or RTE_TM_WRED_PROFILE_ID_NONE, with the
+ * latter disabling the private WRED context of the current node.
+ * @param[out] error
+ * Error details. Filled in only on error, when not NULL.
+ * @return
+ * 0 on success, non-zero error code otherwise.
+ *
+ * @see struct rte_tm_capabilities::cman_wred_context_private_n_max
+ */
+int
+rte_tm_node_wred_context_update(uint8_t port_id,
+ uint32_t node_id,
+ uint32_t wred_profile_id,
+ struct rte_tm_error *error);
+
+/**
+ * Traffic manager node shared WRED context update
+ *
+ * @param[in] port_id
+ * The port identifier of the Ethernet device.
+ * @param[in] node_id
+ * Node ID. Needs to be valid leaf node ID.
+ * @param[in] shared_wred_context_id
+ * Shared WRED context ID. Needs to be valid.
+ * @param[in] add
+ * Set to non-zero value to add this shared WRED context to current node or
+ * to zero to delete this shared WRED context from current node.
+ * @param[out] error
+ * Error details. Filled in only on error, when not NULL.
+ * @return
+ * 0 on success, non-zero error code otherwise.
+ *
+ * @see struct rte_tm_capabilities::cman_wred_context_shared_n_max
+ */
+int
+rte_tm_node_shared_wred_context_update(uint8_t port_id,
+ uint32_t node_id,
+ uint32_t shared_wred_context_id,
+ int add,
+ struct rte_tm_error *error);
+
+/**
+ * Traffic manager node statistics counters read
+ *
+ * @param[in] port_id
+ * The port identifier of the Ethernet device.
+ * @param[in] node_id
+ * Node ID. Needs to be valid.
+ * @param[out] stats
+ * When non-NULL, it contains the current value for the statistics counters
+ * enabled for the current node.
+ * @param[out] stats_mask
+ * When non-NULL, it contains the mask of statistics counter types that are
+ * currently enabled for this node, indicating which of the counters
+ * retrieved with the *stats* structure are valid.
+ * @param[in] clear
+ * When this parameter has a non-zero value, the statistics counters are
+ * cleared (i.e. set to zero) immediately after they have been read,
+ * otherwise the statistics counters are left untouched.
+ * @param[out] error
+ * Error details. Filled in only on error, when not NULL.
+ * @return
+ * 0 on success, non-zero error code otherwise.
+ *
+ * @see enum rte_tm_stats_type
+ */
+int
+rte_tm_node_stats_read(uint8_t port_id,
+ uint32_t node_id,
+ struct rte_tm_node_stats *stats,
+ uint64_t *stats_mask,
+ int clear,
+ struct rte_tm_error *error);
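+
+/* Editor's illustrative sketch, not part of the original header: read and
+ * clear the counters of leaf node 0, using the returned *stats_mask* to tell
+ * which fields of *stats* are meaningful. Assumes the RTE_TM_STATS_N_PKTS
+ * counter type and the n_pkts field defined earlier in this file.
+ *
+ *    struct rte_tm_node_stats stats;
+ *    uint64_t mask = 0;
+ *    struct rte_tm_error err;
+ *
+ *    if (rte_tm_node_stats_read(port_id, 0, &stats, &mask, 1, &err) == 0 &&
+ *            (mask & RTE_TM_STATS_N_PKTS))
+ *        printf("node 0: %" PRIu64 " packets\n", stats.n_pkts);
+ */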
+
+/**
+ * Traffic manager packet marking - VLAN DEI (IEEE 802.1Q)
+ *
+ * IEEE 802.1p maps the traffic class to the VLAN Priority Code Point (PCP)
+ * field (3 bits), while IEEE 802.1Q maps the drop priority to the VLAN Drop
+ * Eligible Indicator (DEI) field (1 bit), which was previously named Canonical
+ * Format Indicator (CFI).
+ *
+ * All VLAN frames of a given color get their DEI bit set if marking is enabled
+ * for this color; otherwise, their DEI bit is left as is (either set or not).
+ *
+ * @param[in] port_id
+ * The port identifier of the Ethernet device.
+ * @param[in] mark_green
+ * Set to non-zero value to enable marking of green packets and to zero to
+ * disable it.
+ * @param[in] mark_yellow
+ * Set to non-zero value to enable marking of yellow packets and to zero to
+ * disable it.
+ * @param[in] mark_red
+ * Set to non-zero value to enable marking of red packets and to zero to
+ * disable it.
+ * @param[out] error
+ * Error details. Filled in only on error, when not NULL.
+ * @return
+ * 0 on success, non-zero error code otherwise.
+ *
+ * @see struct rte_tm_capabilities::mark_vlan_dei_supported
+ */
+int
+rte_tm_mark_vlan_dei(uint8_t port_id,
+ int mark_green,
+ int mark_yellow,
+ int mark_red,
+ struct rte_tm_error *error);
+
+/**
+ * Traffic manager packet marking - IPv4 / IPv6 ECN (IETF RFC 3168)
+ *
+ * IETF RFCs 2474 and 3168 reorganize the IPv4 Type of Service (TOS) field
+ * (8 bits) and the IPv6 Traffic Class (TC) field (8 bits) into Differentiated
+ * Services Codepoint (DSCP) field (6 bits) and Explicit Congestion
+ * Notification (ECN) field (2 bits). The DSCP field is typically used to
+ * encode the traffic class and/or drop priority (RFC 2597), while the ECN
+ * field is used by RFC 3168 to implement a congestion notification mechanism
+ * to be leveraged by transport layer protocols such as TCP and SCTP that have
+ * congestion control mechanisms.
+ *
+ * When congestion is experienced, as an alternative to dropping the packet,
+ * routers can change the ECN field of input packets from 2'b01 or 2'b10
+ * (values indicating that source endpoint is ECN-capable) to 2'b11 (meaning
+ * that congestion is experienced). The destination endpoint can use the
+ * ECN-Echo (ECE) TCP flag to relay the congestion indication back to the
+ * source endpoint, which acknowledges it back to the destination endpoint with
+ * the Congestion Window Reduced (CWR) TCP flag.
+ *
+ * All IPv4/IPv6 packets of a given color with ECN set to 2'b01 or 2'b10
+ * carrying TCP or SCTP have their ECN set to 2'b11 if the marking feature is
+ * enabled for the current color, otherwise the ECN field is left as is.
+ *
+ * @param[in] port_id
+ * The port identifier of the Ethernet device.
+ * @param[in] mark_green
+ * Set to non-zero value to enable marking of green packets and to zero to
+ * disable it.
+ * @param[in] mark_yellow
+ * Set to non-zero value to enable marking of yellow packets and to zero to
+ * disable it.
+ * @param[in] mark_red
+ * Set to non-zero value to enable marking of red packets and to zero to
+ * disable it.
+ * @param[out] error
+ * Error details. Filled in only on error, when not NULL.
+ * @return
+ * 0 on success, non-zero error code otherwise.
+ *
+ * @see struct rte_tm_capabilities::mark_ip_ecn_tcp_supported
+ * @see struct rte_tm_capabilities::mark_ip_ecn_sctp_supported
+ */
+int
+rte_tm_mark_ip_ecn(uint8_t port_id,
+ int mark_green,
+ int mark_yellow,
+ int mark_red,
+ struct rte_tm_error *error);
+
+/**
+ * Traffic manager packet marking - IPv4 / IPv6 DSCP (IETF RFC 2597)
+ *
+ * IETF RFC 2597 maps the traffic class and the drop priority to the IPv4/IPv6
+ * Differentiated Services Codepoint (DSCP) field (6 bits). Here are the DSCP
+ * values proposed by this RFC:
+ *
+ * <pre> Class 1 Class 2 Class 3 Class 4 </pre>
+ * <pre> +----------+----------+----------+----------+</pre>
+ * <pre>Low Drop Prec | 001010 | 010010 | 011010 | 100010 |</pre>
+ * <pre>Medium Drop Prec | 001100 | 010100 | 011100 | 100100 |</pre>
+ * <pre>High Drop Prec | 001110 | 010110 | 011110 | 100110 |</pre>
+ * <pre> +----------+----------+----------+----------+</pre>
+ *
+ * There are 4 traffic classes (classes 1 .. 4) encoded by DSCP bits 1 and 2,
+ * as well as 3 drop priorities (low/medium/high) encoded by DSCP bits 3 and 4.
+ *
+ * All IPv4/IPv6 packets have their color marked into DSCP bits 3 and 4 as
+ * follows: green mapped to Low Drop Precedence (2'b01), yellow to Medium
+ * (2'b10) and red to High (2'b11). Marking needs to be explicitly enabled
+ * for each color; when not enabled for a given color, the DSCP field of all
+ * packets with that color is left as is.
+ *
+ * @param[in] port_id
+ * The port identifier of the Ethernet device.
+ * @param[in] mark_green
+ * Set to non-zero value to enable marking of green packets and to zero to
+ * disable it.
+ * @param[in] mark_yellow
+ * Set to non-zero value to enable marking of yellow packets and to zero to
+ * disable it.
+ * @param[in] mark_red
+ * Set to non-zero value to enable marking of red packets and to zero to
+ * disable it.
+ * @param[out] error
+ * Error details. Filled in only on error, when not NULL.
+ * @return
+ * 0 on success, non-zero error code otherwise.
+ *
+ * @see struct rte_tm_capabilities::mark_ip_dscp_supported
+ */
+int
+rte_tm_mark_ip_dscp(uint8_t port_id,
+ int mark_green,
+ int mark_yellow,
+ int mark_red,
+ struct rte_tm_error *error);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* __INCLUDE_RTE_TM_H__ */
diff --git a/lib/librte_ether/rte_tm_driver.h b/lib/librte_ether/rte_tm_driver.h
new file mode 100644
index 00000000..a5b698fe
--- /dev/null
+++ b/lib/librte_ether/rte_tm_driver.h
@@ -0,0 +1,366 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright(c) 2017 Intel Corporation. All rights reserved.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef __INCLUDE_RTE_TM_DRIVER_H__
+#define __INCLUDE_RTE_TM_DRIVER_H__
+
+/**
+ * @file
+ * RTE Generic Traffic Manager API (Driver Side)
+ *
+ * This file provides implementation helpers for internal use by PMDs; they
+ * are not intended to be exposed to applications and are not subject to ABI
+ * versioning.
+ */
+
+#include <stdint.h>
+
+#include <rte_errno.h>
+#include "rte_ethdev.h"
+#include "rte_tm.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/** @internal Traffic manager node ID validate and type get */
+typedef int (*rte_tm_node_type_get_t)(struct rte_eth_dev *dev,
+ uint32_t node_id,
+ int *is_leaf,
+ struct rte_tm_error *error);
+
+/** @internal Traffic manager capabilities get */
+typedef int (*rte_tm_capabilities_get_t)(struct rte_eth_dev *dev,
+ struct rte_tm_capabilities *cap,
+ struct rte_tm_error *error);
+
+/** @internal Traffic manager level capabilities get */
+typedef int (*rte_tm_level_capabilities_get_t)(struct rte_eth_dev *dev,
+ uint32_t level_id,
+ struct rte_tm_level_capabilities *cap,
+ struct rte_tm_error *error);
+
+/** @internal Traffic manager node capabilities get */
+typedef int (*rte_tm_node_capabilities_get_t)(struct rte_eth_dev *dev,
+ uint32_t node_id,
+ struct rte_tm_node_capabilities *cap,
+ struct rte_tm_error *error);
+
+/** @internal Traffic manager WRED profile add */
+typedef int (*rte_tm_wred_profile_add_t)(struct rte_eth_dev *dev,
+ uint32_t wred_profile_id,
+ struct rte_tm_wred_params *profile,
+ struct rte_tm_error *error);
+
+/** @internal Traffic manager WRED profile delete */
+typedef int (*rte_tm_wred_profile_delete_t)(struct rte_eth_dev *dev,
+ uint32_t wred_profile_id,
+ struct rte_tm_error *error);
+
+/** @internal Traffic manager shared WRED context add */
+typedef int (*rte_tm_shared_wred_context_add_update_t)(
+ struct rte_eth_dev *dev,
+ uint32_t shared_wred_context_id,
+ uint32_t wred_profile_id,
+ struct rte_tm_error *error);
+
+/** @internal Traffic manager shared WRED context delete */
+typedef int (*rte_tm_shared_wred_context_delete_t)(
+ struct rte_eth_dev *dev,
+ uint32_t shared_wred_context_id,
+ struct rte_tm_error *error);
+
+/** @internal Traffic manager shaper profile add */
+typedef int (*rte_tm_shaper_profile_add_t)(struct rte_eth_dev *dev,
+ uint32_t shaper_profile_id,
+ struct rte_tm_shaper_params *profile,
+ struct rte_tm_error *error);
+
+/** @internal Traffic manager shaper profile delete */
+typedef int (*rte_tm_shaper_profile_delete_t)(struct rte_eth_dev *dev,
+ uint32_t shaper_profile_id,
+ struct rte_tm_error *error);
+
+/** @internal Traffic manager shared shaper add/update */
+typedef int (*rte_tm_shared_shaper_add_update_t)(struct rte_eth_dev *dev,
+ uint32_t shared_shaper_id,
+ uint32_t shaper_profile_id,
+ struct rte_tm_error *error);
+
+/** @internal Traffic manager shared shaper delete */
+typedef int (*rte_tm_shared_shaper_delete_t)(struct rte_eth_dev *dev,
+ uint32_t shared_shaper_id,
+ struct rte_tm_error *error);
+
+/** @internal Traffic manager node add */
+typedef int (*rte_tm_node_add_t)(struct rte_eth_dev *dev,
+ uint32_t node_id,
+ uint32_t parent_node_id,
+ uint32_t priority,
+ uint32_t weight,
+ uint32_t level_id,
+ struct rte_tm_node_params *params,
+ struct rte_tm_error *error);
+
+/** @internal Traffic manager node delete */
+typedef int (*rte_tm_node_delete_t)(struct rte_eth_dev *dev,
+ uint32_t node_id,
+ struct rte_tm_error *error);
+
+/** @internal Traffic manager node suspend */
+typedef int (*rte_tm_node_suspend_t)(struct rte_eth_dev *dev,
+ uint32_t node_id,
+ struct rte_tm_error *error);
+
+/** @internal Traffic manager node resume */
+typedef int (*rte_tm_node_resume_t)(struct rte_eth_dev *dev,
+ uint32_t node_id,
+ struct rte_tm_error *error);
+
+/** @internal Traffic manager hierarchy commit */
+typedef int (*rte_tm_hierarchy_commit_t)(struct rte_eth_dev *dev,
+ int clear_on_fail,
+ struct rte_tm_error *error);
+
+/** @internal Traffic manager node parent update */
+typedef int (*rte_tm_node_parent_update_t)(struct rte_eth_dev *dev,
+ uint32_t node_id,
+ uint32_t parent_node_id,
+ uint32_t priority,
+ uint32_t weight,
+ struct rte_tm_error *error);
+
+/** @internal Traffic manager node shaper update */
+typedef int (*rte_tm_node_shaper_update_t)(struct rte_eth_dev *dev,
+ uint32_t node_id,
+ uint32_t shaper_profile_id,
+ struct rte_tm_error *error);
+
+/** @internal Traffic manager node shared shaper update */
+typedef int (*rte_tm_node_shared_shaper_update_t)(struct rte_eth_dev *dev,
+ uint32_t node_id,
+ uint32_t shared_shaper_id,
+ int32_t add,
+ struct rte_tm_error *error);
+
+/** @internal Traffic manager node stats update */
+typedef int (*rte_tm_node_stats_update_t)(struct rte_eth_dev *dev,
+ uint32_t node_id,
+ uint64_t stats_mask,
+ struct rte_tm_error *error);
+
+/** @internal Traffic manager node WFQ weight mode update */
+typedef int (*rte_tm_node_wfq_weight_mode_update_t)(
+ struct rte_eth_dev *dev,
+ uint32_t node_id,
+ int *wfq_weight_mode,
+ uint32_t n_sp_priorities,
+ struct rte_tm_error *error);
+
+/** @internal Traffic manager node congestion management mode update */
+typedef int (*rte_tm_node_cman_update_t)(struct rte_eth_dev *dev,
+ uint32_t node_id,
+ enum rte_tm_cman_mode cman,
+ struct rte_tm_error *error);
+
+/** @internal Traffic manager node WRED context update */
+typedef int (*rte_tm_node_wred_context_update_t)(
+ struct rte_eth_dev *dev,
+ uint32_t node_id,
+ uint32_t wred_profile_id,
+ struct rte_tm_error *error);
+
+/** @internal Traffic manager node shared WRED context update */
+typedef int (*rte_tm_node_shared_wred_context_update_t)(
+ struct rte_eth_dev *dev,
+ uint32_t node_id,
+ uint32_t shared_wred_context_id,
+ int add,
+ struct rte_tm_error *error);
+
+/** @internal Traffic manager read stats counters for specific node */
+typedef int (*rte_tm_node_stats_read_t)(struct rte_eth_dev *dev,
+ uint32_t node_id,
+ struct rte_tm_node_stats *stats,
+ uint64_t *stats_mask,
+ int clear,
+ struct rte_tm_error *error);
+
+/** @internal Traffic manager packet marking - VLAN DEI */
+typedef int (*rte_tm_mark_vlan_dei_t)(struct rte_eth_dev *dev,
+ int mark_green,
+ int mark_yellow,
+ int mark_red,
+ struct rte_tm_error *error);
+
+/** @internal Traffic manager packet marking - IPv4/IPv6 ECN */
+typedef int (*rte_tm_mark_ip_ecn_t)(struct rte_eth_dev *dev,
+ int mark_green,
+ int mark_yellow,
+ int mark_red,
+ struct rte_tm_error *error);
+
+/** @internal Traffic manager packet marking - IPv4/IPv6 DSCP */
+typedef int (*rte_tm_mark_ip_dscp_t)(struct rte_eth_dev *dev,
+ int mark_green,
+ int mark_yellow,
+ int mark_red,
+ struct rte_tm_error *error);
+
+struct rte_tm_ops {
+ /** Traffic manager node type get */
+ rte_tm_node_type_get_t node_type_get;
+
+ /** Traffic manager capabilities get */
+ rte_tm_capabilities_get_t capabilities_get;
+ /** Traffic manager level capabilities get */
+ rte_tm_level_capabilities_get_t level_capabilities_get;
+ /** Traffic manager node capabilities get */
+ rte_tm_node_capabilities_get_t node_capabilities_get;
+
+ /** Traffic manager WRED profile add */
+ rte_tm_wred_profile_add_t wred_profile_add;
+ /** Traffic manager WRED profile delete */
+ rte_tm_wred_profile_delete_t wred_profile_delete;
+ /** Traffic manager shared WRED context add/update */
+ rte_tm_shared_wred_context_add_update_t
+ shared_wred_context_add_update;
+ /** Traffic manager shared WRED context delete */
+ rte_tm_shared_wred_context_delete_t
+ shared_wred_context_delete;
+
+ /** Traffic manager shaper profile add */
+ rte_tm_shaper_profile_add_t shaper_profile_add;
+ /** Traffic manager shaper profile delete */
+ rte_tm_shaper_profile_delete_t shaper_profile_delete;
+ /** Traffic manager shared shaper add/update */
+ rte_tm_shared_shaper_add_update_t shared_shaper_add_update;
+ /** Traffic manager shared shaper delete */
+ rte_tm_shared_shaper_delete_t shared_shaper_delete;
+
+ /** Traffic manager node add */
+ rte_tm_node_add_t node_add;
+ /** Traffic manager node delete */
+ rte_tm_node_delete_t node_delete;
+ /** Traffic manager node suspend */
+ rte_tm_node_suspend_t node_suspend;
+ /** Traffic manager node resume */
+ rte_tm_node_resume_t node_resume;
+ /** Traffic manager hierarchy commit */
+ rte_tm_hierarchy_commit_t hierarchy_commit;
+
+ /** Traffic manager node parent update */
+ rte_tm_node_parent_update_t node_parent_update;
+ /** Traffic manager node shaper update */
+ rte_tm_node_shaper_update_t node_shaper_update;
+ /** Traffic manager node shared shaper update */
+ rte_tm_node_shared_shaper_update_t node_shared_shaper_update;
+ /** Traffic manager node stats update */
+ rte_tm_node_stats_update_t node_stats_update;
+ /** Traffic manager node WFQ weight mode update */
+ rte_tm_node_wfq_weight_mode_update_t node_wfq_weight_mode_update;
+ /** Traffic manager node congestion management mode update */
+ rte_tm_node_cman_update_t node_cman_update;
+ /** Traffic manager node WRED context update */
+ rte_tm_node_wred_context_update_t node_wred_context_update;
+ /** Traffic manager node shared WRED context update */
+ rte_tm_node_shared_wred_context_update_t
+ node_shared_wred_context_update;
+ /** Traffic manager read statistics counters for a specific node */
+ rte_tm_node_stats_read_t node_stats_read;
+
+ /** Traffic manager packet marking - VLAN DEI */
+ rte_tm_mark_vlan_dei_t mark_vlan_dei;
+ /** Traffic manager packet marking - IPv4/IPv6 ECN */
+ rte_tm_mark_ip_ecn_t mark_ip_ecn;
+ /** Traffic manager packet marking - IPv4/IPv6 DSCP */
+ rte_tm_mark_ip_dscp_t mark_ip_dscp;
+};
+
+/**
+ * Initialize generic error structure.
+ *
+ * This function also sets rte_errno to a given value.
+ *
+ * @param[out] error
+ * Pointer to error structure (may be NULL).
+ * @param[in] code
+ * Related error code (rte_errno).
+ * @param[in] type
+ * Cause field and error type.
+ * @param[in] cause
+ * Object responsible for the error.
+ * @param[in] message
+ * Human-readable error message.
+ *
+ * @return
+ * Error code.
+ */
+static inline int
+rte_tm_error_set(struct rte_tm_error *error,
+ int code,
+ enum rte_tm_error_type type,
+ const void *cause,
+ const char *message)
+{
+ if (error) {
+ *error = (struct rte_tm_error){
+ .type = type,
+ .cause = cause,
+ .message = message,
+ };
+ }
+ rte_errno = code;
+ return code;
+}
+
+/**
+ * Get the generic traffic manager operations structure from a port.
+ *
+ * @param[in] port_id
+ * The port identifier of the Ethernet device.
+ * @param[out] error
+ * Error details.
+ *
+ * @return
+ * The traffic manager operations structure associated with port_id on
+ * success, NULL otherwise.
+ */
+const struct rte_tm_ops *
+rte_tm_ops_get(uint8_t port_id, struct rte_tm_error *error);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* __INCLUDE_RTE_TM_DRIVER_H__ */
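To make the driver-side contract concrete, here is a minimal sketch of a PMD implementing one op with rte_tm_error_set() and publishing it through an rte_tm_ops table. The pmd_tm_* names and the capability value are hypothetical, and only one op is filled in:

#include <errno.h>
#include <string.h>

#include <rte_common.h>
#include <rte_tm_driver.h>

static int
pmd_tm_capabilities_get(struct rte_eth_dev *dev,
	struct rte_tm_capabilities *cap,
	struct rte_tm_error *error)
{
	RTE_SET_USED(dev);

	if (cap == NULL)
		return -rte_tm_error_set(error, EINVAL,
			RTE_TM_ERROR_TYPE_CAPABILITIES,
			NULL, "capabilities pointer is NULL");

	memset(cap, 0, sizeof(*cap));
	cap->n_nodes_max = 64;	/* illustrative value */
	return 0;
}

static const struct rte_tm_ops pmd_tm_ops = {
	.capabilities_get = pmd_tm_capabilities_get,
	/* all other ops left NULL in this sketch */
};

The generic layer hands this table back to applications through rte_tm_ops_get(), so a PMD only needs to expose it via the port's tm_ops_get dev op.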
diff --git a/lib/librte_eventdev/Makefile b/lib/librte_eventdev/Makefile
index e06346a6..410578a1 100644
--- a/lib/librte_eventdev/Makefile
+++ b/lib/librte_eventdev/Makefile
@@ -1,6 +1,6 @@
# BSD LICENSE
#
-# Copyright(c) 2016 Cavium networks. All rights reserved.
+# Copyright(c) 2016 Cavium, Inc. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
@@ -12,7 +12,7 @@
# notice, this list of conditions and the following disclaimer in
# the documentation and/or other materials provided with the
# distribution.
-# * Neither the name of Cavium networks nor the names of its
+# * Neither the name of Cavium, Inc nor the names of its
# contributors may be used to endorse or promote products derived
# from this software without specific prior written permission.
#
@@ -34,7 +34,7 @@ include $(RTE_SDK)/mk/rte.vars.mk
LIB = librte_eventdev.a
# library version
-LIBABIVER := 1
+LIBABIVER := 2
# build flags
CFLAGS += -O3
@@ -42,10 +42,14 @@ CFLAGS += $(WERROR_FLAGS)
# library source files
SRCS-y += rte_eventdev.c
+SRCS-y += rte_event_ring.c
# export include files
SYMLINK-y-include += rte_eventdev.h
SYMLINK-y-include += rte_eventdev_pmd.h
+SYMLINK-y-include += rte_eventdev_pmd_pci.h
+SYMLINK-y-include += rte_eventdev_pmd_vdev.h
+SYMLINK-y-include += rte_event_ring.h
# versioning export map
EXPORT_MAP := rte_eventdev_version.map
diff --git a/lib/librte_eventdev/rte_event_ring.c b/lib/librte_eventdev/rte_event_ring.c
new file mode 100644
index 00000000..b14c2127
--- /dev/null
+++ b/lib/librte_eventdev/rte_event_ring.c
@@ -0,0 +1,207 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright(c) 2017 Intel Corporation. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <sys/queue.h>
+#include <string.h>
+
+#include <rte_tailq.h>
+#include <rte_memzone.h>
+#include <rte_rwlock.h>
+#include <rte_eal_memconfig.h>
+#include "rte_event_ring.h"
+
+TAILQ_HEAD(rte_event_ring_list, rte_tailq_entry);
+
+static struct rte_tailq_elem rte_event_ring_tailq = {
+ .name = RTE_TAILQ_EVENT_RING_NAME,
+};
+EAL_REGISTER_TAILQ(rte_event_ring_tailq)
+
+int
+rte_event_ring_init(struct rte_event_ring *r, const char *name,
+ unsigned int count, unsigned int flags)
+{
+ /* compilation-time checks */
+ RTE_BUILD_BUG_ON((sizeof(struct rte_event_ring) &
+ RTE_CACHE_LINE_MASK) != 0);
+
+ /* init the ring structure */
+ return rte_ring_init(&r->r, name, count, flags);
+}
+
+/* create the ring */
+struct rte_event_ring *
+rte_event_ring_create(const char *name, unsigned int count, int socket_id,
+ unsigned int flags)
+{
+ char mz_name[RTE_MEMZONE_NAMESIZE];
+ struct rte_event_ring *r;
+ struct rte_tailq_entry *te;
+ const struct rte_memzone *mz;
+ ssize_t ring_size;
+ int mz_flags = 0;
+ struct rte_event_ring_list *ring_list = NULL;
+ const unsigned int requested_count = count;
+ int ret;
+
+ ring_list = RTE_TAILQ_CAST(rte_event_ring_tailq.head,
+ rte_event_ring_list);
+
+ /* for an exact size ring, round up from count to a power of two */
+ if (flags & RING_F_EXACT_SZ)
+ count = rte_align32pow2(count + 1);
+ else if (!rte_is_power_of_2(count)) {
+ rte_errno = EINVAL;
+ return NULL;
+ }
+
+ ring_size = sizeof(*r) + (count * sizeof(struct rte_event));
+
+ ret = snprintf(mz_name, sizeof(mz_name), "%s%s",
+ RTE_RING_MZ_PREFIX, name);
+ if (ret < 0 || ret >= (int)sizeof(mz_name)) {
+ rte_errno = ENAMETOOLONG;
+ return NULL;
+ }
+
+ te = rte_zmalloc("RING_TAILQ_ENTRY", sizeof(*te), 0);
+ if (te == NULL) {
+ RTE_LOG(ERR, RING, "Cannot reserve memory for tailq\n");
+ rte_errno = ENOMEM;
+ return NULL;
+ }
+
+ rte_rwlock_write_lock(RTE_EAL_TAILQ_RWLOCK);
+
+ /*
+ * reserve a memory zone for this ring. If we can't get rte_config or
+ * we are a secondary process, the memzone_reserve function will set
+ * rte_errno for us appropriately - hence no check in this function
+ */
+ mz = rte_memzone_reserve(mz_name, ring_size, socket_id, mz_flags);
+ if (mz != NULL) {
+ r = mz->addr;
+ /*
+ * no need to check the return value here; we already checked the
+ * arguments above
+ */
+ rte_event_ring_init(r, name, requested_count, flags);
+
+ te->data = (void *) r;
+ r->r.memzone = mz;
+
+ TAILQ_INSERT_TAIL(ring_list, te, next);
+ } else {
+ r = NULL;
+ RTE_LOG(ERR, RING, "Cannot reserve memory\n");
+ rte_free(te);
+ }
+ rte_rwlock_write_unlock(RTE_EAL_TAILQ_RWLOCK);
+
+ return r;
+}
+
+
+struct rte_event_ring *
+rte_event_ring_lookup(const char *name)
+{
+ struct rte_tailq_entry *te;
+ struct rte_event_ring *r = NULL;
+ struct rte_event_ring_list *ring_list;
+
+ ring_list = RTE_TAILQ_CAST(rte_event_ring_tailq.head,
+ rte_event_ring_list);
+
+ rte_rwlock_read_lock(RTE_EAL_TAILQ_RWLOCK);
+
+ TAILQ_FOREACH(te, ring_list, next) {
+ r = (struct rte_event_ring *) te->data;
+ if (strncmp(name, r->r.name, RTE_RING_NAMESIZE) == 0)
+ break;
+ }
+
+ rte_rwlock_read_unlock(RTE_EAL_TAILQ_RWLOCK);
+
+ if (te == NULL) {
+ rte_errno = ENOENT;
+ return NULL;
+ }
+
+ return r;
+}
+
+/* free the ring */
+void
+rte_event_ring_free(struct rte_event_ring *r)
+{
+ struct rte_event_ring_list *ring_list = NULL;
+ struct rte_tailq_entry *te;
+
+ if (r == NULL)
+ return;
+
+ /*
+ * Ring was not created with rte_event_ring_create(),
+ * so there is no memzone to free.
+ */
+ if (r->r.memzone == NULL) {
+ RTE_LOG(ERR, RING,
+ "Cannot free ring (not created with rte_event_ring_create()");
+ return;
+ }
+
+ if (rte_memzone_free(r->r.memzone) != 0) {
+ RTE_LOG(ERR, RING, "Cannot free memory\n");
+ return;
+ }
+
+ ring_list = RTE_TAILQ_CAST(rte_event_ring_tailq.head,
+ rte_event_ring_list);
+ rte_rwlock_write_lock(RTE_EAL_TAILQ_RWLOCK);
+
+ /* find the tailq entry */
+ TAILQ_FOREACH(te, ring_list, next) {
+ if (te->data == (void *) r)
+ break;
+ }
+
+ if (te == NULL) {
+ rte_rwlock_write_unlock(RTE_EAL_TAILQ_RWLOCK);
+ return;
+ }
+
+ TAILQ_REMOVE(ring_list, te, next);
+
+ rte_rwlock_write_unlock(RTE_EAL_TAILQ_RWLOCK);
+
+ rte_free(te);
+}
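A short usage sketch of the lifecycle implemented above, assuming EAL is already initialized; the ring name, size, and flags are illustrative:

#include <rte_errno.h>
#include <rte_event_ring.h>

static int
event_ring_lifecycle(void)
{
	struct rte_event_ring *r, *found;

	/* 512 usable slots; RING_F_EXACT_SZ lifts the power-of-2 rule */
	r = rte_event_ring_create("worker_ring", 512, SOCKET_ID_ANY,
			RING_F_SP_ENQ | RING_F_SC_DEQ | RING_F_EXACT_SZ);
	if (r == NULL)
		return -rte_errno;

	/* lookup returns the same tailq-registered object */
	found = rte_event_ring_lookup("worker_ring");
	if (found != r)
		return -1;

	rte_event_ring_free(r);
	return 0;
}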
diff --git a/lib/librte_eventdev/rte_event_ring.h b/lib/librte_eventdev/rte_event_ring.h
new file mode 100644
index 00000000..ea9b6885
--- /dev/null
+++ b/lib/librte_eventdev/rte_event_ring.h
@@ -0,0 +1,308 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright(c) 2016-2017 Intel Corporation. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/**
+ * @file
+ * RTE Event Ring
+ *
+ * This provides a ring implementation for passing rte_event structures
+ * from one core to another.
+ */
+
+#ifndef _RTE_EVENT_RING_
+#define _RTE_EVENT_RING_
+
+#include <stdint.h>
+
+#include <rte_common.h>
+#include <rte_memory.h>
+#include <rte_malloc.h>
+#include <rte_ring.h>
+#include "rte_eventdev.h"
+
+#define RTE_TAILQ_EVENT_RING_NAME "RTE_EVENT_RING"
+
+/**
+ * Generic ring structure for passing rte_event objects from core to core.
+ *
+ * Based on the primitives given in the rte_ring library. Designed to be
+ * used inside software eventdev implementations and by applications
+ * directly as needed.
+ */
+struct rte_event_ring {
+ struct rte_ring r;
+};
+
+/**
+ * Returns the number of events in the ring
+ *
+ * @param r
+ * pointer to the event ring
+ * @return
+ * the number of events in the ring
+ */
+static __rte_always_inline unsigned int
+rte_event_ring_count(const struct rte_event_ring *r)
+{
+ return rte_ring_count(&r->r);
+}
+
+/**
+ * Returns the amount of free space in the ring
+ *
+ * @param r
+ * pointer to the event ring
+ * @return
+ * the number of free slots in the ring, i.e. the number of events that
+ * can be successfully enqueued before dequeue must be called
+ */
+static __rte_always_inline unsigned int
+rte_event_ring_free_count(const struct rte_event_ring *r)
+{
+ return rte_ring_free_count(&r->r);
+}
+
+/**
+ * Enqueue a set of events onto a ring
+ *
+ * Note: this API enqueues by copying the events themselves onto the ring,
+ * rather than just placing a pointer to each event onto the ring. This
+ * means that statically-allocated events can safely be enqueued by this
+ * API.
+ *
+ * @param r
+ * pointer to the event ring
+ * @param events
+ * pointer to an array of struct rte_event objects
+ * @param n
+ * number of events in the array to enqueue
+ * @param free_space
+ * if non-null, is updated to indicate the amount of free space in the
+ * ring once the enqueue has completed.
+ * @return
+ * the number of elements, n', enqueued to the ring, 0 <= n' <= n
+ */
+static __rte_always_inline unsigned int
+rte_event_ring_enqueue_burst(struct rte_event_ring *r,
+ const struct rte_event *events,
+ unsigned int n, uint16_t *free_space)
+{
+ uint32_t prod_head, prod_next;
+ uint32_t free_entries;
+
+ n = __rte_ring_move_prod_head(&r->r, r->r.prod.single, n,
+ RTE_RING_QUEUE_VARIABLE,
+ &prod_head, &prod_next, &free_entries);
+ if (n == 0)
+ goto end;
+
+ ENQUEUE_PTRS(&r->r, &r[1], prod_head, events, n, struct rte_event);
+ rte_smp_wmb();
+
+ update_tail(&r->r.prod, prod_head, prod_next, 1);
+end:
+ if (free_space != NULL)
+ *free_space = free_entries - n;
+ return n;
+}
+
+/**
+ * Dequeue a set of events from a ring
+ *
+ * Note: this API does not work with pointers to events; rather, it copies
+ * the events themselves to the destination ``events`` buffer.
+ *
+ * @param r
+ * pointer to the event ring
+ * @param events
+ * pointer to an array to hold the struct rte_event objects
+ * @param n
+ * number of events that can be held in the ``events`` array
+ * @param available
+ * if non-null, is updated to indicate the number of events remaining in
+ * the ring once the dequeue has completed
+ * @return
+ * the number of elements, n', dequeued from the ring, 0 <= n' <= n
+ */
+static __rte_always_inline unsigned int
+rte_event_ring_dequeue_burst(struct rte_event_ring *r,
+ struct rte_event *events,
+ unsigned int n, uint16_t *available)
+{
+ uint32_t cons_head, cons_next;
+ uint32_t entries;
+
+ n = __rte_ring_move_cons_head(&r->r, r->r.cons.single, n,
+ RTE_RING_QUEUE_VARIABLE,
+ &cons_head, &cons_next, &entries);
+ if (n == 0)
+ goto end;
+
+ DEQUEUE_PTRS(&r->r, &r[1], cons_head, events, n, struct rte_event);
+ rte_smp_rmb();
+
+ update_tail(&r->r.cons, cons_head, cons_next, 1);
+
+end:
+ if (available != NULL)
+ *available = entries - n;
+ return n;
+}
+
+/**
+ * Initializes an already-allocated ring structure
+ *
+ * @param r
+ * pointer to the ring memory to be initialized
+ * @param name
+ * name to be given to the ring
+ * @param count
+ * the number of elements to be stored in the ring. If the flag
+ * ``RING_F_EXACT_SZ`` is not set, this must be a power of 2, and the actual
+ * usable space in the ring will be ``count - 1`` entries. If the flag
+ * ``RING_F_EXACT_SZ`` is set, then this can be any value up to the ring size
+ * limit - 1, and the usable space will be exactly that requested.
+ * @param flags
+ * An OR of the following:
+ * - RING_F_SP_ENQ: If this flag is set, the default behavior when
+ * using ``rte_ring_enqueue()`` or ``rte_ring_enqueue_bulk()``
+ * is "single-producer". Otherwise, it is "multi-producers".
+ * - RING_F_SC_DEQ: If this flag is set, the default behavior when
+ * using ``rte_ring_dequeue()`` or ``rte_ring_dequeue_bulk()``
+ * is "single-consumer". Otherwise, it is "multi-consumers".
+ * - RING_F_EXACT_SZ: If this flag is set, the ``count`` parameter is to
+ * be taken as the exact usable size of the ring, and as such does not
+ * need to be a power of 2. The underlying ring memory should be a
+ * power-of-2 size greater than the count value.
+ * @return
+ * 0 on success, or a negative value on error.
+ */
+int
+rte_event_ring_init(struct rte_event_ring *r, const char *name,
+ unsigned int count, unsigned int flags);
+
+/**
+ * Create an event ring structure
+ *
+ * This function allocates memory and initializes an event ring inside that
+ * memory.
+ *
+ * @param name
+ * name to be given to the ring
+ * @param count
+ * the number of elements to be stored in the ring. If the flag
+ * ``RING_F_EXACT_SZ`` is not set, this must be a power of 2, and the actual
+ * usable space in the ring will be ``count - 1`` entries. If the flag
+ * ``RING_F_EXACT_SZ`` is set, then this can be any value up to the ring size
+ * limit - 1, and the usable space will be exactly that requested.
+ * @param socket_id
+ * The *socket_id* argument is the socket identifier in case of
+ * NUMA. The value can be *SOCKET_ID_ANY* if there is no NUMA
+ * constraint for the reserved zone.
+ * @param flags
+ * An OR of the following:
+ * - RING_F_SP_ENQ: If this flag is set, the default behavior when
+ * using ``rte_ring_enqueue()`` or ``rte_ring_enqueue_bulk()``
+ * is "single-producer". Otherwise, it is "multi-producers".
+ * - RING_F_SC_DEQ: If this flag is set, the default behavior when
+ * using ``rte_ring_dequeue()`` or ``rte_ring_dequeue_bulk()``
+ * is "single-consumer". Otherwise, it is "multi-consumers".
+ * - RING_F_EXACT_SZ: If this flag is set, the ``count`` parameter is to
+ * be taken as the exact usable size of the ring, and as such does not
+ * need to be a power of 2. The underlying ring memory should be a
+ * power-of-2 size greater than the count value.
+ * @return
+ * On success, the pointer to the newly allocated ring. NULL on error with
+ * rte_errno set appropriately. Possible errno values include:
+ * - E_RTE_NO_CONFIG - function could not get pointer to rte_config structure
+ * - E_RTE_SECONDARY - function was called from a secondary process instance
+ * - EINVAL - count provided is not a power of 2
+ * - ENOSPC - the maximum number of memzones has already been allocated
+ * - EEXIST - a memzone with the same name already exists
+ * - ENOMEM - no appropriate memory area found in which to create memzone
+ */
+struct rte_event_ring *
+rte_event_ring_create(const char *name, unsigned int count, int socket_id,
+ unsigned int flags);
+
+/**
+ * Search for an event ring based on its name
+ *
+ * @param name
+ * The name of the ring.
+ * @return
+ * The pointer to the ring matching the name, or NULL if not found,
+ * with rte_errno set appropriately. Possible rte_errno values include:
+ * - ENOENT - required entry not available to return.
+ */
+struct rte_event_ring *
+rte_event_ring_lookup(const char *name);
+
+/**
+ * De-allocate all memory used by the ring.
+ *
+ * @param r
+ * Ring to free
+ */
+void
+rte_event_ring_free(struct rte_event_ring *r);
+
+/**
+ * Return the size of the event ring.
+ *
+ * @param r
+ * A pointer to the ring structure.
+ * @return
+ * The size of the data store used by the ring.
+ * NOTE: this is not the same as the usable space in the ring. To query that
+ * use ``rte_event_ring_get_capacity()``.
+ */
+static inline unsigned int
+rte_event_ring_get_size(const struct rte_event_ring *r)
+{
+ return rte_ring_get_size(&r->r);
+}
+
+/**
+ * Return the number of elements which can be stored in the event ring.
+ *
+ * @param r
+ * A pointer to the ring structure.
+ * @return
+ * The usable size of the ring.
+ */
+static inline unsigned int
+rte_event_ring_get_capacity(const struct rte_event_ring *r)
+{
+ return rte_ring_get_capacity(&r->r);
+}
+#endif /* _RTE_EVENT_RING_ */
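A brief sketch of the copy-based fastpath defined in this header, assuming two rings created as in the lifecycle example; the burst size and the per-event processing are illustrative:

#include <rte_event_ring.h>

#define BURST 32

/* Drain up to one burst from `in`, touch each event, pass it to `out`. */
static void
forward_burst(struct rte_event_ring *in, struct rte_event_ring *out)
{
	struct rte_event ev[BURST];
	uint16_t free_space, available;
	unsigned int i, n;

	/* events are copied out of the ring, not handed over by pointer */
	n = rte_event_ring_dequeue_burst(in, ev, BURST, &available);

	for (i = 0; i < n; i++)
		ev[i].queue_id++;	/* illustrative per-event processing */

	/* the enqueue may be partial; a real caller would retry the rest */
	n = rte_event_ring_enqueue_burst(out, ev, n, &free_space);
}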
diff --git a/lib/librte_eventdev/rte_eventdev.c b/lib/librte_eventdev/rte_eventdev.c
index 20afc3f0..bbb38050 100644
--- a/lib/librte_eventdev/rte_eventdev.c
+++ b/lib/librte_eventdev/rte_eventdev.c
@@ -1,7 +1,7 @@
/*
* BSD LICENSE
*
- * Copyright(c) 2016 Cavium networks. All rights reserved.
+ * Copyright(c) 2016 Cavium, Inc. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
@@ -13,7 +13,7 @@
* notice, this list of conditions and the following disclaimer in
* the documentation and/or other materials provided with the
* distribution.
- * * Neither the name of Cavium networks nor the names of its
+ * * Neither the name of Cavium, Inc nor the names of its
* contributors may be used to endorse or promote products derived
* from this software without specific prior written permission.
*
@@ -45,7 +45,6 @@
#include <rte_log.h>
#include <rte_debug.h>
#include <rte_dev.h>
-#include <rte_pci.h>
#include <rte_memory.h>
#include <rte_memcpy.h>
#include <rte_memzone.h>
@@ -126,8 +125,6 @@ rte_event_dev_info_get(uint8_t dev_id, struct rte_event_dev_info *dev_info)
dev_info->dequeue_timeout_ns = dev->data->dev_conf.dequeue_timeout_ns;
dev_info->dev = dev->dev;
- if (dev->driver)
- dev_info->driver_name = dev->driver->pci_drv.driver.name;
return 0;
}
@@ -301,7 +298,7 @@ rte_event_dev_port_config(struct rte_eventdev *dev, uint8_t nb_ports)
sizeof(dev->data->links_map[0]) * nb_ports *
RTE_EVENT_MAX_QUEUES_PER_DEV,
RTE_CACHE_LINE_SIZE);
- if (dev->data->links_map == NULL) {
+ if (links_map == NULL) {
dev->data->nb_ports = 0;
RTE_EDEV_LOG_ERR("failed to realloc mem for port_map,"
"nb_ports %u", nb_ports);
@@ -369,9 +366,10 @@ rte_event_dev_configure(uint8_t dev_id,
/* Check dequeue_timeout_ns value is in limit */
if (!(dev_conf->event_dev_cfg & RTE_EVENT_DEV_CFG_PER_DEQUEUE_TIMEOUT)) {
- if (dev_conf->dequeue_timeout_ns < info.min_dequeue_timeout_ns
+ if (dev_conf->dequeue_timeout_ns &&
+ (dev_conf->dequeue_timeout_ns < info.min_dequeue_timeout_ns
|| dev_conf->dequeue_timeout_ns >
- info.max_dequeue_timeout_ns) {
+ info.max_dequeue_timeout_ns)) {
RTE_EDEV_LOG_ERR("dev%d invalid dequeue_timeout_ns=%d"
" min_dequeue_timeout_ns=%d max_dequeue_timeout_ns=%d",
dev_id, dev_conf->dequeue_timeout_ns,
@@ -429,8 +427,9 @@ rte_event_dev_configure(uint8_t dev_id,
dev_id);
return -EINVAL;
}
- if (dev_conf->nb_event_port_dequeue_depth >
- info.max_event_port_dequeue_depth) {
+ if ((info.event_dev_cap & RTE_EVENT_DEV_CAP_BURST_MODE) &&
+ (dev_conf->nb_event_port_dequeue_depth >
+ info.max_event_port_dequeue_depth)) {
RTE_EDEV_LOG_ERR("dev%d nb_dq_depth=%d > max_dq_depth=%d",
dev_id, dev_conf->nb_event_port_dequeue_depth,
info.max_event_port_dequeue_depth);
@@ -443,8 +442,9 @@ rte_event_dev_configure(uint8_t dev_id,
dev_id);
return -EINVAL;
}
- if (dev_conf->nb_event_port_enqueue_depth >
- info.max_event_port_enqueue_depth) {
+ if ((info.event_dev_cap & RTE_EVENT_DEV_CAP_BURST_MODE) &&
+ (dev_conf->nb_event_port_enqueue_depth >
+ info.max_event_port_enqueue_depth)) {
RTE_EDEV_LOG_ERR("dev%d nb_enq_depth=%d > max_enq_depth=%d",
dev_id, dev_conf->nb_event_port_enqueue_depth,
info.max_event_port_enqueue_depth);
@@ -1174,10 +1174,6 @@ rte_event_pmd_release(struct rte_eventdev *eventdev)
if (eventdev == NULL)
return -EINVAL;
- ret = rte_event_dev_close(eventdev->data->dev_id);
- if (ret < 0)
- return ret;
-
eventdev->attached = RTE_EVENTDEV_DETACHED;
eventdev_globals.nb_devs--;
@@ -1202,144 +1198,3 @@ rte_event_pmd_release(struct rte_eventdev *eventdev)
eventdev->data = NULL;
return 0;
}
-
-struct rte_eventdev *
-rte_event_pmd_vdev_init(const char *name, size_t dev_private_size,
- int socket_id)
-{
- struct rte_eventdev *eventdev;
-
- /* Allocate device structure */
- eventdev = rte_event_pmd_allocate(name, socket_id);
- if (eventdev == NULL)
- return NULL;
-
- /* Allocate private device structure */
- if (rte_eal_process_type() == RTE_PROC_PRIMARY) {
- eventdev->data->dev_private =
- rte_zmalloc_socket("eventdev device private",
- dev_private_size,
- RTE_CACHE_LINE_SIZE,
- socket_id);
-
- if (eventdev->data->dev_private == NULL)
- rte_panic("Cannot allocate memzone for private device"
- " data");
- }
-
- return eventdev;
-}
-
-int
-rte_event_pmd_vdev_uninit(const char *name)
-{
- struct rte_eventdev *eventdev;
-
- if (name == NULL)
- return -EINVAL;
-
- eventdev = rte_event_pmd_get_named_dev(name);
- if (eventdev == NULL)
- return -ENODEV;
-
- /* Free the event device */
- rte_event_pmd_release(eventdev);
-
- return 0;
-}
-
-int
-rte_event_pmd_pci_probe(struct rte_pci_driver *pci_drv,
- struct rte_pci_device *pci_dev)
-{
- struct rte_eventdev_driver *eventdrv;
- struct rte_eventdev *eventdev;
-
- char eventdev_name[RTE_EVENTDEV_NAME_MAX_LEN];
-
- int retval;
-
- eventdrv = (struct rte_eventdev_driver *)pci_drv;
- if (eventdrv == NULL)
- return -ENODEV;
-
- rte_pci_device_name(&pci_dev->addr, eventdev_name,
- sizeof(eventdev_name));
-
- eventdev = rte_event_pmd_allocate(eventdev_name,
- pci_dev->device.numa_node);
- if (eventdev == NULL)
- return -ENOMEM;
-
- if (rte_eal_process_type() == RTE_PROC_PRIMARY) {
- eventdev->data->dev_private =
- rte_zmalloc_socket(
- "eventdev private structure",
- eventdrv->dev_private_size,
- RTE_CACHE_LINE_SIZE,
- rte_socket_id());
-
- if (eventdev->data->dev_private == NULL)
- rte_panic("Cannot allocate memzone for private "
- "device data");
- }
-
- eventdev->dev = &pci_dev->device;
- eventdev->driver = eventdrv;
-
- /* Invoke PMD device initialization function */
- retval = (*eventdrv->eventdev_init)(eventdev);
- if (retval == 0)
- return 0;
-
- RTE_EDEV_LOG_ERR("driver %s: (vendor_id=0x%x device_id=0x%x)"
- " failed", pci_drv->driver.name,
- (unsigned int) pci_dev->id.vendor_id,
- (unsigned int) pci_dev->id.device_id);
-
- if (rte_eal_process_type() == RTE_PROC_PRIMARY)
- rte_free(eventdev->data->dev_private);
-
- eventdev->attached = RTE_EVENTDEV_DETACHED;
- eventdev_globals.nb_devs--;
-
- return -ENXIO;
-}
-
-int
-rte_event_pmd_pci_remove(struct rte_pci_device *pci_dev)
-{
- const struct rte_eventdev_driver *eventdrv;
- struct rte_eventdev *eventdev;
- char eventdev_name[RTE_EVENTDEV_NAME_MAX_LEN];
- int ret;
-
- if (pci_dev == NULL)
- return -EINVAL;
-
- rte_pci_device_name(&pci_dev->addr, eventdev_name,
- sizeof(eventdev_name));
-
- eventdev = rte_event_pmd_get_named_dev(eventdev_name);
- if (eventdev == NULL)
- return -ENODEV;
-
- eventdrv = (const struct rte_eventdev_driver *)pci_dev->driver;
- if (eventdrv == NULL)
- return -ENODEV;
-
- /* Invoke PMD device un-init function */
- if (*eventdrv->eventdev_uninit) {
- ret = (*eventdrv->eventdev_uninit)(eventdev);
- if (ret)
- return ret;
- }
-
- /* Free event device */
- rte_event_pmd_release(eventdev);
-
- eventdev->dev = NULL;
- eventdev->driver = NULL;
-
- return 0;
-}
diff --git a/lib/librte_eventdev/rte_eventdev.h b/lib/librte_eventdev/rte_eventdev.h
index 20e7293e..128bc522 100644
--- a/lib/librte_eventdev/rte_eventdev.h
+++ b/lib/librte_eventdev/rte_eventdev.h
@@ -1,7 +1,7 @@
/*
* BSD LICENSE
*
- * Copyright 2016 Cavium.
+ * Copyright 2016 Cavium, Inc.
* Copyright 2016 Intel Corporation.
* Copyright 2016 NXP.
*
@@ -15,7 +15,7 @@
* notice, this list of conditions and the following disclaimer in
* the documentation and/or other materials provided with the
* distribution.
- * * Neither the name of Cavium nor the names of its
+ * * Neither the name of Cavium, Inc nor the names of its
* contributors may be used to endorse or promote products derived
* from this software without specific prior written permission.
*
@@ -199,20 +199,6 @@
* operation. Instead, Event drivers export Poll-Mode enqueue and dequeue
* functions to applications.
*
- * An event driven based application has following typical workflow on fastpath:
- * \code{.c}
- * while (1) {
- *
- * rte_event_schedule(dev_id);
- *
- * rte_event_dequeue(...);
- *
- * (event processing)
- *
- * rte_event_enqueue(...);
- * }
- * \endcode
- *
* The events are injected to event device through *enqueue* operation by
* event producers in the system. The typical event producers are ethdev
* subsystem for generating packet events, CPU(SW) for generating events based
@@ -237,6 +223,15 @@
* indicates the device is centralized and thus needs a dedicated scheduling
* thread that repeatedly calls rte_event_schedule().
*
+ * An event-driven worker thread has the following typical fastpath workflow:
+ * \code{.c}
+ * while (1) {
+ * rte_event_dequeue_burst(...);
+ * (event processing)
+ * rte_event_enqueue_burst(...);
+ * }
+ * \endcode
+ *
*/
#ifdef __cplusplus
@@ -279,6 +274,14 @@ struct rte_mbuf; /* we just use mbuf pointers; no need to include rte_mbuf.h */
*
* @see RTE_EVENT_QUEUE_CFG_* values
*/
+#define RTE_EVENT_DEV_CAP_BURST_MODE (1ULL << 4)
+/**< Event device is capable of operating in burst mode for enqueue (forward,
+ * release) and dequeue operations. If this capability is not set, the
+ * application can still use rte_event_dequeue_burst() and
+ * rte_event_enqueue_burst(), but the PMD accepts only one event at a time.
+ *
+ * @see rte_event_dequeue_burst() rte_event_enqueue_burst()
+ */
/* Event device priority levels */
#define RTE_EVENT_DEV_PRIORITY_HIGHEST 0
@@ -409,6 +412,7 @@ struct rte_event_dev_config {
* This value should be in the range of *min_dequeue_timeout_ns* and
* *max_dequeue_timeout_ns* which previously provided in
* rte_event_dev_info_get()
+ * The value 0 is allowed, in which case the default dequeue timeout is used.
* @see RTE_EVENT_DEV_CFG_PER_DEQUEUE_TIMEOUT
*/
int32_t nb_events_limit;
@@ -438,14 +442,16 @@ struct rte_event_dev_config {
/**< Maximum number of events can be dequeued at a time from an
* event port by this device.
* This value cannot exceed the *max_event_port_dequeue_depth*
- * which previously provided in rte_event_dev_info_get()
+ * which previously provided in rte_event_dev_info_get().
+ * Ignored when device is not RTE_EVENT_DEV_CAP_BURST_MODE capable.
* @see rte_event_port_setup()
*/
uint32_t nb_event_port_enqueue_depth;
/**< Maximum number of events can be enqueued at a time from an
* event port by this device.
* This value cannot exceed the *max_event_port_enqueue_depth*
- * which previously provided in rte_event_dev_info_get()
+ * which previously provided in rte_event_dev_info_get().
+ * Ignored when device is not RTE_EVENT_DEV_CAP_BURST_MODE capable.
* @see rte_event_port_setup()
*/
uint32_t event_dev_cfg;
@@ -521,9 +527,11 @@ rte_event_dev_configure(uint8_t dev_id,
struct rte_event_queue_conf {
uint32_t nb_atomic_flows;
/**< The maximum number of active flows this queue can track at any
- * given time. The value must be in the range of
- * [1 - nb_event_queue_flows)] which previously provided in
- * rte_event_dev_info_get().
+ * given time. If the queue is configured for atomic scheduling (by
+ * applying the RTE_EVENT_QUEUE_CFG_ALL_TYPES or
+ * RTE_EVENT_QUEUE_CFG_ATOMIC_ONLY flags to event_queue_cfg), then the
+ * value must be in the range of [1, nb_event_queue_flows], which was
+ * previously provided in rte_event_dev_configure().
*/
uint32_t nb_atomic_order_sequences;
/**< The maximum number of outstanding events waiting to be
@@ -533,8 +541,11 @@ struct rte_event_queue_conf {
* scheduler cannot schedule the events from this queue and invalid
* event will be returned from dequeue until one or more entries are
* freed up/released.
- * The value must be in the range of [1 - nb_event_queue_flows)]
- * which previously supplied to rte_event_dev_configure().
+ * If the queue is configured for ordered scheduling (by applying the
+ * RTE_EVENT_QUEUE_CFG_ALL_TYPES or RTE_EVENT_QUEUE_CFG_ORDERED_ONLY
+ * flags to event_queue_cfg), then the value must be in the range of
+ * [1, nb_event_queue_flows], which was previously supplied to
+ * rte_event_dev_configure().
*/
uint32_t event_queue_cfg; /**< Queue cfg flags(EVENT_QUEUE_CFG_) */
uint8_t priority;
@@ -642,12 +653,14 @@ struct rte_event_port_conf {
uint16_t dequeue_depth;
/**< Configure number of bulk dequeues for this event port.
* This value cannot exceed the *nb_event_port_dequeue_depth*
- * which previously supplied to rte_event_dev_configure()
+ * which previously supplied to rte_event_dev_configure().
+ * Ignored when device is not RTE_EVENT_DEV_CAP_BURST_MODE capable.
*/
uint16_t enqueue_depth;
/**< Configure number of bulk enqueues for this event port.
* This value cannot exceed the *nb_event_port_enqueue_depth*
- * which previously supplied to rte_event_dev_configure()
+ * which previously supplied to rte_event_dev_configure().
+ * Ignored when device is not RTE_EVENT_DEV_CAP_BURST_MODE capable.
*/
};
@@ -1052,6 +1065,10 @@ struct rte_eventdev {
/**< Pointer to PMD enqueue function. */
event_enqueue_burst_t enqueue_burst;
/**< Pointer to PMD enqueue burst function. */
+ event_enqueue_burst_t enqueue_new_burst;
+ /**< Pointer to PMD enqueue burst function (op new variant) */
+ event_enqueue_burst_t enqueue_forward_burst;
+ /**< Pointer to PMD enqueue burst function (op forward variant) */
event_dequeue_t dequeue;
/**< Pointer to PMD dequeue function. */
event_dequeue_burst_t dequeue_burst;
@@ -1063,8 +1080,6 @@ struct rte_eventdev {
/**< Functions exported by PMD */
struct rte_device *dev;
/**< Device info. supplied by probing */
- const struct rte_eventdev_driver *driver;
- /**< Driver for this device */
RTE_STD_C11
uint8_t attached : 1;
@@ -1092,6 +1107,34 @@ rte_event_schedule(uint8_t dev_id)
(*dev->schedule)(dev);
}
+static __rte_always_inline uint16_t
+__rte_event_enqueue_burst(uint8_t dev_id, uint8_t port_id,
+ const struct rte_event ev[], uint16_t nb_events,
+ const event_enqueue_burst_t fn)
+{
+ const struct rte_eventdev *dev = &rte_eventdevs[dev_id];
+
+#ifdef RTE_LIBRTE_EVENTDEV_DEBUG
+ if (dev_id >= RTE_EVENT_MAX_DEVS || !rte_eventdevs[dev_id].attached) {
+ rte_errno = -EINVAL;
+ return 0;
+ }
+
+ if (port_id >= dev->data->nb_ports) {
+ rte_errno = -EINVAL;
+ return 0;
+ }
+#endif
+ /*
+ * Allow zero-cost invocation of the non-burst routine when the
+ * application passes nb_events as a compile-time constant of one.
+ */
+ if (nb_events == 1)
+ return (*dev->enqueue)(dev->data->ports[port_id], ev);
+ else
+ return fn(dev->data->ports[port_id], ev, nb_events);
+}
+
/**
* Enqueue a burst of events objects or an event object supplied in *rte_event*
* structure on an event device designated by its *dev_id* through the event
@@ -1135,30 +1178,108 @@ static inline uint16_t
rte_event_enqueue_burst(uint8_t dev_id, uint8_t port_id,
const struct rte_event ev[], uint16_t nb_events)
{
- struct rte_eventdev *dev = &rte_eventdevs[dev_id];
+ const struct rte_eventdev *dev = &rte_eventdevs[dev_id];
-#ifdef RTE_LIBRTE_EVENTDEV_DEBUG
- if (dev_id >= RTE_EVENT_MAX_DEVS || !rte_eventdevs[dev_id].attached) {
- rte_errno = -EINVAL;
- return 0;
- }
+ return __rte_event_enqueue_burst(dev_id, port_id, ev, nb_events,
+ dev->enqueue_burst);
+}
- if (port_id >= dev->data->nb_ports) {
- rte_errno = -EINVAL;
- return 0;
- }
-#endif
+/**
+ * Enqueue a burst of event objects of operation type *RTE_EVENT_OP_NEW* on
+ * an event device designated by its *dev_id* through the event port specified
+ * by *port_id*.
+ *
+ * Provides the same functionality as rte_event_enqueue_burst(), except that
+ * the application can use this API when all objects in the burst contain
+ * enqueue operations of type *RTE_EVENT_OP_NEW*. This specialized function
+ * gives the PMD an additional hint and allows it to optimize if possible.
+ *
+ * The rte_event_enqueue_new_burst() result is undefined if the enqueue burst
+ * has any event object of operation type != RTE_EVENT_OP_NEW.
+ *
+ * @param dev_id
+ * The identifier of the device.
+ * @param port_id
+ * The identifier of the event port.
+ * @param ev
+ * Points to an array of *nb_events* objects of type *rte_event* structure
+ * which contain the event object enqueue operations to be processed.
+ * @param nb_events
+ * The number of event objects to enqueue, typically number of
+ * rte_event_port_enqueue_depth() available for this port.
+ *
+ * @return
+ * The number of event objects actually enqueued on the event device. The
+ * return value can be less than the value of the *nb_events* parameter when
+ * the event devices queue is full or if invalid parameters are specified in a
+ * *rte_event*. If the return value is less than *nb_events*, the remaining
+ * events at the end of ev[] are not consumed and the caller has to take care
+ * of them, and rte_errno is set accordingly. Possible errno values include:
+ * - -EINVAL The port ID is invalid, device ID is invalid, an event's queue
+ * ID is invalid, or an event's sched type doesn't match the
+ * capabilities of the destination queue.
+ * - -ENOSPC The event port was backpressured and unable to enqueue
+ * one or more events. This error code is only applicable to
+ * closed systems.
+ * @see rte_event_port_enqueue_depth() rte_event_enqueue_burst()
+ */
+static inline uint16_t
+rte_event_enqueue_new_burst(uint8_t dev_id, uint8_t port_id,
+ const struct rte_event ev[], uint16_t nb_events)
+{
+ const struct rte_eventdev *dev = &rte_eventdevs[dev_id];
- /*
- * Allow zero cost non burst mode routine invocation if application
- * requests nb_events as const one
- */
- if (nb_events == 1)
- return (*dev->enqueue)(
- dev->data->ports[port_id], ev);
- else
- return (*dev->enqueue_burst)(
- dev->data->ports[port_id], ev, nb_events);
+ return __rte_event_enqueue_burst(dev_id, port_id, ev, nb_events,
+ dev->enqueue_new_burst);
+}
+
+/**
+ * Enqueue a burst of event objects of operation type *RTE_EVENT_OP_FORWARD*
+ * on an event device designated by its *dev_id* through the event port
+ * specified by *port_id*.
+ *
+ * Provides the same functionality as rte_event_enqueue_burst(), except that
+ * the application can use this API when all objects in the burst contain
+ * enqueue operations of type *RTE_EVENT_OP_FORWARD*. This specialized
+ * function gives the PMD an additional hint and allows it to optimize if
+ * possible.
+ *
+ * The rte_event_enqueue_forward_burst() result is undefined if the enqueue
+ * burst has any event object of operation type != RTE_EVENT_OP_FORWARD.
+ *
+ * @param dev_id
+ * The identifier of the device.
+ * @param port_id
+ * The identifier of the event port.
+ * @param ev
+ * Points to an array of *nb_events* objects of type *rte_event* structure
+ * which contain the event object enqueue operations to be processed.
+ * @param nb_events
+ * The number of event objects to enqueue, typically number of
+ * rte_event_port_enqueue_depth() available for this port.
+ *
+ * @return
+ * The number of event objects actually enqueued on the event device. The
+ * return value can be less than the value of the *nb_events* parameter when
+ * the event devices queue is full or if invalid parameters are specified in a
+ * *rte_event*. If the return value is less than *nb_events*, the remaining
+ * events at the end of ev[] are not consumed and the caller has to take care
+ * of them, and rte_errno is set accordingly. Possible errno values include:
+ * - -EINVAL The port ID is invalid, device ID is invalid, an event's queue
+ * ID is invalid, or an event's sched type doesn't match the
+ * capabilities of the destination queue.
+ * - -ENOSPC The event port was backpressured and unable to enqueue
+ * one or more events. This error code is only applicable to
+ * closed systems.
+ * @see rte_event_port_enqueue_depth() rte_event_enqueue_burst()
+ */
+static inline uint16_t
+rte_event_enqueue_forward_burst(uint8_t dev_id, uint8_t port_id,
+ const struct rte_event ev[], uint16_t nb_events)
+{
+ const struct rte_eventdev *dev = &rte_eventdevs[dev_id];
+
+ return __rte_event_enqueue_burst(dev_id, port_id, ev, nb_events,
+ dev->enqueue_forward_burst);
}
/**
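A minimal sketch of a worker loop using the new specialized variant, assuming a single-event forwarding stage; device and port setup are assumed to happen elsewhere:

#include <stdbool.h>
#include <rte_eventdev.h>

static void
worker_loop(uint8_t dev_id, uint8_t port_id, volatile bool *done)
{
	struct rte_event ev;

	while (!*done) {
		if (rte_event_dequeue_burst(dev_id, port_id, &ev, 1, 0) == 0)
			continue;
		/* ... application-defined event processing ... */
		ev.op = RTE_EVENT_OP_FORWARD;
		/* every event in this burst is a forward, so give the hint */
		while (rte_event_enqueue_forward_burst(dev_id, port_id,
				&ev, 1) != 1)
			;
	}
}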
diff --git a/lib/librte_eventdev/rte_eventdev_pmd.h b/lib/librte_eventdev/rte_eventdev_pmd.h
index 4005b3c9..3d72acf3 100644
--- a/lib/librte_eventdev/rte_eventdev_pmd.h
+++ b/lib/librte_eventdev/rte_eventdev_pmd.h
@@ -1,6 +1,6 @@
/*
*
- * Copyright(c) 2016 Cavium networks. All rights reserved.
+ * Copyright(c) 2016 Cavium, Inc. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
@@ -12,7 +12,7 @@
* notice, this list of conditions and the following disclaimer in
* the documentation and/or other materials provided with the
* distribution.
- * * Neither the name of Cavium networks nor the names of its
+ * * Neither the name of Cavium, Inc nor the names of its
* contributors may be used to endorse or promote products derived
* from this software without specific prior written permission.
*
@@ -46,11 +46,10 @@ extern "C" {
#include <string.h>
+#include <rte_common.h>
#include <rte_dev.h>
-#include <rte_pci.h>
-#include <rte_malloc.h>
#include <rte_log.h>
-#include <rte_common.h>
+#include <rte_malloc.h>
#include "rte_eventdev.h"
@@ -87,60 +86,6 @@ extern "C" {
#define RTE_EVENTDEV_DETACHED (0)
#define RTE_EVENTDEV_ATTACHED (1)
-/**
- * Initialisation function of a event driver invoked for each matching
- * event PCI device detected during the PCI probing phase.
- *
- * @param dev
- * The dev pointer is the address of the *rte_eventdev* structure associated
- * with the matching device and which has been [automatically] allocated in
- * the *rte_event_devices* array.
- *
- * @return
- * - 0: Success, the device is properly initialised by the driver.
- * In particular, the driver MUST have set up the *dev_ops* pointer
- * of the *dev* structure.
- * - <0: Error code of the device initialisation failure.
- */
-typedef int (*eventdev_init_t)(struct rte_eventdev *dev);
-
-/**
- * Finalisation function of a driver invoked for each matching
- * PCI device detected during the PCI closing phase.
- *
- * @param dev
- * The dev pointer is the address of the *rte_eventdev* structure associated
- * with the matching device and which has been [automatically] allocated in
- * the *rte_event_devices* array.
- *
- * @return
- * - 0: Success, the device is properly finalised by the driver.
- * In particular, the driver MUST free the *dev_ops* pointer
- * of the *dev* structure.
- * - <0: Error code of the device initialisation failure.
- */
-typedef int (*eventdev_uninit_t)(struct rte_eventdev *dev);
-
-/**
- * The structure associated with a PMD driver.
- *
- * Each driver acts as a PCI driver and is represented by a generic
- * *event_driver* structure that holds:
- *
- * - An *rte_pci_driver* structure (which must be the first field).
- *
- * - The *eventdev_init* function invoked for each matching PCI device.
- *
- * - The size of the private data to allocate for each matching device.
- */
-struct rte_eventdev_driver {
- struct rte_pci_driver pci_drv; /**< The PMD is also a PCI driver. */
- unsigned int dev_private_size; /**< Size of device private data. */
-
- eventdev_init_t eventdev_init; /**< Device init function. */
- eventdev_uninit_t eventdev_uninit; /**< Device uninit function. */
-};
-
/** Global structure used for maintaining state of allocated event devices */
struct rte_eventdev_global {
uint8_t nb_devs; /**< Number of devices found */
@@ -550,48 +495,6 @@ rte_event_pmd_allocate(const char *name, int socket_id);
int
rte_event_pmd_release(struct rte_eventdev *eventdev);
-/**
- * Creates a new virtual event device and returns the pointer to that device.
- *
- * @param name
- * PMD type name
- * @param dev_private_size
- * Size of event PMDs private data
- * @param socket_id
- * Socket to allocate resources on.
- *
- * @return
- * - Eventdev pointer if device is successfully created.
- * - NULL if device cannot be created.
- */
-struct rte_eventdev *
-rte_event_pmd_vdev_init(const char *name, size_t dev_private_size,
- int socket_id);
-
-/**
- * Destroy the given virtual event device
- *
- * @param name
- * PMD type name
- * @return
- * - 0 on success, negative on error
- */
-int
-rte_event_pmd_vdev_uninit(const char *name);
-
-/**
- * Wrapper for use by pci drivers as a .probe function to attach to a event
- * interface.
- */
-int rte_event_pmd_pci_probe(struct rte_pci_driver *pci_drv,
- struct rte_pci_device *pci_dev);
-
-/**
- * Wrapper for use by pci drivers as a .remove function to detach a event
- * interface.
- */
-int rte_event_pmd_pci_remove(struct rte_pci_device *pci_dev);
-
#ifdef __cplusplus
}
#endif
diff --git a/lib/librte_eventdev/rte_eventdev_pmd_pci.h b/lib/librte_eventdev/rte_eventdev_pmd_pci.h
new file mode 100644
index 00000000..b6bd7319
--- /dev/null
+++ b/lib/librte_eventdev/rte_eventdev_pmd_pci.h
@@ -0,0 +1,162 @@
+/*
+ *
+ * Copyright(c) 2016-2017 Cavium, Inc. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Cavium, Inc nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef _RTE_EVENTDEV_PMD_PCI_H_
+#define _RTE_EVENTDEV_PMD_PCI_H_
+
+/** @file
+ * RTE Eventdev PCI PMD APIs
+ *
+ * @note
+ * These APIs are for use by event PCI PMDs only; user applications should
+ * not call them directly.
+ */
+
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include <string.h>
+
+#include <rte_eal.h>
+#include <rte_lcore.h>
+#include <rte_pci.h>
+
+#include "rte_eventdev_pmd.h"
+
+typedef int (*eventdev_pmd_pci_callback_t)(struct rte_eventdev *dev);
+
+/**
+ * @internal
+ * Wrapper for use by PCI drivers as a .probe function to attach to an event
+ * interface.
+ */
+static inline int
+rte_event_pmd_pci_probe(struct rte_pci_driver *pci_drv,
+ struct rte_pci_device *pci_dev,
+ size_t private_data_size,
+ eventdev_pmd_pci_callback_t devinit)
+{
+ struct rte_eventdev *eventdev;
+
+ char eventdev_name[RTE_EVENTDEV_NAME_MAX_LEN];
+
+ int retval;
+
+ if (devinit == NULL)
+ return -EINVAL;
+
+ rte_pci_device_name(&pci_dev->addr, eventdev_name,
+ sizeof(eventdev_name));
+
+ eventdev = rte_event_pmd_allocate(eventdev_name,
+ pci_dev->device.numa_node);
+ if (eventdev == NULL)
+ return -ENOMEM;
+
+ if (rte_eal_process_type() == RTE_PROC_PRIMARY) {
+ eventdev->data->dev_private =
+ rte_zmalloc_socket(
+ "eventdev private structure",
+ private_data_size,
+ RTE_CACHE_LINE_SIZE,
+ rte_socket_id());
+
+ if (eventdev->data->dev_private == NULL)
+ rte_panic("Cannot allocate memzone for private "
+ "device data");
+ }
+
+ eventdev->dev = &pci_dev->device;
+
+ /* Invoke PMD device initialization function */
+ retval = devinit(eventdev);
+ if (retval == 0)
+ return 0;
+
+ RTE_EDEV_LOG_ERR("driver %s: (vendor_id=0x%x device_id=0x%x)"
+ " failed", pci_drv->driver.name,
+ (unsigned int) pci_dev->id.vendor_id,
+ (unsigned int) pci_dev->id.device_id);
+
+ rte_event_pmd_release(eventdev);
+
+ return -ENXIO;
+}
+
+
+/**
+ * @internal
+ * Wrapper for use by PCI drivers as a .remove function to detach an event
+ * interface.
+ */
+static inline int
+rte_event_pmd_pci_remove(struct rte_pci_device *pci_dev,
+ eventdev_pmd_pci_callback_t devuninit)
+{
+ struct rte_eventdev *eventdev;
+ char eventdev_name[RTE_EVENTDEV_NAME_MAX_LEN];
+ int ret = 0;
+
+ if (pci_dev == NULL)
+ return -EINVAL;
+
+ rte_pci_device_name(&pci_dev->addr, eventdev_name,
+ sizeof(eventdev_name));
+
+ eventdev = rte_event_pmd_get_named_dev(eventdev_name);
+ if (eventdev == NULL)
+ return -ENODEV;
+
+ ret = rte_event_dev_close(eventdev->data->dev_id);
+ if (ret < 0)
+ return ret;
+
+ /* Invoke PMD device un-init function */
+ if (devuninit)
+ ret = devuninit(eventdev);
+ if (ret)
+ return ret;
+
+ /* Free event device */
+ rte_event_pmd_release(eventdev);
+
+ eventdev->dev = NULL;
+
+ return 0;
+}
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _RTE_EVENTDEV_PMD_PCI_H_ */
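A sketch of how a PCI event PMD might wire these wrappers into its rte_pci_driver; all my_* names are hypothetical and only the probe/remove plumbing is shown:

#include <rte_common.h>
#include <rte_pci.h>
#include <rte_eventdev_pmd_pci.h>

/* Illustrative per-device private state. */
struct my_eventdev_private {
	int configured;
};

static int
my_eventdev_init(struct rte_eventdev *dev)
{
	/* a real PMD fills in dev->dev_ops and the enqueue/dequeue pointers */
	RTE_SET_USED(dev);
	return 0;
}

static int
my_eventdev_uninit(struct rte_eventdev *dev)
{
	RTE_SET_USED(dev);
	return 0;
}

static int
my_pci_probe(struct rte_pci_driver *pci_drv, struct rte_pci_device *pci_dev)
{
	return rte_event_pmd_pci_probe(pci_drv, pci_dev,
			sizeof(struct my_eventdev_private),
			my_eventdev_init);
}

static int
my_pci_remove(struct rte_pci_device *pci_dev)
{
	return rte_event_pmd_pci_remove(pci_dev, my_eventdev_uninit);
}

static struct rte_pci_driver my_event_pci_driver = {
	.probe = my_pci_probe,
	.remove = my_pci_remove,
	/* .id_table and .drv_flags omitted from this sketch */
};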
diff --git a/lib/librte_eventdev/rte_eventdev_pmd_vdev.h b/lib/librte_eventdev/rte_eventdev_pmd_vdev.h
new file mode 100644
index 00000000..135e8b80
--- /dev/null
+++ b/lib/librte_eventdev/rte_eventdev_pmd_vdev.h
@@ -0,0 +1,134 @@
+/*
+ *
+ * Copyright(c) 2016-2017 Cavium, Inc. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Cavium, Inc nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef _RTE_EVENTDEV_PMD_VDEV_H_
+#define _RTE_EVENTDEV_PMD_VDEV_H_
+
+/** @file
+ * RTE Eventdev VDEV PMD APIs
+ *
+ * @note
+ * These APIs are for use by event VDEV PMDs only; user applications should
+ * not call them directly.
+ */
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include <string.h>
+
+#include <rte_debug.h>
+#include <rte_eal.h>
+#include <rte_vdev.h>
+
+#include "rte_eventdev_pmd.h"
+
+/**
+ * @internal
+ * Creates a new virtual event device and returns the pointer to that device.
+ *
+ * @param name
+ * PMD type name
+ * @param dev_private_size
+ *   Size of the event PMD's private data
+ * @param socket_id
+ * Socket to allocate resources on.
+ *
+ * @return
+ * - Eventdev pointer if device is successfully created.
+ * - NULL if device cannot be created.
+ */
+static inline struct rte_eventdev *
+rte_event_pmd_vdev_init(const char *name, size_t dev_private_size,
+ int socket_id)
+{
+ struct rte_eventdev *eventdev;
+
+ /* Allocate device structure */
+ eventdev = rte_event_pmd_allocate(name, socket_id);
+ if (eventdev == NULL)
+ return NULL;
+
+ /* Allocate private device structure */
+ if (rte_eal_process_type() == RTE_PROC_PRIMARY) {
+ eventdev->data->dev_private =
+ rte_zmalloc_socket("eventdev device private",
+ dev_private_size,
+ RTE_CACHE_LINE_SIZE,
+ socket_id);
+
+ if (eventdev->data->dev_private == NULL)
+ rte_panic("Cannot allocate memzone for private device"
+ " data");
+ }
+
+ return eventdev;
+}
+
+/**
+ * @internal
+ * Destroy the given virtual event device
+ *
+ * @param name
+ * PMD type name
+ * @return
+ * - 0 on success, negative on error
+ */
+static inline int
+rte_event_pmd_vdev_uninit(const char *name)
+{
+ int ret;
+ struct rte_eventdev *eventdev;
+
+ if (name == NULL)
+ return -EINVAL;
+
+ eventdev = rte_event_pmd_get_named_dev(name);
+ if (eventdev == NULL)
+ return -ENODEV;
+
+ ret = rte_event_dev_close(eventdev->data->dev_id);
+ if (ret < 0)
+ return ret;
+
+ /* Free the event device */
+ rte_event_pmd_release(eventdev);
+
+ return 0;
+}
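+
+/*
+ * Illustrative sketch (not part of this patch): a virtual event PMD
+ * would typically call these helpers from its rte_vdev_driver
+ * probe/remove callbacks. The my_evdev_* names and the private data
+ * structure are hypothetical placeholders.
+ *
+ *	static int
+ *	my_evdev_probe(struct rte_vdev_device *vdev)
+ *	{
+ *		struct rte_eventdev *dev;
+ *
+ *		dev = rte_event_pmd_vdev_init(rte_vdev_device_name(vdev),
+ *				sizeof(struct my_evdev_private),
+ *				rte_socket_id());
+ *		if (dev == NULL)
+ *			return -ENOMEM;
+ *		// set dev->dev_ops here
+ *		return 0;
+ *	}
+ *
+ *	static int
+ *	my_evdev_remove(struct rte_vdev_device *vdev)
+ *	{
+ *		return rte_event_pmd_vdev_uninit(rte_vdev_device_name(vdev));
+ *	}
+ */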
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _RTE_EVENTDEV_PMD_VDEV_H_ */
diff --git a/lib/librte_eventdev/rte_eventdev_version.map b/lib/librte_eventdev/rte_eventdev_version.map
index 1fa6b333..4c48e5f0 100644
--- a/lib/librte_eventdev/rte_eventdev_version.map
+++ b/lib/librte_eventdev/rte_eventdev_version.map
@@ -42,3 +42,12 @@ DPDK_17.05 {
local: *;
};
+
+DPDK_17.08 {
+ global:
+
+ rte_event_ring_create;
+ rte_event_ring_free;
+ rte_event_ring_init;
+ rte_event_ring_lookup;
+} DPDK_17.05;
diff --git a/lib/librte_gro/Makefile b/lib/librte_gro/Makefile
new file mode 100644
index 00000000..747eeec9
--- /dev/null
+++ b/lib/librte_gro/Makefile
@@ -0,0 +1,51 @@
+# BSD LICENSE
+#
+# Copyright(c) 2017 Intel Corporation. All rights reserved.
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions
+# are met:
+#
+# * Redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer.
+# * Redistributions in binary form must reproduce the above copyright
+# notice, this list of conditions and the following disclaimer in
+# the documentation and/or other materials provided with the
+# distribution.
+# * Neither the name of Intel Corporation nor the names of its
+# contributors may be used to endorse or promote products derived
+# from this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+include $(RTE_SDK)/mk/rte.vars.mk
+
+# library name
+LIB = librte_gro.a
+
+CFLAGS += -O3
+CFLAGS += $(WERROR_FLAGS) -I$(SRCDIR)
+
+EXPORT_MAP := rte_gro_version.map
+
+LIBABIVER := 1
+
+# source files
+SRCS-$(CONFIG_RTE_LIBRTE_GRO) += rte_gro.c
+SRCS-$(CONFIG_RTE_LIBRTE_GRO) += gro_tcp4.c
+
+# install this header file
+SYMLINK-$(CONFIG_RTE_LIBRTE_GRO)-include += rte_gro.h
+
+include $(RTE_SDK)/mk/rte.lib.mk
diff --git a/lib/librte_gro/gro_tcp4.c b/lib/librte_gro/gro_tcp4.c
new file mode 100644
index 00000000..61a04232
--- /dev/null
+++ b/lib/librte_gro/gro_tcp4.c
@@ -0,0 +1,505 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright(c) 2017 Intel Corporation. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <rte_malloc.h>
+#include <rte_mbuf.h>
+#include <rte_cycles.h>
+#include <rte_ethdev.h>
+#include <rte_ip.h>
+#include <rte_tcp.h>
+
+#include "gro_tcp4.h"
+
+void *
+gro_tcp4_tbl_create(uint16_t socket_id,
+ uint16_t max_flow_num,
+ uint16_t max_item_per_flow)
+{
+ struct gro_tcp4_tbl *tbl;
+ size_t size;
+ uint32_t entries_num, i;
+
+ entries_num = max_flow_num * max_item_per_flow;
+ entries_num = RTE_MIN(entries_num, GRO_TCP4_TBL_MAX_ITEM_NUM);
+
+ if (entries_num == 0)
+ return NULL;
+
+ tbl = rte_zmalloc_socket(__func__,
+ sizeof(struct gro_tcp4_tbl),
+ RTE_CACHE_LINE_SIZE,
+ socket_id);
+ if (tbl == NULL)
+ return NULL;
+
+ size = sizeof(struct gro_tcp4_item) * entries_num;
+ tbl->items = rte_zmalloc_socket(__func__,
+ size,
+ RTE_CACHE_LINE_SIZE,
+ socket_id);
+ if (tbl->items == NULL) {
+ rte_free(tbl);
+ return NULL;
+ }
+ tbl->max_item_num = entries_num;
+
+ size = sizeof(struct gro_tcp4_key) * entries_num;
+ tbl->keys = rte_zmalloc_socket(__func__,
+ size,
+ RTE_CACHE_LINE_SIZE,
+ socket_id);
+ if (tbl->keys == NULL) {
+ rte_free(tbl->items);
+ rte_free(tbl);
+ return NULL;
+ }
+ /* INVALID_ARRAY_INDEX indicates empty key */
+ for (i = 0; i < entries_num; i++)
+ tbl->keys[i].start_index = INVALID_ARRAY_INDEX;
+ tbl->max_key_num = entries_num;
+
+ return tbl;
+}
+
+void
+gro_tcp4_tbl_destroy(void *tbl)
+{
+ struct gro_tcp4_tbl *tcp_tbl = tbl;
+
+ if (tcp_tbl) {
+ rte_free(tcp_tbl->items);
+ rte_free(tcp_tbl->keys);
+ }
+ rte_free(tcp_tbl);
+}
+
+/*
+ * Merge two TCP/IPv4 packets without updating checksums.
+ * If cmp is larger than 0, append the new packet to the
+ * original packet. Otherwise, prepend the new packet to
+ * the original packet.
+ */
+static inline int
+merge_two_tcp4_packets(struct gro_tcp4_item *item_src,
+ struct rte_mbuf *pkt,
+ uint16_t ip_id,
+ uint32_t sent_seq,
+ int cmp)
+{
+ struct rte_mbuf *pkt_head, *pkt_tail, *lastseg;
+ uint16_t tcp_datalen;
+
+ if (cmp > 0) {
+ pkt_head = item_src->firstseg;
+ pkt_tail = pkt;
+ } else {
+ pkt_head = pkt;
+ pkt_tail = item_src->firstseg;
+ }
+
+	/* check if the merged packet length would exceed the max value */
+ tcp_datalen = pkt_tail->pkt_len - pkt_tail->l2_len -
+ pkt_tail->l3_len - pkt_tail->l4_len;
+ if (pkt_head->pkt_len - pkt_head->l2_len + tcp_datalen >
+ TCP4_MAX_L3_LENGTH)
+ return 0;
+
+ /* remove packet header for the tail packet */
+ rte_pktmbuf_adj(pkt_tail,
+ pkt_tail->l2_len +
+ pkt_tail->l3_len +
+ pkt_tail->l4_len);
+
+ /* chain two packets together */
+ if (cmp > 0) {
+ item_src->lastseg->next = pkt;
+ item_src->lastseg = rte_pktmbuf_lastseg(pkt);
+ /* update IP ID to the larger value */
+ item_src->ip_id = ip_id;
+ } else {
+ lastseg = rte_pktmbuf_lastseg(pkt);
+ lastseg->next = item_src->firstseg;
+ item_src->firstseg = pkt;
+ /* update sent_seq to the smaller value */
+ item_src->sent_seq = sent_seq;
+ }
+ item_src->nb_merged++;
+
+ /* update mbuf metadata for the merged packet */
+ pkt_head->nb_segs += pkt_tail->nb_segs;
+ pkt_head->pkt_len += pkt_tail->pkt_len;
+
+ return 1;
+}
+
+static inline int
+check_seq_option(struct gro_tcp4_item *item,
+ struct tcp_hdr *tcp_hdr,
+ uint16_t tcp_hl,
+ uint16_t tcp_dl,
+ uint16_t ip_id,
+ uint32_t sent_seq)
+{
+ struct rte_mbuf *pkt0 = item->firstseg;
+ struct ipv4_hdr *ipv4_hdr0;
+ struct tcp_hdr *tcp_hdr0;
+ uint16_t tcp_hl0, tcp_dl0;
+ uint16_t len;
+
+ ipv4_hdr0 = (struct ipv4_hdr *)(rte_pktmbuf_mtod(pkt0, char *) +
+ pkt0->l2_len);
+ tcp_hdr0 = (struct tcp_hdr *)((char *)ipv4_hdr0 + pkt0->l3_len);
+ tcp_hl0 = pkt0->l4_len;
+
+ /* check if TCP option fields equal. If not, return 0. */
+ len = RTE_MAX(tcp_hl, tcp_hl0) - sizeof(struct tcp_hdr);
+ if ((tcp_hl != tcp_hl0) ||
+ ((len > 0) && (memcmp(tcp_hdr + 1,
+ tcp_hdr0 + 1,
+ len) != 0)))
+ return 0;
+
+ /* check if the two packets are neighbors */
+ tcp_dl0 = pkt0->pkt_len - pkt0->l2_len - pkt0->l3_len - tcp_hl0;
+ if ((sent_seq == (item->sent_seq + tcp_dl0)) &&
+ (ip_id == (item->ip_id + 1)))
+ /* append the new packet */
+ return 1;
+ else if (((sent_seq + tcp_dl) == item->sent_seq) &&
+ ((ip_id + item->nb_merged) == item->ip_id))
+ /* pre-pend the new packet */
+ return -1;
+ else
+ return 0;
+}
+
+static inline uint32_t
+find_an_empty_item(struct gro_tcp4_tbl *tbl)
+{
+ uint32_t i;
+ uint32_t max_item_num = tbl->max_item_num;
+
+ for (i = 0; i < max_item_num; i++)
+ if (tbl->items[i].firstseg == NULL)
+ return i;
+ return INVALID_ARRAY_INDEX;
+}
+
+static inline uint32_t
+find_an_empty_key(struct gro_tcp4_tbl *tbl)
+{
+ uint32_t i;
+ uint32_t max_key_num = tbl->max_key_num;
+
+ for (i = 0; i < max_key_num; i++)
+ if (tbl->keys[i].start_index == INVALID_ARRAY_INDEX)
+ return i;
+ return INVALID_ARRAY_INDEX;
+}
+
+static inline uint32_t
+insert_new_item(struct gro_tcp4_tbl *tbl,
+ struct rte_mbuf *pkt,
+ uint16_t ip_id,
+ uint32_t sent_seq,
+ uint32_t prev_idx,
+ uint64_t start_time)
+{
+ uint32_t item_idx;
+
+ item_idx = find_an_empty_item(tbl);
+ if (item_idx == INVALID_ARRAY_INDEX)
+ return INVALID_ARRAY_INDEX;
+
+ tbl->items[item_idx].firstseg = pkt;
+ tbl->items[item_idx].lastseg = rte_pktmbuf_lastseg(pkt);
+ tbl->items[item_idx].start_time = start_time;
+ tbl->items[item_idx].next_pkt_idx = INVALID_ARRAY_INDEX;
+ tbl->items[item_idx].sent_seq = sent_seq;
+ tbl->items[item_idx].ip_id = ip_id;
+ tbl->items[item_idx].nb_merged = 1;
+ tbl->item_num++;
+
+ /* if the previous packet exists, chain the new one with it */
+ if (prev_idx != INVALID_ARRAY_INDEX) {
+ tbl->items[item_idx].next_pkt_idx =
+ tbl->items[prev_idx].next_pkt_idx;
+ tbl->items[prev_idx].next_pkt_idx = item_idx;
+ }
+
+ return item_idx;
+}
+
+static inline uint32_t
+delete_item(struct gro_tcp4_tbl *tbl, uint32_t item_idx,
+ uint32_t prev_item_idx)
+{
+ uint32_t next_idx = tbl->items[item_idx].next_pkt_idx;
+
+	/* set firstseg to NULL to indicate the item is empty */
+ tbl->items[item_idx].firstseg = NULL;
+ tbl->item_num--;
+ if (prev_item_idx != INVALID_ARRAY_INDEX)
+ tbl->items[prev_item_idx].next_pkt_idx = next_idx;
+
+ return next_idx;
+}
+
+static inline uint32_t
+insert_new_key(struct gro_tcp4_tbl *tbl,
+ struct tcp4_key *key_src,
+ uint32_t item_idx)
+{
+ struct tcp4_key *key_dst;
+ uint32_t key_idx;
+
+ key_idx = find_an_empty_key(tbl);
+ if (key_idx == INVALID_ARRAY_INDEX)
+ return INVALID_ARRAY_INDEX;
+
+ key_dst = &(tbl->keys[key_idx].key);
+
+ ether_addr_copy(&(key_src->eth_saddr), &(key_dst->eth_saddr));
+ ether_addr_copy(&(key_src->eth_daddr), &(key_dst->eth_daddr));
+ key_dst->ip_src_addr = key_src->ip_src_addr;
+ key_dst->ip_dst_addr = key_src->ip_dst_addr;
+ key_dst->recv_ack = key_src->recv_ack;
+ key_dst->src_port = key_src->src_port;
+ key_dst->dst_port = key_src->dst_port;
+
+ /* non-INVALID_ARRAY_INDEX value indicates this key is valid */
+ tbl->keys[key_idx].start_index = item_idx;
+ tbl->key_num++;
+
+ return key_idx;
+}
+
+static inline int
+is_same_key(struct tcp4_key k1, struct tcp4_key k2)
+{
+ if (is_same_ether_addr(&k1.eth_saddr, &k2.eth_saddr) == 0)
+ return 0;
+
+ if (is_same_ether_addr(&k1.eth_daddr, &k2.eth_daddr) == 0)
+ return 0;
+
+ return ((k1.ip_src_addr == k2.ip_src_addr) &&
+ (k1.ip_dst_addr == k2.ip_dst_addr) &&
+ (k1.recv_ack == k2.recv_ack) &&
+ (k1.src_port == k2.src_port) &&
+ (k1.dst_port == k2.dst_port));
+}
+
+/*
+ * update packet length for the flushed packet.
+ */
+static inline void
+update_header(struct gro_tcp4_item *item)
+{
+ struct ipv4_hdr *ipv4_hdr;
+ struct rte_mbuf *pkt = item->firstseg;
+
+ ipv4_hdr = (struct ipv4_hdr *)(rte_pktmbuf_mtod(pkt, char *) +
+ pkt->l2_len);
+ ipv4_hdr->total_length = rte_cpu_to_be_16(pkt->pkt_len -
+ pkt->l2_len);
+}
+
+int32_t
+gro_tcp4_reassemble(struct rte_mbuf *pkt,
+ struct gro_tcp4_tbl *tbl,
+ uint64_t start_time)
+{
+ struct ether_hdr *eth_hdr;
+ struct ipv4_hdr *ipv4_hdr;
+ struct tcp_hdr *tcp_hdr;
+ uint32_t sent_seq;
+ uint16_t tcp_dl, ip_id;
+
+ struct tcp4_key key;
+ uint32_t cur_idx, prev_idx, item_idx;
+ uint32_t i, max_key_num;
+ int cmp;
+
+ eth_hdr = rte_pktmbuf_mtod(pkt, struct ether_hdr *);
+ ipv4_hdr = (struct ipv4_hdr *)((char *)eth_hdr + pkt->l2_len);
+ tcp_hdr = (struct tcp_hdr *)((char *)ipv4_hdr + pkt->l3_len);
+
+ /*
+ * if FIN, SYN, RST, PSH, URG, ECE or
+ * CWR is set, return immediately.
+ */
+ if (tcp_hdr->tcp_flags != TCP_ACK_FLAG)
+ return -1;
+ /* if payload length is 0, return immediately */
+ tcp_dl = rte_be_to_cpu_16(ipv4_hdr->total_length) - pkt->l3_len -
+ pkt->l4_len;
+ if (tcp_dl == 0)
+ return -1;
+
+ ip_id = rte_be_to_cpu_16(ipv4_hdr->packet_id);
+ sent_seq = rte_be_to_cpu_32(tcp_hdr->sent_seq);
+
+ ether_addr_copy(&(eth_hdr->s_addr), &(key.eth_saddr));
+ ether_addr_copy(&(eth_hdr->d_addr), &(key.eth_daddr));
+ key.ip_src_addr = ipv4_hdr->src_addr;
+ key.ip_dst_addr = ipv4_hdr->dst_addr;
+ key.src_port = tcp_hdr->src_port;
+ key.dst_port = tcp_hdr->dst_port;
+ key.recv_ack = tcp_hdr->recv_ack;
+
+ /* search for a key */
+ max_key_num = tbl->max_key_num;
+ for (i = 0; i < max_key_num; i++) {
+ if ((tbl->keys[i].start_index != INVALID_ARRAY_INDEX) &&
+ is_same_key(tbl->keys[i].key, key))
+ break;
+ }
+
+ /* can't find a key, so insert a new key and a new item. */
+	if (i == max_key_num) {
+ item_idx = insert_new_item(tbl, pkt, ip_id, sent_seq,
+ INVALID_ARRAY_INDEX, start_time);
+ if (item_idx == INVALID_ARRAY_INDEX)
+ return -1;
+ if (insert_new_key(tbl, &key, item_idx) ==
+ INVALID_ARRAY_INDEX) {
+ /*
+ * fail to insert a new key, so
+ * delete the inserted item
+ */
+ delete_item(tbl, item_idx, INVALID_ARRAY_INDEX);
+ return -1;
+ }
+ return 0;
+ }
+
+ /* traverse all packets in the item group to find one to merge */
+ cur_idx = tbl->keys[i].start_index;
+ prev_idx = cur_idx;
+ do {
+ cmp = check_seq_option(&(tbl->items[cur_idx]), tcp_hdr,
+ pkt->l4_len, tcp_dl, ip_id, sent_seq);
+ if (cmp) {
+ if (merge_two_tcp4_packets(&(tbl->items[cur_idx]),
+ pkt, ip_id,
+ sent_seq, cmp))
+ return 1;
+			/*
+			 * Can't merge the two packets, as the merged
+			 * packet length would exceed the max value.
+			 * So insert the packet into the item group.
+			 */
+ if (insert_new_item(tbl, pkt, ip_id, sent_seq,
+ prev_idx, start_time) ==
+ INVALID_ARRAY_INDEX)
+ return -1;
+ return 0;
+ }
+ prev_idx = cur_idx;
+ cur_idx = tbl->items[cur_idx].next_pkt_idx;
+ } while (cur_idx != INVALID_ARRAY_INDEX);
+
+ /*
+ * can't find a packet in the item group to merge,
+ * so insert the packet into the item group.
+ */
+ if (insert_new_item(tbl, pkt, ip_id, sent_seq, prev_idx,
+ start_time) == INVALID_ARRAY_INDEX)
+ return -1;
+
+ return 0;
+}
+
+uint16_t
+gro_tcp4_tbl_timeout_flush(struct gro_tcp4_tbl *tbl,
+ uint64_t flush_timestamp,
+ struct rte_mbuf **out,
+ uint16_t nb_out)
+{
+ uint16_t k = 0;
+ uint32_t i, j;
+ uint32_t max_key_num = tbl->max_key_num;
+
+ for (i = 0; i < max_key_num; i++) {
+ /* all keys have been checked, return immediately */
+ if (tbl->key_num == 0)
+ return k;
+
+ j = tbl->keys[i].start_index;
+ while (j != INVALID_ARRAY_INDEX) {
+ if (tbl->items[j].start_time <= flush_timestamp) {
+ out[k++] = tbl->items[j].firstseg;
+ if (tbl->items[j].nb_merged > 1)
+ update_header(&(tbl->items[j]));
+ /*
+ * delete the item and get
+ * the next packet index
+ */
+ j = delete_item(tbl, j,
+ INVALID_ARRAY_INDEX);
+
+				/*
+				 * delete the key, as all of its
+				 * packets have been flushed
+				 */
+ if (j == INVALID_ARRAY_INDEX) {
+ tbl->keys[i].start_index =
+ INVALID_ARRAY_INDEX;
+ tbl->key_num--;
+ } else
+ /* update start_index of the key */
+ tbl->keys[i].start_index = j;
+
+ if (k == nb_out)
+ return k;
+ } else
+				/*
+				 * the remaining packets of this key
+				 * haven't timed out yet, so go on to
+				 * check the other keys.
+				 */
+ break;
+ }
+ }
+ return k;
+}
+
+uint32_t
+gro_tcp4_tbl_pkt_count(void *tbl)
+{
+ struct gro_tcp4_tbl *gro_tbl = tbl;
+
+ if (gro_tbl)
+ return gro_tbl->item_num;
+
+ return 0;
+}
diff --git a/lib/librte_gro/gro_tcp4.h b/lib/librte_gro/gro_tcp4.h
new file mode 100644
index 00000000..f41dcee3
--- /dev/null
+++ b/lib/librte_gro/gro_tcp4.h
@@ -0,0 +1,210 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright(c) 2017 Intel Corporation. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef _GRO_TCP4_H_
+#define _GRO_TCP4_H_
+
+#define INVALID_ARRAY_INDEX 0xffffffffUL
+#define GRO_TCP4_TBL_MAX_ITEM_NUM (1024UL * 1024UL)
+
+/*
+ * The max length of a TCP/IPv4 packet at L3, i.e. the sum of the
+ * IPv4 header, the TCP header and the TCP payload. It is limited
+ * by the 16-bit total_length field of the IPv4 header.
+ */
+#define TCP4_MAX_L3_LENGTH UINT16_MAX
+
+/* criteria for merging packets */
+struct tcp4_key {
+ struct ether_addr eth_saddr;
+ struct ether_addr eth_daddr;
+ uint32_t ip_src_addr;
+ uint32_t ip_dst_addr;
+
+ uint32_t recv_ack;
+ uint16_t src_port;
+ uint16_t dst_port;
+};
+
+struct gro_tcp4_key {
+ struct tcp4_key key;
+ /*
+ * the index of the first packet in the item group.
+ * If the value is INVALID_ARRAY_INDEX, it means
+ * the key is empty.
+ */
+ uint32_t start_index;
+};
+
+struct gro_tcp4_item {
+ /*
+ * first segment of the packet. If the value
+ * is NULL, it means the item is empty.
+ */
+ struct rte_mbuf *firstseg;
+ /* last segment of the packet */
+ struct rte_mbuf *lastseg;
+ /*
+ * the time when the first packet is inserted
+ * into the table. If a packet in the table is
+ * merged with an incoming packet, this value
+ * won't be updated. We set this value only
+ * when the first packet is inserted into the
+ * table.
+ */
+ uint64_t start_time;
+ /*
+ * we use next_pkt_idx to chain the packets that
+ * have same key value but can't be merged together.
+ */
+ uint32_t next_pkt_idx;
+ /* the sequence number of the packet */
+ uint32_t sent_seq;
+ /* the IP ID of the packet */
+ uint16_t ip_id;
+ /* the number of merged packets */
+ uint16_t nb_merged;
+};
+
+/*
+ * TCP/IPv4 reassembly table structure.
+ */
+struct gro_tcp4_tbl {
+ /* item array */
+ struct gro_tcp4_item *items;
+ /* key array */
+ struct gro_tcp4_key *keys;
+ /* current item number */
+ uint32_t item_num;
+ /* current key num */
+ uint32_t key_num;
+ /* item array size */
+ uint32_t max_item_num;
+ /* key array size */
+ uint32_t max_key_num;
+};
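+
+/*
+ * Illustrative sketch (not part of this patch): items which belong to
+ * the same key are chained through next_pkt_idx, and the chain is
+ * terminated by INVALID_ARRAY_INDEX. An item group is walked like so:
+ *
+ *	uint32_t idx = tbl->keys[i].start_index;
+ *
+ *	while (idx != INVALID_ARRAY_INDEX) {
+ *		struct gro_tcp4_item *item = &tbl->items[idx];
+ *		// inspect item->sent_seq, item->ip_id, ...
+ *		idx = item->next_pkt_idx;
+ *	}
+ */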
+
+/**
+ * This function creates a TCP/IPv4 reassembly table.
+ *
+ * @param socket_id
+ *  socket index for allocating the TCP/IPv4 reassembly table
+ * @param max_flow_num
+ * the maximum number of flows in the TCP/IPv4 GRO table
+ * @param max_item_per_flow
+ * the maximum packet number per flow.
+ *
+ * @return
+ *  on success, a pointer to the created TCP/IPv4 GRO table;
+ *  otherwise, return NULL.
+ */
+void *gro_tcp4_tbl_create(uint16_t socket_id,
+ uint16_t max_flow_num,
+ uint16_t max_item_per_flow);
+
+/**
+ * This function destroys a TCP/IPv4 reassembly table.
+ *
+ * @param tbl
+ *  a pointer to the TCP/IPv4 reassembly table.
+ */
+void gro_tcp4_tbl_destroy(void *tbl);
+
+/**
+ * This function searches the TCP/IPv4 reassembly table for a packet
+ * to merge with the input one. Merging two packets means chaining
+ * them together and updating the packet headers. Packets whose SYN,
+ * FIN, RST, PSH, CWR, ECE or URG bit is set are returned immediately,
+ * as are packets which only have packet headers (i.e. without data).
+ * Otherwise, the packet is either merged or inserted into the table.
+ * If there is no space left to insert the packet, this function
+ * returns immediately as well.
+ *
+ * This function assumes the input packet has correct IPv4 and TCP
+ * checksums, and it won't re-calculate them if two packets are
+ * merged. In addition, if the input packet is IP fragmented, it
+ * assumes the packet is complete (i.e. with the TCP header).
+ *
+ * @param pkt
+ * packet to reassemble.
+ * @param tbl
+ * a pointer that points to a TCP/IPv4 reassembly table.
+ * @param start_time
+ *  the time when the packet is inserted into the table
+ *
+ * @return
+ * if the packet doesn't have data, or SYN, FIN, RST, PSH, CWR, ECE
+ * or URG bit is set, or there is no available space in the table to
+ * insert a new item or a new key, return a negative value. If the
+ *  packet is merged successfully, return a positive value. If the
+ * packet is inserted into the table, return 0.
+ */
+int32_t gro_tcp4_reassemble(struct rte_mbuf *pkt,
+ struct gro_tcp4_tbl *tbl,
+ uint64_t start_time);
+
+/**
+ * This function flushes timed-out packets from a TCP/IPv4 reassembly
+ * table to applications, without updating checksums for the merged
+ * packets. The max number of flushed packets is the number of
+ * elements in the array used to keep the flushed packets.
+ *
+ * @param tbl
+ * a pointer that points to a TCP GRO table.
+ * @param flush_timestamp
+ * this function flushes packets which are inserted into the table
+ * before or at the flush_timestamp.
+ * @param out
+ * pointer array which is used to keep flushed packets.
+ * @param nb_out
+ *  the number of elements in out, which is also the max number of
+ *  timed-out packets that can be flushed.
+ *
+ * @return
+ * the number of packets that are returned.
+ */
+uint16_t gro_tcp4_tbl_timeout_flush(struct gro_tcp4_tbl *tbl,
+ uint64_t flush_timestamp,
+ struct rte_mbuf **out,
+ uint16_t nb_out);
+
+/**
+ * This function returns the number of the packets in a TCP/IPv4
+ * reassembly table.
+ *
+ * @param tbl
+ *  a pointer to a TCP/IPv4 reassembly table.
+ *
+ * @return
+ * the number of packets in the table
+ */
+uint32_t gro_tcp4_tbl_pkt_count(void *tbl);
+#endif /* _GRO_TCP4_H_ */
diff --git a/lib/librte_gro/rte_gro.c b/lib/librte_gro/rte_gro.c
new file mode 100644
index 00000000..7853246a
--- /dev/null
+++ b/lib/librte_gro/rte_gro.c
@@ -0,0 +1,278 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright(c) 2017 Intel Corporation. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <rte_malloc.h>
+#include <rte_mbuf.h>
+#include <rte_cycles.h>
+#include <rte_ethdev.h>
+
+#include "rte_gro.h"
+#include "gro_tcp4.h"
+
+typedef void *(*gro_tbl_create_fn)(uint16_t socket_id,
+ uint16_t max_flow_num,
+ uint16_t max_item_per_flow);
+typedef void (*gro_tbl_destroy_fn)(void *tbl);
+typedef uint32_t (*gro_tbl_pkt_count_fn)(void *tbl);
+
+static gro_tbl_create_fn tbl_create_fn[RTE_GRO_TYPE_MAX_NUM] = {
+ gro_tcp4_tbl_create, NULL};
+static gro_tbl_destroy_fn tbl_destroy_fn[RTE_GRO_TYPE_MAX_NUM] = {
+ gro_tcp4_tbl_destroy, NULL};
+static gro_tbl_pkt_count_fn tbl_pkt_count_fn[RTE_GRO_TYPE_MAX_NUM] = {
+ gro_tcp4_tbl_pkt_count, NULL};
+
+/*
+ * GRO context structure, which is used to merge packets. It keeps
+ * one reassembly table per desired GRO type. Applications need to
+ * create GRO context objects before using rte_gro_reassemble() to
+ * perform GRO.
+ */
+struct gro_ctx {
+ /* GRO types to perform */
+ uint64_t gro_types;
+ /* reassembly tables */
+ void *tbls[RTE_GRO_TYPE_MAX_NUM];
+};
+
+void *
+rte_gro_ctx_create(const struct rte_gro_param *param)
+{
+ struct gro_ctx *gro_ctx;
+ gro_tbl_create_fn create_tbl_fn;
+ uint64_t gro_type_flag = 0;
+ uint64_t gro_types = 0;
+ uint8_t i;
+
+ gro_ctx = rte_zmalloc_socket(__func__,
+ sizeof(struct gro_ctx),
+ RTE_CACHE_LINE_SIZE,
+ param->socket_id);
+ if (gro_ctx == NULL)
+ return NULL;
+
+ for (i = 0; i < RTE_GRO_TYPE_MAX_NUM; i++) {
+ gro_type_flag = 1ULL << i;
+ if ((param->gro_types & gro_type_flag) == 0)
+ continue;
+
+ create_tbl_fn = tbl_create_fn[i];
+ if (create_tbl_fn == NULL)
+ continue;
+
+ gro_ctx->tbls[i] = create_tbl_fn(param->socket_id,
+ param->max_flow_num,
+ param->max_item_per_flow);
+ if (gro_ctx->tbls[i] == NULL) {
+ /* destroy all created tables */
+ gro_ctx->gro_types = gro_types;
+ rte_gro_ctx_destroy(gro_ctx);
+ return NULL;
+ }
+ gro_types |= gro_type_flag;
+ }
+ gro_ctx->gro_types = param->gro_types;
+
+ return gro_ctx;
+}
+
+void
+rte_gro_ctx_destroy(void *ctx)
+{
+ gro_tbl_destroy_fn destroy_tbl_fn;
+ struct gro_ctx *gro_ctx = ctx;
+ uint64_t gro_type_flag;
+ uint8_t i;
+
+ if (gro_ctx == NULL)
+ return;
+ for (i = 0; i < RTE_GRO_TYPE_MAX_NUM; i++) {
+ gro_type_flag = 1ULL << i;
+ if ((gro_ctx->gro_types & gro_type_flag) == 0)
+ continue;
+ destroy_tbl_fn = tbl_destroy_fn[i];
+ if (destroy_tbl_fn)
+ destroy_tbl_fn(gro_ctx->tbls[i]);
+ }
+ rte_free(gro_ctx);
+}
+
+uint16_t
+rte_gro_reassemble_burst(struct rte_mbuf **pkts,
+ uint16_t nb_pkts,
+ const struct rte_gro_param *param)
+{
+ uint16_t i;
+ uint16_t nb_after_gro = nb_pkts;
+ uint32_t item_num;
+
+ /* allocate a reassembly table for TCP/IPv4 GRO */
+ struct gro_tcp4_tbl tcp_tbl;
+ struct gro_tcp4_key tcp_keys[RTE_GRO_MAX_BURST_ITEM_NUM];
+ struct gro_tcp4_item tcp_items[RTE_GRO_MAX_BURST_ITEM_NUM] = {{0} };
+
+ struct rte_mbuf *unprocess_pkts[nb_pkts];
+ uint16_t unprocess_num = 0;
+ int32_t ret;
+ uint64_t current_time;
+
+ if ((param->gro_types & RTE_GRO_TCP_IPV4) == 0)
+ return nb_pkts;
+
+	/* cap the number of items this burst can use */
+ item_num = RTE_MIN(nb_pkts, (param->max_flow_num *
+ param->max_item_per_flow));
+ item_num = RTE_MIN(item_num, RTE_GRO_MAX_BURST_ITEM_NUM);
+
+ for (i = 0; i < item_num; i++)
+ tcp_keys[i].start_index = INVALID_ARRAY_INDEX;
+
+ tcp_tbl.keys = tcp_keys;
+ tcp_tbl.items = tcp_items;
+ tcp_tbl.key_num = 0;
+ tcp_tbl.item_num = 0;
+ tcp_tbl.max_key_num = item_num;
+ tcp_tbl.max_item_num = item_num;
+
+ current_time = rte_rdtsc();
+
+ for (i = 0; i < nb_pkts; i++) {
+ if ((pkts[i]->packet_type & (RTE_PTYPE_L3_IPV4 |
+ RTE_PTYPE_L4_TCP)) ==
+ (RTE_PTYPE_L3_IPV4 | RTE_PTYPE_L4_TCP)) {
+ ret = gro_tcp4_reassemble(pkts[i],
+ &tcp_tbl,
+ current_time);
+ if (ret > 0)
+ /* merge successfully */
+ nb_after_gro--;
+ else if (ret < 0) {
+ unprocess_pkts[unprocess_num++] =
+ pkts[i];
+ }
+ } else
+ unprocess_pkts[unprocess_num++] = pkts[i];
+ }
+
+ /* re-arrange GROed packets */
+ if (nb_after_gro < nb_pkts) {
+ i = gro_tcp4_tbl_timeout_flush(&tcp_tbl, current_time,
+ pkts, nb_pkts);
+ if (unprocess_num > 0) {
+ memcpy(&pkts[i], unprocess_pkts,
+ sizeof(struct rte_mbuf *) *
+ unprocess_num);
+ }
+ }
+
+ return nb_after_gro;
+}
+
+uint16_t
+rte_gro_reassemble(struct rte_mbuf **pkts,
+ uint16_t nb_pkts,
+ void *ctx)
+{
+ uint16_t i, unprocess_num = 0;
+ struct rte_mbuf *unprocess_pkts[nb_pkts];
+ struct gro_ctx *gro_ctx = ctx;
+ uint64_t current_time;
+
+ if ((gro_ctx->gro_types & RTE_GRO_TCP_IPV4) == 0)
+ return nb_pkts;
+
+ current_time = rte_rdtsc();
+
+ for (i = 0; i < nb_pkts; i++) {
+ if ((pkts[i]->packet_type & (RTE_PTYPE_L3_IPV4 |
+ RTE_PTYPE_L4_TCP)) ==
+ (RTE_PTYPE_L3_IPV4 | RTE_PTYPE_L4_TCP)) {
+ if (gro_tcp4_reassemble(pkts[i],
+ gro_ctx->tbls
+ [RTE_GRO_TCP_IPV4_INDEX],
+ current_time) < 0)
+ unprocess_pkts[unprocess_num++] = pkts[i];
+ } else
+ unprocess_pkts[unprocess_num++] = pkts[i];
+ }
+ if (unprocess_num > 0) {
+ memcpy(pkts, unprocess_pkts,
+ sizeof(struct rte_mbuf *) *
+ unprocess_num);
+ }
+
+ return unprocess_num;
+}
+
+uint16_t
+rte_gro_timeout_flush(void *ctx,
+ uint64_t timeout_cycles,
+ uint64_t gro_types,
+ struct rte_mbuf **out,
+ uint16_t max_nb_out)
+{
+ struct gro_ctx *gro_ctx = ctx;
+ uint64_t flush_timestamp;
+
+ gro_types = gro_types & gro_ctx->gro_types;
+ flush_timestamp = rte_rdtsc() - timeout_cycles;
+
+ if (gro_types & RTE_GRO_TCP_IPV4) {
+ return gro_tcp4_tbl_timeout_flush(
+ gro_ctx->tbls[RTE_GRO_TCP_IPV4_INDEX],
+ flush_timestamp,
+ out, max_nb_out);
+ }
+ return 0;
+}
+
+uint64_t
+rte_gro_get_pkt_count(void *ctx)
+{
+ struct gro_ctx *gro_ctx = ctx;
+ gro_tbl_pkt_count_fn pkt_count_fn;
+ uint64_t item_num = 0;
+ uint64_t gro_type_flag;
+ uint8_t i;
+
+ for (i = 0; i < RTE_GRO_TYPE_MAX_NUM; i++) {
+ gro_type_flag = 1ULL << i;
+ if ((gro_ctx->gro_types & gro_type_flag) == 0)
+ continue;
+
+ pkt_count_fn = tbl_pkt_count_fn[i];
+ if (pkt_count_fn == NULL)
+ continue;
+ item_num += pkt_count_fn(gro_ctx->tbls[i]);
+ }
+ return item_num;
+}
diff --git a/lib/librte_gro/rte_gro.h b/lib/librte_gro/rte_gro.h
new file mode 100644
index 00000000..d57e0c5f
--- /dev/null
+++ b/lib/librte_gro/rte_gro.h
@@ -0,0 +1,222 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright(c) 2017 Intel Corporation. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef _RTE_GRO_H_
+#define _RTE_GRO_H_
+
+/**
+ * @file
+ * Interface to GRO library
+ */
+
+#include <stdint.h>
+#include <rte_mbuf.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#define RTE_GRO_MAX_BURST_ITEM_NUM 128U
+/**< the max number of packets that rte_gro_reassemble_burst()
+ * can process in each invocation.
+ */
+#define RTE_GRO_TYPE_MAX_NUM 64
+/**< the max number of supported GRO types */
+#define RTE_GRO_TYPE_SUPPORT_NUM 1
+/**< the number of currently supported GRO types */
+
+#define RTE_GRO_TCP_IPV4_INDEX 0
+#define RTE_GRO_TCP_IPV4 (1ULL << RTE_GRO_TCP_IPV4_INDEX)
+/**< TCP/IPv4 GRO flag */
+
+/**
+ * A structure which is used to create GRO context objects or tell
+ * rte_gro_reassemble_burst() what reassembly rules are demanded.
+ */
+struct rte_gro_param {
+ uint64_t gro_types;
+ /**< desired GRO types */
+ uint16_t max_flow_num;
+ /**< max flow number */
+ uint16_t max_item_per_flow;
+ /**< max packet number per flow */
+ uint16_t socket_id;
+ /**< socket index for allocating GRO related data structures,
+	 * like reassembly tables. When using rte_gro_reassemble_burst(),
+ * applications don't need to set this value.
+ */
+};
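+
+/*
+ * Illustrative sketch (not part of this patch): a typical parameter
+ * setup for TCP/IPv4 GRO. The flow and item limits below are
+ * arbitrary example values.
+ *
+ *	struct rte_gro_param gro_param = {
+ *		.gro_types = RTE_GRO_TCP_IPV4,
+ *		.max_flow_num = 64,
+ *		.max_item_per_flow = 32,
+ *		.socket_id = rte_socket_id(),
+ *	};
+ */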
+
+/**
+ * @warning
+ * @b EXPERIMENTAL: this API may change without prior notice
+ *
+ * This function creates a GRO context object, which is used to merge
+ * packets in rte_gro_reassemble().
+ *
+ * @param param
+ * applications use it to pass needed parameters to create a GRO
+ * context object.
+ *
+ * @return
+ *  on success, a pointer to the created GRO context object;
+ *  otherwise, return NULL.
+ */
+void *rte_gro_ctx_create(const struct rte_gro_param *param);
+
+/**
+ * @warning
+ * @b EXPERIMENTAL: this API may change without prior notice
+ *
+ * This function destroys a GRO context object.
+ *
+ * @param ctx
+ *  a pointer to a GRO context object.
+ */
+void rte_gro_ctx_destroy(void *ctx);
+
+/**
+ * This is one of the main reassembly APIs, which merges a burst of
+ * packets at a time. It assumes that all input packets have correct
+ * checksums; applications should guarantee this. It doesn't
+ * re-calculate checksums for merged packets. If the input packets
+ * are IP fragmented, this function assumes they are complete (i.e.
+ * with the L4 header). After processing, it immediately returns all
+ * GROed packets to the application.
+ *
+ * @param pkts
+ *  a pointer array pointing to the packets to reassemble. On return,
+ *  it keeps the mbuf addresses of the GROed packets.
+ * @param nb_pkts
+ * the number of packets to reassemble.
+ * @param param
+ *  applications use it to tell rte_gro_reassemble_burst() which
+ *  reassembly rules to apply.
+ *
+ * @return
+ *  the number of packets after GRO is performed. If no packets are
+ *  merged, the returned value is nb_pkts.
+ */
+uint16_t rte_gro_reassemble_burst(struct rte_mbuf **pkts,
+ uint16_t nb_pkts,
+ const struct rte_gro_param *param);
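+
+/*
+ * Illustrative sketch (not part of this patch): lightweight mode
+ * merges a single burst in place, without a GRO context object.
+ * port_id, queue_id, pkts, gro_param and MAX_PKT_BURST are assumed
+ * to be defined by the application.
+ *
+ *	nb_rx = rte_eth_rx_burst(port_id, queue_id, pkts, MAX_PKT_BURST);
+ *	nb_rx = rte_gro_reassemble_burst(pkts, nb_rx, &gro_param);
+ *	// pkts[0 .. nb_rx - 1] now hold the GROed packets
+ */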
+
+/**
+ * @warning
+ * @b EXPERIMENTAL: this API may change without prior notice
+ *
+ * Reassembly function, which tries to merge input packets with the
+ * packets in the reassembly tables of a given GRO context. This
+ * function assumes all input packets have correct checksums, and it
+ * won't update checksums if two packets are merged. In addition, if
+ * the input packets are IP fragmented, this function assumes they
+ * are complete packets (i.e. with the L4 header).
+ *
+ * If the input packets don't have data or have unsupported GRO types,
+ * they won't be processed and are returned to the application.
+ * Otherwise, the input packets are either merged or inserted into the
+ * table. If applications want to get the packets in the table, they
+ * need to call the flush API.
+ *
+ * @param pkts
+ *  packets to reassemble. After this function finishes, it keeps the
+ *  unprocessed packets (e.g. packets without data or with unsupported
+ *  GRO types).
+ * @param nb_pkts
+ * the number of packets to reassemble.
+ * @param ctx
+ * a pointer points to a GRO context object.
+ *
+ * @return
+ *  the number of unprocessed packets (e.g. without data or with
+ *  unsupported GRO types). If all packets are processed (merged or
+ * inserted into the table), return 0.
+ */
+uint16_t rte_gro_reassemble(struct rte_mbuf **pkts,
+ uint16_t nb_pkts,
+ void *ctx);
+
+/**
+ * @warning
+ * @b EXPERIMENTAL: this API may change without prior notice
+ *
+ * This function flushes the timed-out packets from the reassembly
+ * tables of the desired GRO types. The max number of flushed packets
+ * is the number of elements in the array used to keep them.
+ *
+ * Besides, this function won't re-calculate checksums for merged
+ * packets in the tables. That is, the returned packets may be with
+ * wrong checksums.
+ *
+ * @param ctx
+ * a pointer points to a GRO context object.
+ * @param timeout_cycles
+ *  max TTL for packets in reassembly tables, measured in CPU cycles
+ *  (e.g. as returned by rte_rdtsc()).
+ * @param gro_types
+ * this function only flushes packets which belong to the GRO types
+ * specified by gro_types.
+ * @param out
+ * a pointer array that is used to keep flushed timeout packets.
+ * @param max_nb_out
+ *  the number of elements in out, which is also the max number of
+ *  timed-out packets that can be flushed.
+ *
+ * @return
+ * the number of flushed packets. If no packets are flushed, return 0.
+ */
+uint16_t rte_gro_timeout_flush(void *ctx,
+ uint64_t timeout_cycles,
+ uint64_t gro_types,
+ struct rte_mbuf **out,
+ uint16_t max_nb_out);
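+
+/*
+ * Illustrative sketch (not part of this patch): heavyweight mode keeps
+ * packets in a GRO context across bursts and flushes them explicitly.
+ * gro_param, FLUSH_CYCLES and the pkts/out arrays are assumed to be
+ * defined by the application.
+ *
+ *	void *gro_ctx = rte_gro_ctx_create(&gro_param);
+ *
+ *	for (;;) {
+ *		nb_rx = rte_eth_rx_burst(port_id, queue_id, pkts,
+ *				MAX_PKT_BURST);
+ *		nb_rx = rte_gro_reassemble(pkts, nb_rx, gro_ctx);
+ *		// pkts[0 .. nb_rx - 1] hold the unprocessed packets
+ *		nb_out = rte_gro_timeout_flush(gro_ctx, FLUSH_CYCLES,
+ *				RTE_GRO_TCP_IPV4, out, MAX_PKT_BURST);
+ *		// transmit the unprocessed and the flushed packets
+ *	}
+ */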
+
+/**
+ * @warning
+ * @b EXPERIMENTAL: this API may change without prior notice
+ *
+ * This function returns the number of packets in all reassembly tables
+ * of a given GRO context.
+ *
+ * @param ctx
+ *  a pointer to a GRO context object.
+ *
+ * @return
+ * the number of packets in all reassembly tables.
+ */
+uint64_t rte_gro_get_pkt_count(void *ctx);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _RTE_GRO_H_ */
diff --git a/lib/librte_gro/rte_gro_version.map b/lib/librte_gro/rte_gro_version.map
new file mode 100644
index 00000000..bb40bb41
--- /dev/null
+++ b/lib/librte_gro/rte_gro_version.map
@@ -0,0 +1,12 @@
+DPDK_17.08 {
+ global:
+
+	rte_gro_ctx_create;
+	rte_gro_ctx_destroy;
+ rte_gro_get_pkt_count;
+ rte_gro_reassemble;
+ rte_gro_reassemble_burst;
+ rte_gro_timeout_flush;
+
+ local: *;
+};
diff --git a/lib/librte_hash/Makefile b/lib/librte_hash/Makefile
index d856aa26..9cf13a04 100644
--- a/lib/librte_hash/Makefile
+++ b/lib/librte_hash/Makefile
@@ -49,8 +49,10 @@ SRCS-$(CONFIG_RTE_LIBRTE_HASH) += rte_fbk_hash.c
SYMLINK-$(CONFIG_RTE_LIBRTE_HASH)-include := rte_hash.h
SYMLINK-$(CONFIG_RTE_LIBRTE_HASH)-include += rte_hash_crc.h
ifeq ($(CONFIG_RTE_ARCH_ARM64),y)
+ifneq ($(findstring RTE_MACHINE_CPUFLAG_CRC32,$(CFLAGS)),)
SYMLINK-$(CONFIG_RTE_LIBRTE_HASH)-include += rte_crc_arm64.h
endif
+endif
SYMLINK-$(CONFIG_RTE_LIBRTE_HASH)-include += rte_jhash.h
SYMLINK-$(CONFIG_RTE_LIBRTE_HASH)-include += rte_thash.h
SYMLINK-$(CONFIG_RTE_LIBRTE_HASH)-include += rte_fbk_hash.h
diff --git a/lib/librte_hash/rte_cmp_arm64.h b/lib/librte_hash/rte_cmp_arm64.h
index 6fd937b1..950cef3b 100644
--- a/lib/librte_hash/rte_cmp_arm64.h
+++ b/lib/librte_hash/rte_cmp_arm64.h
@@ -1,7 +1,7 @@
/*-
* BSD LICENSE
*
- * Copyright(c) 2015 Cavium networks. All rights reserved.
+ * Copyright(c) 2015 Cavium, Inc. All rights reserved.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -14,7 +14,7 @@
* notice, this list of conditions and the following disclaimer in
* the documentation and/or other materials provided with the
* distribution.
- * * Neither the name of Cavium networks nor the names of its
+ * * Neither the name of Cavium, Inc nor the names of its
* contributors may be used to endorse or promote products derived
* from this software without specific prior written permission.
*
diff --git a/lib/librte_hash/rte_cmp_x86.h b/lib/librte_hash/rte_cmp_x86.h
index e8c484d6..704c2dec 100644
--- a/lib/librte_hash/rte_cmp_x86.h
+++ b/lib/librte_hash/rte_cmp_x86.h
@@ -37,15 +37,9 @@ rte_hash_k16_cmp_eq(const void *key1, const void *key2, size_t key_len __rte_unu
{
const __m128i k1 = _mm_loadu_si128((const __m128i *) key1);
const __m128i k2 = _mm_loadu_si128((const __m128i *) key2);
-#ifdef RTE_MACHINE_CPUFLAG_SSE4_1
const __m128i x = _mm_xor_si128(k1, k2);
return !_mm_test_all_zeros(x, x);
-#else
- const __m128i x = _mm_cmpeq_epi32(k1, k2);
-
- return _mm_movemask_epi8(x) != 0xffff;
-#endif
}
static int
diff --git a/lib/librte_hash/rte_crc_arm64.h b/lib/librte_hash/rte_crc_arm64.h
index 2abe42ab..774428be 100644
--- a/lib/librte_hash/rte_crc_arm64.h
+++ b/lib/librte_hash/rte_crc_arm64.h
@@ -1,7 +1,7 @@
/*-
* BSD LICENSE
*
- * Copyright(c) 2015 Cavium networks. All rights reserved.
+ * Copyright(c) 2015 Cavium, Inc. All rights reserved.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -14,7 +14,7 @@
* notice, this list of conditions and the following disclaimer in
* the documentation and/or other materials provided with the
* distribution.
- * * Neither the name of Cavium networks nor the names of its
+ * * Neither the name of Cavium, Inc nor the names of its
* contributors may be used to endorse or promote products derived
* from this software without specific prior written permission.
*
@@ -52,7 +52,6 @@ extern "C" {
static inline uint32_t
crc32c_arm64_u8(uint8_t data, uint32_t init_val)
{
- asm(".arch armv8-a+crc");
__asm__ volatile(
"crc32cb %w[crc], %w[crc], %w[value]"
: [crc] "+r" (init_val)
@@ -63,7 +62,6 @@ crc32c_arm64_u8(uint8_t data, uint32_t init_val)
static inline uint32_t
crc32c_arm64_u16(uint16_t data, uint32_t init_val)
{
- asm(".arch armv8-a+crc");
__asm__ volatile(
"crc32ch %w[crc], %w[crc], %w[value]"
: [crc] "+r" (init_val)
@@ -74,7 +72,6 @@ crc32c_arm64_u16(uint16_t data, uint32_t init_val)
static inline uint32_t
crc32c_arm64_u32(uint32_t data, uint32_t init_val)
{
- asm(".arch armv8-a+crc");
__asm__ volatile(
"crc32cw %w[crc], %w[crc], %w[value]"
: [crc] "+r" (init_val)
@@ -85,7 +82,6 @@ crc32c_arm64_u32(uint32_t data, uint32_t init_val)
static inline uint32_t
crc32c_arm64_u64(uint64_t data, uint32_t init_val)
{
- asm(".arch armv8-a+crc");
__asm__ volatile(
"crc32cx %w[crc], %w[crc], %x[value]"
: [crc] "+r" (init_val)
diff --git a/lib/librte_hash/rte_cuckoo_hash.c b/lib/librte_hash/rte_cuckoo_hash.c
index 645c0cfa..87b25c01 100644
--- a/lib/librte_hash/rte_cuckoo_hash.c
+++ b/lib/librte_hash/rte_cuckoo_hash.c
@@ -52,11 +52,11 @@
#include <rte_errno.h>
#include <rte_string_fns.h>
#include <rte_cpuflags.h>
-#include <rte_log.h>
#include <rte_rwlock.h>
#include <rte_spinlock.h>
#include <rte_ring.h>
#include <rte_compat.h>
+#include <rte_pause.h>
#include "rte_hash.h"
#include "rte_cuckoo_hash.h"
@@ -538,8 +538,10 @@ __rte_hash_add_key_with_hash(const struct rte_hash *h, const void *key,
n_slots = rte_ring_mc_dequeue_burst(h->free_slots,
cached_free_slots->objs,
LCORE_CACHE_SIZE, NULL);
- if (n_slots == 0)
- return -ENOSPC;
+ if (n_slots == 0) {
+ ret = -ENOSPC;
+ goto failure;
+ }
cached_free_slots->len += n_slots;
}
@@ -548,8 +550,10 @@ __rte_hash_add_key_with_hash(const struct rte_hash *h, const void *key,
cached_free_slots->len--;
slot_id = cached_free_slots->objs[cached_free_slots->len];
} else {
- if (rte_ring_sc_dequeue(h->free_slots, &slot_id) != 0)
- return -ENOSPC;
+ if (rte_ring_sc_dequeue(h->free_slots, &slot_id) != 0) {
+ ret = -ENOSPC;
+ goto failure;
+ }
}
new_k = RTE_PTR_ADD(keys, (uintptr_t)slot_id * h->key_entry_size);
@@ -569,7 +573,7 @@ __rte_hash_add_key_with_hash(const struct rte_hash *h, const void *key,
k->pdata = data;
/*
* Return index where key is stored,
- * substracting the first dummy index
+ * subtracting the first dummy index
*/
return prim_bkt->key_idx[i] - 1;
}
@@ -589,7 +593,7 @@ __rte_hash_add_key_with_hash(const struct rte_hash *h, const void *key,
k->pdata = data;
/*
* Return index where key is stored,
- * substracting the first dummy index
+ * subtracting the first dummy index
*/
return sec_bkt->key_idx[i] - 1;
}
@@ -659,6 +663,7 @@ __rte_hash_add_key_with_hash(const struct rte_hash *h, const void *key,
/* Error in addition, store new slot back in the ring and return error */
enqueue_slot_back(h, cached_free_slots, (void *)((uintptr_t) new_idx));
+failure:
if (h->add_key == ADD_KEY_MULTIWRITER)
rte_spinlock_unlock(h->multiwriter_lock);
return ret;
@@ -730,7 +735,7 @@ __rte_hash_lookup_with_hash(const struct rte_hash *h, const void *key,
*data = k->pdata;
/*
* Return index where key is stored,
- * substracting the first dummy index
+ * subtracting the first dummy index
*/
return bkt->key_idx[i] - 1;
}
@@ -753,7 +758,7 @@ __rte_hash_lookup_with_hash(const struct rte_hash *h, const void *key,
*data = k->pdata;
/*
* Return index where key is stored,
- * substracting the first dummy index
+ * subtracting the first dummy index
*/
return bkt->key_idx[i] - 1;
}
@@ -847,7 +852,7 @@ __rte_hash_del_key_with_hash(const struct rte_hash *h, const void *key,
/*
* Return index where key is stored,
- * substracting the first dummy index
+ * subtracting the first dummy index
*/
ret = bkt->key_idx[i] - 1;
bkt->key_idx[i] = EMPTY_SLOT;
@@ -872,7 +877,7 @@ __rte_hash_del_key_with_hash(const struct rte_hash *h, const void *key,
/*
* Return index where key is stored,
- * substracting the first dummy index
+ * subtracting the first dummy index
*/
ret = bkt->key_idx[i] - 1;
bkt->key_idx[i] = EMPTY_SLOT;
diff --git a/lib/librte_hash/rte_cuckoo_hash.h b/lib/librte_hash/rte_cuckoo_hash.h
index 1b8ffed8..f75392d2 100644
--- a/lib/librte_hash/rte_cuckoo_hash.h
+++ b/lib/librte_hash/rte_cuckoo_hash.h
@@ -58,7 +58,7 @@
#endif
/* Hash function used if none is specified */
-#if defined(RTE_MACHINE_CPUFLAG_SSE4_2) || defined(RTE_MACHINE_CPUFLAG_CRC32)
+#if defined(RTE_ARCH_X86) || defined(RTE_MACHINE_CPUFLAG_CRC32)
#include <rte_hash_crc.h>
#define DEFAULT_HASH_FUNC rte_hash_crc
#else
diff --git a/lib/librte_hash/rte_fbk_hash.h b/lib/librte_hash/rte_fbk_hash.h
index bd46048f..c39c0976 100644
--- a/lib/librte_hash/rte_fbk_hash.h
+++ b/lib/librte_hash/rte_fbk_hash.h
@@ -55,7 +55,7 @@ extern "C" {
#include <string.h>
#ifndef RTE_FBK_HASH_FUNC_DEFAULT
-#if defined(RTE_MACHINE_CPUFLAG_SSE4_2) || defined(RTE_MACHINE_CPUFLAG_CRC32)
+#if defined(RTE_ARCH_X86) || defined(RTE_MACHINE_CPUFLAG_CRC32)
#include <rte_hash_crc.h>
/** Default four-byte key hash function if none is specified. */
#define RTE_FBK_HASH_FUNC_DEFAULT rte_hash_crc_4byte
diff --git a/lib/librte_hash/rte_hash_crc.h b/lib/librte_hash/rte_hash_crc.h
index 0f485b85..ea6be522 100644
--- a/lib/librte_hash/rte_hash_crc.h
+++ b/lib/librte_hash/rte_hash_crc.h
@@ -387,7 +387,7 @@ crc32c_2words(uint64_t data, uint32_t init_val)
return crc;
}
-#if defined(RTE_ARCH_I686) || defined(RTE_ARCH_X86_64)
+#if defined(RTE_ARCH_X86)
static inline uint32_t
crc32c_sse42_u8(uint8_t data, uint32_t init_val)
{
@@ -453,7 +453,7 @@ crc32c_sse42_u64(uint64_t data, uint64_t init_val)
static uint8_t crc32_alg = CRC32_SW;
-#if defined(RTE_ARCH_ARM64)
+#if defined(RTE_ARCH_ARM64) && defined(RTE_MACHINE_CPUFLAG_CRC32)
#include "rte_crc_arm64.h"
#else
@@ -471,26 +471,12 @@ static uint8_t crc32_alg = CRC32_SW;
static inline void
rte_hash_crc_set_alg(uint8_t alg)
{
- switch (alg) {
-#if defined(RTE_ARCH_I686) || defined(RTE_ARCH_X86_64)
- case CRC32_SSE42_x64:
- if (! rte_cpu_get_flag_enabled(RTE_CPUFLAG_EM64T))
- alg = CRC32_SSE42;
-#if __GNUC__ >= 7
- __attribute__ ((fallthrough));
+#if defined(RTE_ARCH_X86)
+ if (alg == CRC32_SSE42_x64 &&
+ !rte_cpu_get_flag_enabled(RTE_CPUFLAG_EM64T))
+ alg = CRC32_SSE42;
#endif
- case CRC32_SSE42:
- if (! rte_cpu_get_flag_enabled(RTE_CPUFLAG_SSE4_2))
- alg = CRC32_SW;
-#if __GNUC__ >= 7
- __attribute__ ((fallthrough));
-#endif
-#endif
- case CRC32_SW:
- crc32_alg = alg;
- default:
- break;
- }
+ crc32_alg = alg;
}
/* Setting the best available algorithm */
@@ -515,7 +501,7 @@ rte_hash_crc_init_alg(void)
static inline uint32_t
rte_hash_crc_1byte(uint8_t data, uint32_t init_val)
{
-#if defined RTE_ARCH_I686 || defined RTE_ARCH_X86_64
+#if defined RTE_ARCH_X86
if (likely(crc32_alg & CRC32_SSE42))
return crc32c_sse42_u8(data, init_val);
#endif
@@ -538,7 +524,7 @@ rte_hash_crc_1byte(uint8_t data, uint32_t init_val)
static inline uint32_t
rte_hash_crc_2byte(uint16_t data, uint32_t init_val)
{
-#if defined RTE_ARCH_I686 || defined RTE_ARCH_X86_64
+#if defined RTE_ARCH_X86
if (likely(crc32_alg & CRC32_SSE42))
return crc32c_sse42_u16(data, init_val);
#endif
@@ -561,7 +547,7 @@ rte_hash_crc_2byte(uint16_t data, uint32_t init_val)
static inline uint32_t
rte_hash_crc_4byte(uint32_t data, uint32_t init_val)
{
-#if defined RTE_ARCH_I686 || defined RTE_ARCH_X86_64
+#if defined RTE_ARCH_X86
if (likely(crc32_alg & CRC32_SSE42))
return crc32c_sse42_u32(data, init_val);
#endif
@@ -589,7 +575,7 @@ rte_hash_crc_8byte(uint64_t data, uint32_t init_val)
return crc32c_sse42_u64(data, init_val);
#endif
-#if defined RTE_ARCH_I686 || defined RTE_ARCH_X86_64
+#if defined RTE_ARCH_X86
if (likely(crc32_alg & CRC32_SSE42))
return crc32c_sse42_u64_mimic(data, init_val);
#endif
diff --git a/lib/librte_hash/rte_thash.h b/lib/librte_hash/rte_thash.h
index a4886a8c..2fffd61d 100644
--- a/lib/librte_hash/rte_thash.h
+++ b/lib/librte_hash/rte_thash.h
@@ -56,11 +56,11 @@ extern "C" {
#include <rte_ip.h>
#include <rte_common.h>
-#ifdef __SSE3__
+#if defined(RTE_ARCH_X86) || defined(RTE_MACHINE_CPUFLAG_NEON)
#include <rte_vect.h>
#endif
-#ifdef __SSE3__
+#ifdef RTE_ARCH_X86
/* Byte swap mask used for converting IPv6 address
* 4-byte chunks to CPU byte order
*/
@@ -134,7 +134,7 @@ struct rte_ipv6_tuple {
union rte_thash_tuple {
struct rte_ipv4_tuple v4;
struct rte_ipv6_tuple v6;
-#ifdef __SSE3__
+#ifdef RTE_ARCH_X86
} __attribute__((aligned(XMM_SIZE)));
#else
};
@@ -169,13 +169,18 @@ rte_convert_rss_key(const uint32_t *orig, uint32_t *targ, int len)
static inline void
rte_thash_load_v6_addrs(const struct ipv6_hdr *orig, union rte_thash_tuple *targ)
{
-#ifdef __SSE3__
+#ifdef RTE_ARCH_X86
__m128i ipv6 = _mm_loadu_si128((const __m128i *)orig->src_addr);
*(__m128i *)targ->v6.src_addr =
_mm_shuffle_epi8(ipv6, rte_thash_ipv6_bswap_mask);
ipv6 = _mm_loadu_si128((const __m128i *)orig->dst_addr);
*(__m128i *)targ->v6.dst_addr =
_mm_shuffle_epi8(ipv6, rte_thash_ipv6_bswap_mask);
+#elif defined(RTE_MACHINE_CPUFLAG_NEON)
+ uint8x16_t ipv6 = vld1q_u8((uint8_t const *)orig->src_addr);
+ vst1q_u8((uint8_t *)targ->v6.src_addr, vrev32q_u8(ipv6));
+ ipv6 = vld1q_u8((uint8_t const *)orig->dst_addr);
+ vst1q_u8((uint8_t *)targ->v6.dst_addr, vrev32q_u8(ipv6));
#else
int i;
for (i = 0; i < 4; i++) {
diff --git a/lib/librte_ip_frag/ip_frag_common.h b/lib/librte_ip_frag/ip_frag_common.h
index 835e4f93..9f561965 100644
--- a/lib/librte_ip_frag/ip_frag_common.h
+++ b/lib/librte_ip_frag/ip_frag_common.h
@@ -130,6 +130,26 @@ ip_frag_free(struct ip_frag_pkt *fp, struct rte_ip_frag_death_row *dr)
dr->cnt = k;
}
+/* delete fragment's mbufs immediately instead of using death row */
+static inline void
+ip_frag_free_immediate(struct ip_frag_pkt *fp)
+{
+ uint32_t i;
+
+ for (i = 0; i < fp->last_idx; i++) {
+ if (fp->frags[i].mb != NULL) {
+ IP_FRAG_LOG(DEBUG, "%s:%d\n"
+ "mbuf: %p, tms: %" PRIu64", key: <%" PRIx64 ", %#x>\n",
+ __func__, __LINE__, fp->frags[i].mb, fp->start,
+ fp->key.src_dst[0], fp->key.id);
+ rte_pktmbuf_free(fp->frags[i].mb);
+ fp->frags[i].mb = NULL;
+ }
+ }
+
+ fp->last_idx = 0;
+}
+
/* if key is empty, mark key as in use */
static inline void
ip_frag_inuse(struct rte_ip_frag_tbl *tbl, const struct ip_frag_pkt *fp)
diff --git a/lib/librte_ip_frag/ip_frag_internal.c b/lib/librte_ip_frag/ip_frag_internal.c
index b679ff43..09b755c9 100644
--- a/lib/librte_ip_frag/ip_frag_internal.c
+++ b/lib/librte_ip_frag/ip_frag_internal.c
@@ -34,9 +34,7 @@
#include <stddef.h>
#include <rte_jhash.h>
-#ifdef RTE_MACHINE_CPUFLAG_SSE4_2
#include <rte_hash_crc.h>
-#endif /* RTE_MACHINE_CPUFLAG_SSE4_2 */
#include "ip_frag_common.h"
@@ -94,14 +92,14 @@ ipv4_frag_hash(const struct ip_frag_key *key, uint32_t *v1, uint32_t *v2)
p = (const uint32_t *)&key->src_dst;
-#ifdef RTE_MACHINE_CPUFLAG_SSE4_2
+#ifdef RTE_ARCH_X86
v = rte_hash_crc_4byte(p[0], PRIME_VALUE);
v = rte_hash_crc_4byte(p[1], v);
v = rte_hash_crc_4byte(key->id, v);
#else
v = rte_jhash_3words(p[0], p[1], key->id, PRIME_VALUE);
-#endif /* RTE_MACHINE_CPUFLAG_SSE4_2 */
+#endif /* RTE_ARCH_X86 */
*v1 = v;
*v2 = (v << 7) + (v >> 14);
@@ -115,7 +113,7 @@ ipv6_frag_hash(const struct ip_frag_key *key, uint32_t *v1, uint32_t *v2)
p = (const uint32_t *) &key->src_dst;
-#ifdef RTE_MACHINE_CPUFLAG_SSE4_2
+#ifdef RTE_ARCH_X86
v = rte_hash_crc_4byte(p[0], PRIME_VALUE);
v = rte_hash_crc_4byte(p[1], v);
v = rte_hash_crc_4byte(p[2], v);
@@ -130,7 +128,7 @@ ipv6_frag_hash(const struct ip_frag_key *key, uint32_t *v1, uint32_t *v2)
v = rte_jhash_3words(p[0], p[1], p[2], PRIME_VALUE);
v = rte_jhash_3words(p[3], p[4], p[5], v);
v = rte_jhash_3words(p[6], p[7], key->id, v);
-#endif /* RTE_MACHINE_CPUFLAG_SSE4_2 */
+#endif /* RTE_ARCH_X86 */
*v1 = v;
*v2 = (v << 7) + (v >> 14);
diff --git a/lib/librte_ip_frag/rte_ip_frag.h b/lib/librte_ip_frag/rte_ip_frag.h
index 6708906d..35d0ecc3 100644
--- a/lib/librte_ip_frag/rte_ip_frag.h
+++ b/lib/librte_ip_frag/rte_ip_frag.h
@@ -180,11 +180,8 @@ struct rte_ip_frag_tbl * rte_ip_frag_table_create(uint32_t bucket_num,
* @param tbl
* Fragmentation table to free.
*/
-static inline void
-rte_ip_frag_table_destroy(struct rte_ip_frag_tbl *tbl)
-{
- rte_free(tbl);
-}
+void
+rte_ip_frag_table_destroy(struct rte_ip_frag_tbl *tbl);
/**
* This function implements the fragmentation of IPv6 packets.
@@ -233,7 +230,7 @@ rte_ipv6_fragment_packet(struct rte_mbuf *pkt_in,
* Pointer to the IPv6 fragment extension header.
* @return
* Pointer to mbuf for reassembled packet, or NULL if:
- * - an error occured.
+ * - an error occurred.
* - not all fragments of the packet are collected yet.
*/
struct rte_mbuf *rte_ipv6_frag_reassemble_packet(struct rte_ip_frag_tbl *tbl,
@@ -307,7 +304,7 @@ int32_t rte_ipv4_fragment_packet(struct rte_mbuf *pkt_in,
* Pointer to the IPV4 header inside the fragment.
* @return
 * Pointer to mbuf for reassembled packet, or NULL if:
- * - an error occured.
+ * - an error occurred.
* - not all fragments of the packet are collected yet.
*/
struct rte_mbuf * rte_ipv4_frag_reassemble_packet(struct rte_ip_frag_tbl *tbl,
diff --git a/lib/librte_ip_frag/rte_ip_frag_common.c b/lib/librte_ip_frag/rte_ip_frag_common.c
index 6176ff4e..8460f8e8 100644
--- a/lib/librte_ip_frag/rte_ip_frag_common.c
+++ b/lib/librte_ip_frag/rte_ip_frag_common.c
@@ -109,6 +109,19 @@ rte_ip_frag_table_create(uint32_t bucket_num, uint32_t bucket_entries,
return tbl;
}
+/* delete fragmentation table */
+void
+rte_ip_frag_table_destroy(struct rte_ip_frag_tbl *tbl)
+{
+ struct ip_frag_pkt *fp;
+
+ TAILQ_FOREACH(fp, &tbl->lru, lru) {
+ ip_frag_free_immediate(fp);
+ }
+
+ rte_free(tbl);
+}
+
/* dump frag table statistics to file */
void
rte_ip_frag_table_statistics_dump(FILE *f, const struct rte_ip_frag_tbl *tbl)
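A minimal teardown sketch using the new destructor, assuming the DPDK 17.08 API; the sizes and cycle budget below are illustrative, not tuned values:

    #include <rte_ip_frag.h>

    static void
    frag_table_teardown_example(uint64_t tsc_hz, int socket_id)
    {
        /* max_cycles: fragment lifetime, here roughly one second of TSC */
        struct rte_ip_frag_tbl *tbl = rte_ip_frag_table_create(
            1024 /* bucket_num */, 16 /* bucket_entries */,
            1024 * 16 /* max_entries */, tsc_hz /* max_cycles */,
            socket_id);
        if (tbl == NULL)
            return;
        /* ... run reassembly ... */
        /* Frees mbufs still held by incomplete packets, then the table. */
        rte_ip_frag_table_destroy(tbl);
    }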
diff --git a/lib/librte_ip_frag/rte_ipfrag_version.map b/lib/librte_ip_frag/rte_ipfrag_version.map
index 354fa082..d1acf07c 100644
--- a/lib/librte_ip_frag/rte_ipfrag_version.map
+++ b/lib/librte_ip_frag/rte_ipfrag_version.map
@@ -11,3 +11,10 @@ DPDK_2.0 {
local: *;
};
+
+DPDK_17.08 {
+ global:
+
+ rte_ip_frag_table_destroy;
+
+} DPDK_2.0;
diff --git a/lib/librte_ip_frag/rte_ipv4_fragmentation.c b/lib/librte_ip_frag/rte_ipv4_fragmentation.c
index a2259e80..8c5f5ec4 100644
--- a/lib/librte_ip_frag/rte_ipv4_fragmentation.c
+++ b/lib/librte_ip_frag/rte_ipv4_fragmentation.c
@@ -48,7 +48,7 @@
#define IPV4_HDR_DF_MASK (1 << IPV4_HDR_DF_SHIFT)
#define IPV4_HDR_MF_MASK (1 << IPV4_HDR_MF_SHIFT)
-#define IPV4_HDR_FO_MASK ((1 << IPV4_HDR_FO_SHIFT) - 1)
+#define IPV4_HDR_FO_ALIGN (1 << IPV4_HDR_FO_SHIFT)
static inline void __fill_ipv4hdr_frag(struct ipv4_hdr *dst,
const struct ipv4_hdr *src, uint16_t len, uint16_t fofs,
@@ -103,11 +103,14 @@ rte_ipv4_fragment_packet(struct rte_mbuf *pkt_in,
uint32_t out_pkt_pos, in_seg_data_pos;
uint32_t more_in_segs;
uint16_t fragment_offset, flag_offset, frag_size;
+ uint16_t frag_bytes_remaining;
- frag_size = (uint16_t)(mtu_size - sizeof(struct ipv4_hdr));
-
- /* Fragment size should be a multiply of 8. */
- RTE_ASSERT((frag_size & IPV4_HDR_FO_MASK) == 0);
+ /*
+ * Ensure the IP payload length of all fragments is aligned to a
+ * multiple of 8 bytes as per RFC791 section 2.3.
+ */
+ frag_size = RTE_ALIGN_FLOOR((mtu_size - sizeof(struct ipv4_hdr)),
+ IPV4_HDR_FO_ALIGN);
in_hdr = rte_pktmbuf_mtod(pkt_in, struct ipv4_hdr *);
flag_offset = rte_cpu_to_be_16(in_hdr->fragment_offset);
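To make the rounding concrete, a worked example (MTU values illustrative; 20 is sizeof(struct ipv4_hdr) without options):

    /* mtu_size = 1500: 1500 - 20 = 1480; RTE_ALIGN_FLOOR(1480, 8) = 1480
     * mtu_size = 1498: 1498 - 20 = 1478; RTE_ALIGN_FLOOR(1478, 8) = 1472
     * The IPv4 fragment-offset field counts 8-byte units, so every
     * non-final fragment must carry a payload divisible by 8; the old
     * assert only checked this, the new code enforces it.
     */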
@@ -142,6 +145,7 @@ rte_ipv4_fragment_packet(struct rte_mbuf *pkt_in,
/* Reserve space for the IP header that will be built later */
out_pkt->data_len = sizeof(struct ipv4_hdr);
out_pkt->pkt_len = sizeof(struct ipv4_hdr);
+ frag_bytes_remaining = frag_size;
out_seg_prev = out_pkt;
more_out_segs = 1;
@@ -161,7 +165,7 @@ rte_ipv4_fragment_packet(struct rte_mbuf *pkt_in,
/* Prepare indirect buffer */
rte_pktmbuf_attach(out_seg, in_seg);
- len = mtu_size - out_pkt->pkt_len;
+ len = frag_bytes_remaining;
if (len > (in_seg->data_len - in_seg_data_pos)) {
len = in_seg->data_len - in_seg_data_pos;
}
@@ -171,9 +175,10 @@ rte_ipv4_fragment_packet(struct rte_mbuf *pkt_in,
out_pkt->pkt_len);
out_pkt->nb_segs += 1;
in_seg_data_pos += len;
+ frag_bytes_remaining -= len;
/* Current output packet (i.e. fragment) done ? */
- if (unlikely(out_pkt->pkt_len >= mtu_size))
+ if (unlikely(frag_bytes_remaining == 0))
more_out_segs = 0;
/* Current input segment done ? */
diff --git a/lib/librte_ip_frag/rte_ipv4_reassembly.c b/lib/librte_ip_frag/rte_ipv4_reassembly.c
index e084ca59..b1330896 100644
--- a/lib/librte_ip_frag/rte_ipv4_reassembly.c
+++ b/lib/librte_ip_frag/rte_ipv4_reassembly.c
@@ -118,7 +118,7 @@ ipv4_frag_reassemble(struct ip_frag_pkt *fp)
* Pointer to the IPV4 header inside the fragment.
* @return
 * Pointer to mbuf for reassembled packet, or NULL if:
- * - an error occured.
+ * - an error occurred.
* - not all fragments of the packet are collected yet.
*/
struct rte_mbuf *
diff --git a/lib/librte_ip_frag/rte_ipv6_reassembly.c b/lib/librte_ip_frag/rte_ipv6_reassembly.c
index 21a5ef5d..dde58cb7 100644
--- a/lib/librte_ip_frag/rte_ipv6_reassembly.c
+++ b/lib/librte_ip_frag/rte_ipv6_reassembly.c
@@ -155,7 +155,7 @@ ipv6_frag_reassemble(struct ip_frag_pkt *fp)
* Pointer to the IPV6 fragment extension header.
* @return
* Pointer to mbuf for reassembled packet, or NULL if:
- * - an error occured.
+ * - an error occurred.
* - not all fragments of the packet are collected yet.
*/
#define MORE_FRAGS(x) (((x) & 0x100) >> 8)
diff --git a/lib/librte_jobstats/rte_jobstats.h b/lib/librte_jobstats/rte_jobstats.h
index b3686030..7e76fd50 100644
--- a/lib/librte_jobstats/rte_jobstats.h
+++ b/lib/librte_jobstats/rte_jobstats.h
@@ -98,7 +98,7 @@ struct rte_jobstats {
} __rte_cache_aligned;
struct rte_jobstats_context {
- /** Viariable holding time at different points:
+ /** Variable holding time at different points:
* -# loop start time if loop was started but no job executed yet.
* -# job start time if job is currently executing.
* -# job finish time if job finished its execution.
diff --git a/lib/librte_kni/rte_kni.c b/lib/librte_kni/rte_kni.c
index c3f9208c..8c483c1f 100644
--- a/lib/librte_kni/rte_kni.c
+++ b/lib/librte_kni/rte_kni.c
@@ -119,7 +119,7 @@ struct rte_kni_memzone_pool {
uint32_t max_ifaces; /**< Max. num of KNI ifaces */
struct rte_kni_memzone_slot *slots; /**< Pool slots */
- rte_spinlock_t mutex; /**< alloc/relase mutex */
+ rte_spinlock_t mutex; /**< alloc/release mutex */
/* Free memzone slots linked-list */
struct rte_kni_memzone_slot *free; /**< First empty slot */
@@ -624,6 +624,7 @@ kni_allocate_mbufs(struct rte_kni *kni)
int i, ret;
struct rte_mbuf *pkts[MAX_MBUF_BURST_NUM];
void *phys[MAX_MBUF_BURST_NUM];
+ int allocq_free;
RTE_BUILD_BUG_ON(offsetof(struct rte_mbuf, pool) !=
offsetof(struct rte_kni_mbuf, pool));
@@ -646,7 +647,9 @@ kni_allocate_mbufs(struct rte_kni *kni)
return;
}
- for (i = 0; i < MAX_MBUF_BURST_NUM; i++) {
+ allocq_free = (kni->alloc_q->read - kni->alloc_q->write - 1) \
+ & (MAX_MBUF_BURST_NUM - 1);
+ for (i = 0; i < allocq_free; i++) {
pkts[i] = rte_pktmbuf_alloc(kni->pktmbuf_pool);
if (unlikely(pkts[i] == NULL)) {
/* Out of memory */
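The new bound is the classic power-of-two FIFO free-count idiom; a minimal sketch under the assumption of wrapping read/write indices (helper name illustrative):

    /* One slot stays empty so that read == write means "empty", never
     * "full"; size must be a power of two.
     */
    static inline unsigned int
    fifo_free_slots(unsigned int read, unsigned int write, unsigned int size)
    {
        return (read - write - 1) & (size - 1);
    }

Capping the loop at this count keeps kni_allocate_mbufs() from allocating mbufs the alloc_q cannot accept, only to release them again.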
diff --git a/lib/librte_lpm/rte_lpm.c b/lib/librte_lpm/rte_lpm.c
index 978ac601..64c074e9 100644
--- a/lib/librte_lpm/rte_lpm.c
+++ b/lib/librte_lpm/rte_lpm.c
@@ -36,7 +36,6 @@
#include <errno.h>
#include <stdarg.h>
#include <stdio.h>
-#include <errno.h>
#include <sys/queue.h>
#include <rte_log.h>
diff --git a/lib/librte_lpm/rte_lpm6.c b/lib/librte_lpm/rte_lpm6.c
index 9cc7be77..b4a7df34 100644
--- a/lib/librte_lpm/rte_lpm6.c
+++ b/lib/librte_lpm/rte_lpm6.c
@@ -35,7 +35,6 @@
#include <errno.h>
#include <stdarg.h>
#include <stdio.h>
-#include <errno.h>
#include <sys/queue.h>
#include <rte_log.h>
diff --git a/lib/librte_lpm/rte_lpm_neon.h b/lib/librte_lpm/rte_lpm_neon.h
index 7efd9a0d..4fd33f33 100644
--- a/lib/librte_lpm/rte_lpm_neon.h
+++ b/lib/librte_lpm/rte_lpm_neon.h
@@ -1,7 +1,7 @@
/*-
* BSD LICENSE
*
- * Copyright(c) 2015 Cavium Networks. All rights reserved.
+ * Copyright(c) 2015 Cavium, Inc. All rights reserved.
* All rights reserved.
*
* Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
@@ -19,7 +19,7 @@
* notice, this list of conditions and the following disclaimer in
* the documentation and/or other materials provided with the
* distribution.
- * * Neither the name of Cavium Networks nor the names of its
+ * * Neither the name of Cavium, Inc nor the names of its
* contributors may be used to endorse or promote products derived
* from this software without specific prior written permission.
*
diff --git a/lib/librte_lpm/rte_lpm_sse.h b/lib/librte_lpm/rte_lpm_sse.h
index ef33c6a1..5f2c4d4a 100644
--- a/lib/librte_lpm/rte_lpm_sse.h
+++ b/lib/librte_lpm/rte_lpm_sse.h
@@ -78,7 +78,8 @@ rte_lpm_lookupx4(const struct rte_lpm *lpm, xmm_t ip, uint32_t hop[4],
/* extract values from tbl24[] */
idx = _mm_cvtsi128_si64(i24);
- i24 = _mm_srli_si128(i24, sizeof(uint64_t));
+ /* With -O0 option, gcc 4.8 - 5.4 fails to fold sizeof() into a constant */
+ i24 = _mm_srli_si128(i24, /* sizeof(uint64_t) */ 8);
ptbl = (const uint32_t *)&lpm->tbl24[(uint32_t)idx];
tbl[0] = *ptbl;
diff --git a/lib/librte_mbuf/rte_mbuf.c b/lib/librte_mbuf/rte_mbuf.c
index 0e3e36a5..26a62b8e 100644
--- a/lib/librte_mbuf/rte_mbuf.c
+++ b/lib/librte_mbuf/rte_mbuf.c
@@ -131,8 +131,7 @@ rte_pktmbuf_init(struct rte_mempool *mp,
RTE_ASSERT(mp->elt_size >= mbuf_size);
RTE_ASSERT(buf_len <= UINT16_MAX);
- memset(m, 0, mp->elt_size);
-
+ memset(m, 0, mbuf_size);
/* start of buffer is after mbuf structure and priv data */
m->priv_size = priv_size;
m->buf_addr = (char *)m + mbuf_size;
@@ -409,6 +408,7 @@ const char *rte_get_tx_ol_flag_name(uint64_t mask)
case PKT_TX_TUNNEL_GRE: return "PKT_TX_TUNNEL_GRE";
case PKT_TX_TUNNEL_IPIP: return "PKT_TX_TUNNEL_IPIP";
case PKT_TX_TUNNEL_GENEVE: return "PKT_TX_TUNNEL_GENEVE";
+ case PKT_TX_TUNNEL_MPLSINUDP: return "PKT_TX_TUNNEL_MPLSINUDP";
case PKT_TX_MACSEC: return "PKT_TX_MACSEC";
default: return NULL;
}
@@ -440,6 +440,8 @@ rte_get_tx_ol_flag_list(uint64_t mask, char *buf, size_t buflen)
"PKT_TX_TUNNEL_NONE" },
{ PKT_TX_TUNNEL_GENEVE, PKT_TX_TUNNEL_MASK,
"PKT_TX_TUNNEL_NONE" },
+ { PKT_TX_TUNNEL_MPLSINUDP, PKT_TX_TUNNEL_MASK,
+ "PKT_TX_TUNNEL_NONE" },
{ PKT_TX_MACSEC, PKT_TX_MACSEC, NULL },
};
const char *name;
diff --git a/lib/librte_mbuf/rte_mbuf.h b/lib/librte_mbuf/rte_mbuf.h
index 1cb03109..eaed7eee 100644
--- a/lib/librte_mbuf/rte_mbuf.h
+++ b/lib/librte_mbuf/rte_mbuf.h
@@ -208,6 +208,8 @@ extern "C" {
#define PKT_TX_TUNNEL_GRE (0x2ULL << 45)
#define PKT_TX_TUNNEL_IPIP (0x3ULL << 45)
#define PKT_TX_TUNNEL_GENEVE (0x4ULL << 45)
+/**< TX packet with MPLS-in-UDP RFC 7510 header. */
+#define PKT_TX_TUNNEL_MPLSINUDP (0x5ULL << 45)
/* add new TX TUNNEL type here */
#define PKT_TX_TUNNEL_MASK (0xFULL << 45)
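Since the tunnel types share one 4-bit field, a caller-side sketch (mbuf and helper name illustrative) clears the field before selecting the new type:

    #include <rte_mbuf.h>

    /* Sketch: PKT_TX_TUNNEL_* values are enumerated inside the field
     * masked by PKT_TX_TUNNEL_MASK, so clear it before setting one.
     */
    static inline void
    set_mplsinudp_tunnel(struct rte_mbuf *m)
    {
        m->ol_flags = (m->ol_flags & ~PKT_TX_TUNNEL_MASK)
                      | PKT_TX_TUNNEL_MPLSINUDP;
    }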
@@ -840,7 +842,7 @@ static inline struct rte_mbuf *rte_mbuf_raw_alloc(struct rte_mempool *mp)
* @param m
* The mbuf to be freed.
*/
-static inline void __attribute__((always_inline))
+static __rte_always_inline void
rte_mbuf_raw_free(struct rte_mbuf *m)
{
RTE_ASSERT(RTE_MBUF_DIRECT(m));
@@ -1136,6 +1138,7 @@ static inline struct rte_mbuf *rte_pktmbuf_alloc(struct rte_mempool *mp)
* Array size
* @return
* - 0: Success
+ * - -ENOENT: Not enough entries in the mempool; no mbufs are retrieved.
*/
static inline int rte_pktmbuf_alloc_bulk(struct rte_mempool *pool,
struct rte_mbuf **mbufs, unsigned count)
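Given the now-documented all-or-nothing failure mode, a usage sketch (helper name and burst size illustrative):

    #include <rte_mbuf.h>

    /* Sketch: bulk allocation either fills the whole array or takes
     * nothing, so no partial cleanup is needed on failure.
     */
    static int
    grab_burst(struct rte_mempool *mp, struct rte_mbuf **burst,
               unsigned int n)
    {
        int ret = rte_pktmbuf_alloc_bulk(mp, burst, n);
        if (ret != 0)   /* -ENOENT: pool exhausted, no mbufs were taken */
            return ret;
        /* all n mbufs are valid here */
        return 0;
    }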
@@ -1287,8 +1290,7 @@ static inline void rte_pktmbuf_detach(struct rte_mbuf *m)
* - (m) if it is the last reference. It can be recycled or freed.
* - (NULL) if the mbuf still has remaining references on it.
*/
-__attribute__((always_inline))
-static inline struct rte_mbuf *
+static __rte_always_inline struct rte_mbuf *
rte_pktmbuf_prefree_seg(struct rte_mbuf *m)
{
__rte_mbuf_sanity_check(m, 0);
@@ -1339,7 +1341,7 @@ __rte_pktmbuf_prefree_seg(struct rte_mbuf *m)
* @param m
* The packet mbuf segment to be freed.
*/
-static inline void __attribute__((always_inline))
+static __rte_always_inline void
rte_pktmbuf_free_seg(struct rte_mbuf *m)
{
m = rte_pktmbuf_prefree_seg(m);
@@ -1453,7 +1455,7 @@ static inline void rte_pktmbuf_refcnt_update(struct rte_mbuf *m, int16_t v)
*/
static inline uint16_t rte_pktmbuf_headroom(const struct rte_mbuf *m)
{
- __rte_mbuf_sanity_check(m, 1);
+ __rte_mbuf_sanity_check(m, 0);
return m->data_off;
}
@@ -1467,7 +1469,7 @@ static inline uint16_t rte_pktmbuf_headroom(const struct rte_mbuf *m)
*/
static inline uint16_t rte_pktmbuf_tailroom(const struct rte_mbuf *m)
{
- __rte_mbuf_sanity_check(m, 1);
+ __rte_mbuf_sanity_check(m, 0);
return (uint16_t)(m->buf_len - rte_pktmbuf_headroom(m) -
m->data_len);
}
diff --git a/lib/librte_mbuf/rte_mbuf_ptype.h b/lib/librte_mbuf/rte_mbuf_ptype.h
index a3269c4c..acd70bb6 100644
--- a/lib/librte_mbuf/rte_mbuf_ptype.h
+++ b/lib/librte_mbuf/rte_mbuf_ptype.h
@@ -341,11 +341,11 @@ extern "C" {
* Packet format:
* <'ether type'=0x0800
* | 'version'=4, 'protocol'=17
- * | 'destination port'=4798>
+ * | 'destination port'=4789>
* or,
* <'ether type'=0x86DD
* | 'version'=6, 'next header'=17
- * | 'destination port'=4798>
+ * | 'destination port'=4789>
*/
#define RTE_PTYPE_TUNNEL_VXLAN 0x00003000
/**
diff --git a/lib/librte_mempool/rte_mempool.c b/lib/librte_mempool/rte_mempool.c
index f65310f6..6fc3c9c7 100644
--- a/lib/librte_mempool/rte_mempool.c
+++ b/lib/librte_mempool/rte_mempool.c
@@ -476,7 +476,7 @@ rte_mempool_populate_virt(struct rte_mempool *mp, char *addr,
/* required for xen_dom0 to get the machine address */
paddr = rte_mem_phy2mch(-1, paddr);
- if (paddr == RTE_BAD_PHYS_ADDR) {
+ if (paddr == RTE_BAD_PHYS_ADDR && rte_eal_has_hugepages()) {
ret = -EINVAL;
goto fail;
}
diff --git a/lib/librte_mempool/rte_mempool.h b/lib/librte_mempool/rte_mempool.h
index 48bc8ea3..76b5b3b1 100644
--- a/lib/librte_mempool/rte_mempool.h
+++ b/lib/librte_mempool/rte_mempool.h
@@ -993,7 +993,7 @@ rte_mempool_cache_free(struct rte_mempool_cache *cache);
* @param mp
* A pointer to the mempool.
*/
-static inline void __attribute__((always_inline))
+static __rte_always_inline void
rte_mempool_cache_flush(struct rte_mempool_cache *cache,
struct rte_mempool *mp)
{
@@ -1011,7 +1011,7 @@ rte_mempool_cache_flush(struct rte_mempool_cache *cache,
* @return
* A pointer to the mempool cache or NULL if disabled or non-EAL thread.
*/
-static inline struct rte_mempool_cache *__attribute__((always_inline))
+static __rte_always_inline struct rte_mempool_cache *
rte_mempool_default_cache(struct rte_mempool *mp, unsigned lcore_id)
{
if (mp->cache_size == 0)
@@ -1038,7 +1038,7 @@ rte_mempool_default_cache(struct rte_mempool *mp, unsigned lcore_id)
* The flags used for the mempool creation.
* Single-producer (MEMPOOL_F_SP_PUT flag) or multi-producers.
*/
-static inline void __attribute__((always_inline))
+static __rte_always_inline void
__mempool_generic_put(struct rte_mempool *mp, void * const *obj_table,
unsigned n, struct rte_mempool_cache *cache)
{
@@ -1100,7 +1100,7 @@ ring_enqueue:
* The flags used for the mempool creation.
* Single-producer (MEMPOOL_F_SP_PUT flag) or multi-producers.
*/
-static inline void __attribute__((always_inline))
+static __rte_always_inline void
rte_mempool_generic_put(struct rte_mempool *mp, void * const *obj_table,
unsigned n, struct rte_mempool_cache *cache,
__rte_unused int flags)
@@ -1123,7 +1123,7 @@ rte_mempool_generic_put(struct rte_mempool *mp, void * const *obj_table,
* @param n
* The number of objects to add in the mempool from obj_table.
*/
-static inline void __attribute__((always_inline))
+static __rte_always_inline void
rte_mempool_put_bulk(struct rte_mempool *mp, void * const *obj_table,
unsigned n)
{
@@ -1144,7 +1144,7 @@ rte_mempool_put_bulk(struct rte_mempool *mp, void * const *obj_table,
* @param obj
* A pointer to the object to be added.
*/
-static inline void __attribute__((always_inline))
+static __rte_always_inline void
rte_mempool_put(struct rte_mempool *mp, void *obj)
{
rte_mempool_put_bulk(mp, &obj, 1);
@@ -1167,7 +1167,7 @@ rte_mempool_put(struct rte_mempool *mp, void *obj)
* - >=0: Success; number of objects supplied.
* - <0: Error; code of ring dequeue function.
*/
-static inline int __attribute__((always_inline))
+static __rte_always_inline int
__mempool_generic_get(struct rte_mempool *mp, void **obj_table,
unsigned n, struct rte_mempool_cache *cache)
{
@@ -1248,7 +1248,7 @@ ring_dequeue:
* - 0: Success; objects taken.
* - -ENOENT: Not enough entries in the mempool; no object is retrieved.
*/
-static inline int __attribute__((always_inline))
+static __rte_always_inline int
rte_mempool_generic_get(struct rte_mempool *mp, void **obj_table, unsigned n,
struct rte_mempool_cache *cache, __rte_unused int flags)
{
@@ -1281,7 +1281,7 @@ rte_mempool_generic_get(struct rte_mempool *mp, void **obj_table, unsigned n,
* - 0: Success; objects taken
* - -ENOENT: Not enough entries in the mempool; no object is retrieved.
*/
-static inline int __attribute__((always_inline))
+static __rte_always_inline int
rte_mempool_get_bulk(struct rte_mempool *mp, void **obj_table, unsigned n)
{
struct rte_mempool_cache *cache;
@@ -1309,7 +1309,7 @@ rte_mempool_get_bulk(struct rte_mempool *mp, void **obj_table, unsigned n)
* - 0: Success; objects taken.
* - -ENOENT: Not enough entries in the mempool; no object is retrieved.
*/
-static inline int __attribute__((always_inline))
+static __rte_always_inline int
rte_mempool_get(struct rte_mempool *mp, void **obj_p)
{
return rte_mempool_get_bulk(mp, obj_p, 1);
diff --git a/lib/librte_metrics/rte_metrics.c b/lib/librte_metrics/rte_metrics.c
index e9a122c1..b66a72bb 100644
--- a/lib/librte_metrics/rte_metrics.c
+++ b/lib/librte_metrics/rte_metrics.c
@@ -144,6 +144,8 @@ rte_metrics_reg_names(const char * const *names, uint16_t cnt_names)
entry = &stats->metadata[idx_name + stats->cnt_stats];
strncpy(entry->name, names[idx_name],
RTE_METRICS_MAX_NAME_LEN);
+ /* Enforce NULL-termination */
+ entry->name[RTE_METRICS_MAX_NAME_LEN - 1] = '\0';
memset(entry->value, 0, sizeof(entry->value));
entry->idx_next_stat = idx_name + stats->cnt_stats + 1;
}
@@ -176,7 +178,7 @@ rte_metrics_update_values(int port_id,
uint16_t cnt_setsize;
if (port_id != RTE_METRICS_GLOBAL &&
- (port_id < 0 || port_id > RTE_MAX_ETHPORTS))
+ (port_id < 0 || port_id >= RTE_MAX_ETHPORTS))
return -EINVAL;
if (values == NULL)
@@ -263,7 +265,7 @@ rte_metrics_get_values(int port_id,
int return_value;
if (port_id != RTE_METRICS_GLOBAL &&
- (port_id < 0 || port_id > RTE_MAX_ETHPORTS))
+ (port_id < 0 || port_id >= RTE_MAX_ETHPORTS))
return -EINVAL;
memzone = rte_memzone_lookup(RTE_METRICS_MEMZONE_NAME);
diff --git a/lib/librte_metrics/rte_metrics.h b/lib/librte_metrics/rte_metrics.h
index 0fa3104e..297300ad 100644
--- a/lib/librte_metrics/rte_metrics.h
+++ b/lib/librte_metrics/rte_metrics.h
@@ -118,7 +118,8 @@ void rte_metrics_init(int socket_id);
* is required for updating said metric's value.
*
* @param name
- * Metric name
+ * Metric name. If this exceeds RTE_METRICS_MAX_NAME_LEN (including
+ * the NULL terminator), it is truncated.
*
* @return
* - Zero or positive: Success (index key of new metric)
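Given the new truncation behaviour, a defensive registration sketch (helper name illustrative; rte_metrics_reg_name() is the single-name wrapper of the names API patched above):

    #include <errno.h>
    #include <string.h>
    #include <rte_metrics.h>

    static int
    register_metric_checked(const char *name)
    {
        /* Reject names that would otherwise be silently truncated. */
        if (strlen(name) >= RTE_METRICS_MAX_NAME_LEN)
            return -EINVAL;
        return rte_metrics_reg_name(name); /* index key, or negative errno */
    }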
diff --git a/lib/librte_net/net_crc_neon.h b/lib/librte_net/net_crc_neon.h
new file mode 100644
index 00000000..201b2c88
--- /dev/null
+++ b/lib/librte_net/net_crc_neon.h
@@ -0,0 +1,297 @@
+/*
+ * BSD LICENSE
+ *
+ * Copyright (C) Cavium, Inc. 2017.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Cavium, Inc nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef _NET_CRC_NEON_H_
+#define _NET_CRC_NEON_H_
+
+#include <rte_branch_prediction.h>
+#include <rte_net_crc.h>
+#include <rte_vect.h>
+#include <rte_cpuflags.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/** PMULL CRC computation context structure */
+struct crc_pmull_ctx {
+ uint64x2_t rk1_rk2;
+ uint64x2_t rk5_rk6;
+ uint64x2_t rk7_rk8;
+};
+
+struct crc_pmull_ctx crc32_eth_pmull __rte_aligned(16);
+struct crc_pmull_ctx crc16_ccitt_pmull __rte_aligned(16);
+
+/**
+ * @brief Performs one folding round
+ *
+ * Logically function operates as follows:
+ * DATA = READ_NEXT_16BYTES();
+ * F1 = LSB8(FOLD)
+ * F2 = MSB8(FOLD)
+ * T1 = CLMUL(F1, RK1)
+ * T2 = CLMUL(F2, RK2)
+ * FOLD = XOR(T1, T2, DATA)
+ *
+ * @param data_block 16 byte data block
+ * @param precomp precomputed rk1 and rk2 constants
+ * @param fold running 16 byte folded data
+ *
+ * @return New 16 byte folded data
+ */
+static inline uint64x2_t
+crcr32_folding_round(uint64x2_t data_block, uint64x2_t precomp,
+ uint64x2_t fold)
+{
+ uint64x2_t tmp0 = vreinterpretq_u64_p128(vmull_p64(
+ vgetq_lane_p64(vreinterpretq_p64_u64(fold), 1),
+ vgetq_lane_p64(vreinterpretq_p64_u64(precomp), 0)));
+
+ uint64x2_t tmp1 = vreinterpretq_u64_p128(vmull_p64(
+ vgetq_lane_p64(vreinterpretq_p64_u64(fold), 0),
+ vgetq_lane_p64(vreinterpretq_p64_u64(precomp), 1)));
+
+ return veorq_u64(tmp1, veorq_u64(data_block, tmp0));
+}
+
+/**
+ * Performs reduction from 128 bits to 64 bits
+ *
+ * @param data128 128 bits data to be reduced
+ * @param precomp rk5 and rk6 precomputed constants
+ *
+ * @return data reduced to 64 bits
+ */
+static inline uint64x2_t
+crcr32_reduce_128_to_64(uint64x2_t data128,
+ uint64x2_t precomp)
+{
+ uint64x2_t tmp0, tmp1, tmp2;
+
+ /* 64b fold */
+ tmp0 = vreinterpretq_u64_p128(vmull_p64(
+ vgetq_lane_p64(vreinterpretq_p64_u64(data128), 0),
+ vgetq_lane_p64(vreinterpretq_p64_u64(precomp), 0)));
+ tmp1 = vshift_bytes_right(data128, 8);
+ tmp0 = veorq_u64(tmp0, tmp1);
+
+ /* 32b fold */
+ tmp2 = vshift_bytes_left(tmp0, 4);
+ tmp1 = vreinterpretq_u64_p128(vmull_p64(
+ vgetq_lane_p64(vreinterpretq_p64_u64(tmp2), 0),
+ vgetq_lane_p64(vreinterpretq_p64_u64(precomp), 1)));
+
+ return veorq_u64(tmp1, tmp0);
+}
+
+/**
+ * Performs Barret's reduction from 64 bits to 32 bits
+ *
+ * @param data64 64 bits data to be reduced
+ * @param precomp rk7 precomputed constant
+ *
+ * @return data reduced to 32 bits
+ */
+static inline uint32_t
+crcr32_reduce_64_to_32(uint64x2_t data64,
+ uint64x2_t precomp)
+{
+ static uint32_t mask1[4] __rte_aligned(16) = {
+ 0xffffffff, 0xffffffff, 0x00000000, 0x00000000
+ };
+ static uint32_t mask2[4] __rte_aligned(16) = {
+ 0x00000000, 0xffffffff, 0xffffffff, 0xffffffff
+ };
+ uint64x2_t tmp0, tmp1, tmp2;
+
+ tmp0 = vandq_u64(data64, vld1q_u64((uint64_t *)mask2));
+
+ tmp1 = vreinterpretq_u64_p128(vmull_p64(
+ vgetq_lane_p64(vreinterpretq_p64_u64(tmp0), 0),
+ vgetq_lane_p64(vreinterpretq_p64_u64(precomp), 0)));
+ tmp1 = veorq_u64(tmp1, tmp0);
+ tmp1 = vandq_u64(tmp1, vld1q_u64((uint64_t *)mask1));
+
+ tmp2 = vreinterpretq_u64_p128(vmull_p64(
+ vgetq_lane_p64(vreinterpretq_p64_u64(tmp1), 0),
+ vgetq_lane_p64(vreinterpretq_p64_u64(precomp), 1)));
+ tmp2 = veorq_u64(tmp2, tmp1);
+ tmp2 = veorq_u64(tmp2, tmp0);
+
+ return vgetq_lane_u32(vreinterpretq_u32_u64(tmp2), 2);
+}
+
+static inline uint32_t
+crc32_eth_calc_pmull(
+ const uint8_t *data,
+ uint32_t data_len,
+ uint32_t crc,
+ const struct crc_pmull_ctx *params)
+{
+ uint64x2_t temp, fold, k;
+ uint32_t n;
+
+ /* Get CRC init value */
+ temp = vreinterpretq_u64_u32(vsetq_lane_u32(crc, vmovq_n_u32(0), 0));
+
+ /**
+ * Fold all data into a single 16 byte data block
+ * Assumes: fold holds first 16 bytes of data
+ */
+ if (unlikely(data_len < 32)) {
+ if (unlikely(data_len == 16)) {
+ /* 16 bytes */
+ fold = vld1q_u64((const uint64_t *)data);
+ fold = veorq_u64(fold, temp);
+ goto reduction_128_64;
+ }
+
+ if (unlikely(data_len < 16)) {
+ /* 0 to 15 bytes */
+ uint8_t buffer[16] __rte_aligned(16);
+
+ memset(buffer, 0, sizeof(buffer));
+ memcpy(buffer, data, data_len);
+
+ fold = vld1q_u64((uint64_t *)buffer);
+ fold = veorq_u64(fold, temp);
+ if (unlikely(data_len < 4)) {
+ fold = vshift_bytes_left(fold, 8 - data_len);
+ goto barret_reduction;
+ }
+ fold = vshift_bytes_left(fold, 16 - data_len);
+ goto reduction_128_64;
+ }
+ /* 17 to 31 bytes */
+ fold = vld1q_u64((const uint64_t *)data);
+ fold = veorq_u64(fold, temp);
+ n = 16;
+ k = params->rk1_rk2;
+ goto partial_bytes;
+ }
+
+ /** At least 32 bytes in the buffer */
+ /** Apply CRC initial value */
+ fold = vld1q_u64((const uint64_t *)data);
+ fold = veorq_u64(fold, temp);
+
+ /** Main folding loop - the last 16 bytes are processed separately */
+ k = params->rk1_rk2;
+ for (n = 16; (n + 16) <= data_len; n += 16) {
+ temp = vld1q_u64((const uint64_t *)&data[n]);
+ fold = crcr32_folding_round(temp, k, fold);
+ }
+
+partial_bytes:
+ if (likely(n < data_len)) {
+ uint64x2_t last16, a, b, mask;
+ uint32_t rem = data_len & 15;
+
+ last16 = vld1q_u64((const uint64_t *)&data[data_len - 16]);
+ a = vshift_bytes_left(fold, 16 - rem);
+ b = vshift_bytes_right(fold, rem);
+ mask = vshift_bytes_left(vdupq_n_u64(-1), 16 - rem);
+ b = vorrq_u64(b, vandq_u64(mask, last16));
+
+ /* k = rk1 & rk2 */
+ temp = vreinterpretq_u64_p128(vmull_p64(
+ vgetq_lane_p64(vreinterpretq_p64_u64(a), 1),
+ vgetq_lane_p64(vreinterpretq_p64_u64(k), 0)));
+ fold = vreinterpretq_u64_p128(vmull_p64(
+ vgetq_lane_p64(vreinterpretq_p64_u64(a), 0),
+ vgetq_lane_p64(vreinterpretq_p64_u64(k), 1)));
+ fold = veorq_u64(fold, temp);
+ fold = veorq_u64(fold, b);
+ }
+
+ /** Reduction 128 -> 32 Assumes: fold holds 128bit folded data */
+reduction_128_64:
+ k = params->rk5_rk6;
+ fold = crcr32_reduce_128_to_64(fold, k);
+
+barret_reduction:
+ k = params->rk7_rk8;
+ n = crcr32_reduce_64_to_32(fold, k);
+
+ return n;
+}
+
+static inline void
+rte_net_crc_neon_init(void)
+{
+ /* Initialize CRC16 data */
+ uint64_t ccitt_k1_k2[2] = {0x189aeLLU, 0x8e10LLU};
+ uint64_t ccitt_k5_k6[2] = {0x189aeLLU, 0x114aaLLU};
+ uint64_t ccitt_k7_k8[2] = {0x11c581910LLU, 0x10811LLU};
+
+ /* Initialize CRC32 data */
+ uint64_t eth_k1_k2[2] = {0xccaa009eLLU, 0x1751997d0LLU};
+ uint64_t eth_k5_k6[2] = {0xccaa009eLLU, 0x163cd6124LLU};
+ uint64_t eth_k7_k8[2] = {0x1f7011640LLU, 0x1db710641LLU};
+
+ /** Save the CRC16 params in the context structure */
+ crc16_ccitt_pmull.rk1_rk2 = vld1q_u64(ccitt_k1_k2);
+ crc16_ccitt_pmull.rk5_rk6 = vld1q_u64(ccitt_k5_k6);
+ crc16_ccitt_pmull.rk7_rk8 = vld1q_u64(ccitt_k7_k8);
+
+ /** Save the CRC32 params in the context structure */
+ crc32_eth_pmull.rk1_rk2 = vld1q_u64(eth_k1_k2);
+ crc32_eth_pmull.rk5_rk6 = vld1q_u64(eth_k5_k6);
+ crc32_eth_pmull.rk7_rk8 = vld1q_u64(eth_k7_k8);
+}
+
+static inline uint32_t
+rte_crc16_ccitt_neon_handler(const uint8_t *data,
+ uint32_t data_len)
+{
+ return (uint16_t)~crc32_eth_calc_pmull(data,
+ data_len,
+ 0xffff,
+ &crc16_ccitt_pmull);
+}
+
+static inline uint32_t
+rte_crc32_eth_neon_handler(const uint8_t *data,
+ uint32_t data_len)
+{
+ return ~crc32_eth_calc_pmull(data,
+ data_len,
+ 0xffffffffUL,
+ &crc32_eth_pmull);
+}
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _NET_CRC_NEON_H_ */
diff --git a/lib/librte_net/net_crc_sse.h b/lib/librte_net/net_crc_sse.h
index 8bce522a..ac93637b 100644
--- a/lib/librte_net/net_crc_sse.h
+++ b/lib/librte_net/net_crc_sse.h
@@ -73,7 +73,7 @@ struct crc_pclmulqdq_ctx crc16_ccitt_pclmulqdq __rte_aligned(16);
* @return
* New 16 byte folded data
*/
-static inline __attribute__((always_inline)) __m128i
+static __rte_always_inline __m128i
crcr32_folding_round(__m128i data_block,
__m128i precomp,
__m128i fold)
@@ -96,7 +96,7 @@ crcr32_folding_round(__m128i data_block,
* 64 bits reduced data
*/
-static inline __attribute__((always_inline)) __m128i
+static __rte_always_inline __m128i
crcr32_reduce_128_to_64(__m128i data128, __m128i precomp)
{
__m128i tmp0, tmp1, tmp2;
@@ -125,7 +125,7 @@ crcr32_reduce_128_to_64(__m128i data128, __m128i precomp)
* reduced 32 bits data
*/
-static inline __attribute__((always_inline)) uint32_t
+static __rte_always_inline uint32_t
crcr32_reduce_64_to_32(__m128i data64, __m128i precomp)
{
static const uint32_t mask1[4] __rte_aligned(16) = {
@@ -171,7 +171,7 @@ static const uint8_t crc_xmm_shift_tab[48] __rte_aligned(16) = {
* reg << (num * 8)
*/
-static inline __attribute__((always_inline)) __m128i
+static __rte_always_inline __m128i
xmm_shift_left(__m128i reg, const unsigned int num)
{
const __m128i *p = (const __m128i *)(crc_xmm_shift_tab + 16 - num);
@@ -179,7 +179,7 @@ xmm_shift_left(__m128i reg, const unsigned int num)
return _mm_shuffle_epi8(reg, _mm_loadu_si128(p));
}
-static inline __attribute__((always_inline)) uint32_t
+static __rte_always_inline uint32_t
crc32_eth_calc_pclmulqdq(
const uint8_t *data,
uint32_t data_len,
diff --git a/lib/librte_net/rte_net_crc.c b/lib/librte_net/rte_net_crc.c
index 9d1ee63f..661fe322 100644
--- a/lib/librte_net/rte_net_crc.c
+++ b/lib/librte_net/rte_net_crc.c
@@ -39,14 +39,16 @@
#include <rte_common.h>
#include <rte_net_crc.h>
-#if defined(RTE_ARCH_X86_64) \
- && defined(RTE_MACHINE_CPUFLAG_SSE4_2) \
- && defined(RTE_MACHINE_CPUFLAG_PCLMULQDQ)
+#if defined(RTE_ARCH_X86_64) && defined(RTE_MACHINE_CPUFLAG_PCLMULQDQ)
#define X86_64_SSE42_PCLMULQDQ 1
+#elif defined(RTE_ARCH_ARM64) && defined(RTE_MACHINE_CPUFLAG_PMULL)
+#define ARM64_NEON_PMULL 1
#endif
#ifdef X86_64_SSE42_PCLMULQDQ
#include <net_crc_sse.h>
+#elif defined ARM64_NEON_PMULL
+#include <net_crc_neon.h>
#endif
/* crc tables */
@@ -74,6 +76,11 @@ static rte_net_crc_handler handlers_sse42[] = {
[RTE_NET_CRC16_CCITT] = rte_crc16_ccitt_sse42_handler,
[RTE_NET_CRC32_ETH] = rte_crc32_eth_sse42_handler,
};
+#elif defined ARM64_NEON_PMULL
+static rte_net_crc_handler handlers_neon[] = {
+ [RTE_NET_CRC16_CCITT] = rte_crc16_ccitt_neon_handler,
+ [RTE_NET_CRC32_ETH] = rte_crc32_eth_neon_handler,
+};
#endif
/**
@@ -116,7 +123,7 @@ crc32_eth_init_lut(uint32_t poly,
}
}
-static inline __attribute__((always_inline)) uint32_t
+static __rte_always_inline uint32_t
crc32_eth_calc_lut(const uint8_t *data,
uint32_t data_len,
uint32_t crc,
@@ -162,14 +169,21 @@ void
rte_net_crc_set_alg(enum rte_net_crc_alg alg)
{
switch (alg) {
- case RTE_NET_CRC_SSE42:
#ifdef X86_64_SSE42_PCLMULQDQ
+ case RTE_NET_CRC_SSE42:
handlers = handlers_sse42;
-#else
- alg = RTE_NET_CRC_SCALAR;
-#endif
break;
+#elif defined ARM64_NEON_PMULL
+ /* fall-through */
+ case RTE_NET_CRC_NEON:
+ if (rte_cpu_get_flag_enabled(RTE_CPUFLAG_PMULL)) {
+ handlers = handlers_neon;
+ break;
+ }
+#endif
+ /* fall-through */
case RTE_NET_CRC_SCALAR:
+ /* fall-through */
default:
handlers = handlers_scalar;
break;
@@ -199,8 +213,13 @@ rte_net_crc_init(void)
rte_net_crc_scalar_init();
#ifdef X86_64_SSE42_PCLMULQDQ
- alg = RTE_NET_CRC_SSE42;
- rte_net_crc_sse42_init();
+ alg = RTE_NET_CRC_SSE42;
+ rte_net_crc_sse42_init();
+#elif defined ARM64_NEON_PMULL
+ if (rte_cpu_get_flag_enabled(RTE_CPUFLAG_PMULL)) {
+ alg = RTE_NET_CRC_NEON;
+ rte_net_crc_neon_init();
+ }
#endif
rte_net_crc_set_alg(alg);
diff --git a/lib/librte_net/rte_net_crc.h b/lib/librte_net/rte_net_crc.h
index d22286c6..d01cf4b4 100644
--- a/lib/librte_net/rte_net_crc.h
+++ b/lib/librte_net/rte_net_crc.h
@@ -57,6 +57,7 @@ enum rte_net_crc_type {
enum rte_net_crc_alg {
RTE_NET_CRC_SCALAR = 0,
RTE_NET_CRC_SSE42,
+ RTE_NET_CRC_NEON,
};
/**
@@ -68,6 +69,7 @@ enum rte_net_crc_alg {
* This parameter is used to select the CRC implementation version.
* - RTE_NET_CRC_SCALAR
* - RTE_NET_CRC_SSE42 (Use 64-bit SSE4.2 intrinsic)
+ * - RTE_NET_CRC_NEON (Use ARM Neon intrinsic)
*/
void
rte_net_crc_set_alg(enum rte_net_crc_alg alg);
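A minimal selection-plus-compute sketch, assuming the DPDK 17.08 API (buffer and helper name illustrative); per the rte_net_crc.c change above, requesting NEON on a target without PMULL quietly falls back to the scalar handlers:

    #include <rte_net_crc.h>

    static uint32_t
    frame_crc32(const uint8_t *buf, uint32_t len)
    {
        rte_net_crc_set_alg(RTE_NET_CRC_NEON); /* scalar fallback if no PMULL */
        return rte_net_crc_calc(buf, len, RTE_NET_CRC32_ETH);
    }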
diff --git a/lib/librte_pdump/rte_pdump.c b/lib/librte_pdump/rte_pdump.c
index b599d65d..729e79a3 100644
--- a/lib/librte_pdump/rte_pdump.c
+++ b/lib/librte_pdump/rte_pdump.c
@@ -46,7 +46,6 @@
#include <rte_lcore.h>
#include <rte_log.h>
#include <rte_errno.h>
-#include <rte_pci.h>
#include "rte_pdump.h"
diff --git a/lib/librte_port/rte_port_ring.c b/lib/librte_port/rte_port_ring.c
index 64bd965f..a4e709c9 100644
--- a/lib/librte_port/rte_port_ring.c
+++ b/lib/librte_port/rte_port_ring.c
@@ -293,7 +293,7 @@ rte_port_ring_multi_writer_tx(void *port, struct rte_mbuf *pkt)
return 0;
}
-static inline int __attribute__((always_inline))
+static __rte_always_inline int
rte_port_ring_writer_tx_bulk_internal(void *port,
struct rte_mbuf **pkts,
uint64_t pkts_mask,
@@ -609,7 +609,7 @@ rte_port_ring_multi_writer_nodrop_tx(void *port, struct rte_mbuf *pkt)
return 0;
}
-static inline int __attribute__((always_inline))
+static __rte_always_inline int
rte_port_ring_writer_nodrop_tx_bulk_internal(void *port,
struct rte_mbuf **pkts,
uint64_t pkts_mask,
diff --git a/lib/librte_reorder/rte_reorder.h b/lib/librte_reorder/rte_reorder.h
index 737e0554..4cd8de76 100644
--- a/lib/librte_reorder/rte_reorder.h
+++ b/lib/librte_reorder/rte_reorder.h
@@ -147,7 +147,7 @@ rte_reorder_free(struct rte_reorder_buffer *b);
* -1 on error
* On error case, rte_errno will be set appropriately:
* - ENOSPC - Cannot move existing mbufs from reorder buffer to accommodate
- * ealry mbuf, but it can be accomodated by performing drain and then insert.
+ * early mbuf, but it can be accommodated by performing drain and then insert.
* - ERANGE - Too early or late mbuf which is vastly out of range of expected
 * window should be ignored without any handling.
*/
diff --git a/lib/librte_ring/rte_ring.c b/lib/librte_ring/rte_ring.c
index 5f98c33f..036467f4 100644
--- a/lib/librte_ring/rte_ring.c
+++ b/lib/librte_ring/rte_ring.c
@@ -140,8 +140,22 @@ rte_ring_init(struct rte_ring *r, const char *name, unsigned count,
r->flags = flags;
r->prod.single = (flags & RING_F_SP_ENQ) ? __IS_SP : __IS_MP;
r->cons.single = (flags & RING_F_SC_DEQ) ? __IS_SC : __IS_MC;
- r->size = count;
- r->mask = count - 1;
+
+ if (flags & RING_F_EXACT_SZ) {
+ r->size = rte_align32pow2(count + 1);
+ r->mask = r->size - 1;
+ r->capacity = count;
+ } else {
+ if ((!POWEROF2(count)) || (count > RTE_RING_SZ_MASK)) {
+ RTE_LOG(ERR, RING,
+ "Requested size is invalid, must be power of 2, and not exceed the size limit %u\n",
+ RTE_RING_SZ_MASK);
+ return -EINVAL;
+ }
+ r->size = count;
+ r->mask = count - 1;
+ r->capacity = r->mask;
+ }
r->prod.head = r->cons.head = 0;
r->prod.tail = r->cons.tail = 0;
@@ -160,10 +174,15 @@ rte_ring_create(const char *name, unsigned count, int socket_id,
ssize_t ring_size;
int mz_flags = 0;
struct rte_ring_list* ring_list = NULL;
+ const unsigned int requested_count = count;
int ret;
ring_list = RTE_TAILQ_CAST(rte_ring_tailq.head, rte_ring_list);
+ /* for an exact size ring, round up from count to a power of two */
+ if (flags & RING_F_EXACT_SZ)
+ count = rte_align32pow2(count + 1);
+
ring_size = rte_ring_get_memsize(count);
if (ring_size < 0) {
rte_errno = ring_size;
@@ -189,12 +208,13 @@ rte_ring_create(const char *name, unsigned count, int socket_id,
/* reserve a memory zone for this ring. If we can't get rte_config or
* we are secondary process, the memzone_reserve function will set
 * rte_errno for us appropriately - hence no check in this function */
- mz = rte_memzone_reserve(mz_name, ring_size, socket_id, mz_flags);
+ mz = rte_memzone_reserve_aligned(mz_name, ring_size, socket_id,
+ mz_flags, __alignof__(*r));
if (mz != NULL) {
r = mz->addr;
/* no need to check return value here, we already checked the
* arguments above */
- rte_ring_init(r, name, count, flags);
+ rte_ring_init(r, name, requested_count, flags);
te->data = (void *) r;
r->memzone = mz;
@@ -262,6 +282,7 @@ rte_ring_dump(FILE *f, const struct rte_ring *r)
fprintf(f, "ring <%s>@%p\n", r->name, r);
fprintf(f, " flags=%x\n", r->flags);
fprintf(f, " size=%"PRIu32"\n", r->size);
+ fprintf(f, " capacity=%"PRIu32"\n", r->capacity);
fprintf(f, " ct=%"PRIu32"\n", r->cons.tail);
fprintf(f, " ch=%"PRIu32"\n", r->cons.head);
fprintf(f, " pt=%"PRIu32"\n", r->prod.tail);
diff --git a/lib/librte_ring/rte_ring.h b/lib/librte_ring/rte_ring.h
index 97f025a1..8f5a4937 100644
--- a/lib/librte_ring/rte_ring.h
+++ b/lib/librte_ring/rte_ring.h
@@ -101,6 +101,7 @@ extern "C" {
#include <rte_atomic.h>
#include <rte_branch_prediction.h>
#include <rte_memzone.h>
+#include <rte_pause.h>
#define RTE_TAILQ_RING_NAME "RTE_RING"
@@ -153,6 +154,7 @@ struct rte_ring {
/**< Memzone, if any, containing the rte_ring */
uint32_t size; /**< Size of ring. */
uint32_t mask; /**< Mask (size-1) of ring. */
+ uint32_t capacity; /**< Usable size of ring */
/** Ring producer status. */
struct rte_ring_headtail prod __rte_aligned(PROD_ALIGN);
@@ -163,6 +165,15 @@ struct rte_ring {
#define RING_F_SP_ENQ 0x0001 /**< The default enqueue is "single-producer". */
#define RING_F_SC_DEQ 0x0002 /**< The default dequeue is "single-consumer". */
+/**
+ * Ring is to hold exactly the requested number of entries.
+ * Without this flag set, the ring size requested must be a power of 2, and the
+ * usable space will be that size - 1. With the flag, the requested size will
+ * be rounded up to the next power of two, but the usable space will be exactly
+ * that requested. Worst case, if a power-of-2 size is requested, half the
+ * ring space will be wasted.
+ */
+#define RING_F_EXACT_SZ 0x0004
#define RTE_RING_SZ_MASK (unsigned)(0x0fffffff) /**< Ring size mask */
/* @internal defines for passing to the enqueue dequeue worker functions */
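A creation sketch under the new flag (ring name and count illustrative): requesting 1000 entries rounds the internal array up to 1024 while keeping all 1000 usable, whereas without the flag a count of 1000 would be rejected as a non-power-of-two:

    #include <rte_lcore.h>
    #include <rte_ring.h>

    static struct rte_ring *
    make_exact_ring(void)
    {
        return rte_ring_create("exact_ring", 1000, rte_socket_id(),
                               RING_F_EXACT_SZ);
    }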
@@ -345,7 +356,7 @@ void rte_ring_dump(FILE *f, const struct rte_ring *r);
} \
} while (0)
-static inline __attribute__((always_inline)) void
+static __rte_always_inline void
update_tail(struct rte_ring_headtail *ht, uint32_t old_val, uint32_t new_val,
uint32_t single)
{
@@ -383,13 +394,13 @@ update_tail(struct rte_ring_headtail *ht, uint32_t old_val, uint32_t new_val,
* Actual number of objects enqueued.
* If behavior == RTE_RING_QUEUE_FIXED, this will be 0 or n only.
*/
-static inline __attribute__((always_inline)) unsigned int
+static __rte_always_inline unsigned int
__rte_ring_move_prod_head(struct rte_ring *r, int is_sp,
unsigned int n, enum rte_ring_queue_behavior behavior,
uint32_t *old_head, uint32_t *new_head,
uint32_t *free_entries)
{
- const uint32_t mask = r->mask;
+ const uint32_t capacity = r->capacity;
unsigned int max = n;
int success;
@@ -399,11 +410,13 @@ __rte_ring_move_prod_head(struct rte_ring *r, int is_sp,
*old_head = r->prod.head;
const uint32_t cons_tail = r->cons.tail;
- /* The subtraction is done between two unsigned 32bits value
+ /*
+ * The subtraction is done between two unsigned 32bits value
* (the result is always modulo 32 bits even if we have
* *old_head > cons_tail). So 'free_entries' is always between 0
- * and size(ring)-1. */
- *free_entries = (mask + cons_tail - *old_head);
+ * and capacity (which is < size).
+ */
+ *free_entries = (capacity + cons_tail - *old_head);
/* check that we have enough room in ring */
if (unlikely(n > *free_entries))
@@ -443,7 +456,7 @@ __rte_ring_move_prod_head(struct rte_ring *r, int is_sp,
* Actual number of objects enqueued.
* If behavior == RTE_RING_QUEUE_FIXED, this will be 0 or n only.
*/
-static inline __attribute__((always_inline)) unsigned int
+static __rte_always_inline unsigned int
__rte_ring_do_enqueue(struct rte_ring *r, void * const *obj_table,
unsigned int n, enum rte_ring_queue_behavior behavior,
int is_sp, unsigned int *free_space)
@@ -489,7 +502,7 @@ end:
* - Actual number of objects dequeued.
* If behavior == RTE_RING_QUEUE_FIXED, this will be 0 or n only.
*/
-static inline __attribute__((always_inline)) unsigned int
+static __rte_always_inline unsigned int
__rte_ring_move_cons_head(struct rte_ring *r, int is_sc,
unsigned int n, enum rte_ring_queue_behavior behavior,
uint32_t *old_head, uint32_t *new_head,
@@ -548,7 +561,7 @@ __rte_ring_move_cons_head(struct rte_ring *r, int is_sc,
* - Actual number of objects dequeued.
* If behavior == RTE_RING_QUEUE_FIXED, this will be 0 or n only.
*/
-static inline __attribute__((always_inline)) unsigned int
+static __rte_always_inline unsigned int
__rte_ring_do_dequeue(struct rte_ring *r, void **obj_table,
unsigned int n, enum rte_ring_queue_behavior behavior,
int is_sc, unsigned int *available)
@@ -590,7 +603,7 @@ end:
* @return
* The number of objects enqueued, either 0 or n
*/
-static inline unsigned int __attribute__((always_inline))
+static __rte_always_inline unsigned int
rte_ring_mp_enqueue_bulk(struct rte_ring *r, void * const *obj_table,
unsigned int n, unsigned int *free_space)
{
@@ -613,7 +626,7 @@ rte_ring_mp_enqueue_bulk(struct rte_ring *r, void * const *obj_table,
* @return
* The number of objects enqueued, either 0 or n
*/
-static inline unsigned int __attribute__((always_inline))
+static __rte_always_inline unsigned int
rte_ring_sp_enqueue_bulk(struct rte_ring *r, void * const *obj_table,
unsigned int n, unsigned int *free_space)
{
@@ -640,7 +653,7 @@ rte_ring_sp_enqueue_bulk(struct rte_ring *r, void * const *obj_table,
* @return
* The number of objects enqueued, either 0 or n
*/
-static inline unsigned int __attribute__((always_inline))
+static __rte_always_inline unsigned int
rte_ring_enqueue_bulk(struct rte_ring *r, void * const *obj_table,
unsigned int n, unsigned int *free_space)
{
@@ -662,7 +675,7 @@ rte_ring_enqueue_bulk(struct rte_ring *r, void * const *obj_table,
* - 0: Success; objects enqueued.
* - -ENOBUFS: Not enough room in the ring to enqueue; no object is enqueued.
*/
-static inline int __attribute__((always_inline))
+static __rte_always_inline int
rte_ring_mp_enqueue(struct rte_ring *r, void *obj)
{
return rte_ring_mp_enqueue_bulk(r, &obj, 1, NULL) ? 0 : -ENOBUFS;
@@ -679,7 +692,7 @@ rte_ring_mp_enqueue(struct rte_ring *r, void *obj)
* - 0: Success; objects enqueued.
* - -ENOBUFS: Not enough room in the ring to enqueue; no object is enqueued.
*/
-static inline int __attribute__((always_inline))
+static __rte_always_inline int
rte_ring_sp_enqueue(struct rte_ring *r, void *obj)
{
return rte_ring_sp_enqueue_bulk(r, &obj, 1, NULL) ? 0 : -ENOBUFS;
@@ -700,7 +713,7 @@ rte_ring_sp_enqueue(struct rte_ring *r, void *obj)
* - 0: Success; objects enqueued.
* - -ENOBUFS: Not enough room in the ring to enqueue; no object is enqueued.
*/
-static inline int __attribute__((always_inline))
+static __rte_always_inline int
rte_ring_enqueue(struct rte_ring *r, void *obj)
{
return rte_ring_enqueue_bulk(r, &obj, 1, NULL) ? 0 : -ENOBUFS;
@@ -724,7 +737,7 @@ rte_ring_enqueue(struct rte_ring *r, void *obj)
* @return
* The number of objects dequeued, either 0 or n
*/
-static inline unsigned int __attribute__((always_inline))
+static __rte_always_inline unsigned int
rte_ring_mc_dequeue_bulk(struct rte_ring *r, void **obj_table,
unsigned int n, unsigned int *available)
{
@@ -748,7 +761,7 @@ rte_ring_mc_dequeue_bulk(struct rte_ring *r, void **obj_table,
* @return
* The number of objects dequeued, either 0 or n
*/
-static inline unsigned int __attribute__((always_inline))
+static __rte_always_inline unsigned int
rte_ring_sc_dequeue_bulk(struct rte_ring *r, void **obj_table,
unsigned int n, unsigned int *available)
{
@@ -775,7 +788,7 @@ rte_ring_sc_dequeue_bulk(struct rte_ring *r, void **obj_table,
* @return
* The number of objects dequeued, either 0 or n
*/
-static inline unsigned int __attribute__((always_inline))
+static __rte_always_inline unsigned int
rte_ring_dequeue_bulk(struct rte_ring *r, void **obj_table, unsigned int n,
unsigned int *available)
{
@@ -798,10 +811,10 @@ rte_ring_dequeue_bulk(struct rte_ring *r, void **obj_table, unsigned int n,
* - -ENOENT: Not enough entries in the ring to dequeue; no object is
* dequeued.
*/
-static inline int __attribute__((always_inline))
+static __rte_always_inline int
rte_ring_mc_dequeue(struct rte_ring *r, void **obj_p)
{
- return rte_ring_mc_dequeue_bulk(r, obj_p, 1, NULL) ? 0 : -ENOBUFS;
+ return rte_ring_mc_dequeue_bulk(r, obj_p, 1, NULL) ? 0 : -ENOENT;
}
/**
@@ -816,10 +829,10 @@ rte_ring_mc_dequeue(struct rte_ring *r, void **obj_p)
* - -ENOENT: Not enough entries in the ring to dequeue, no object is
* dequeued.
*/
-static inline int __attribute__((always_inline))
+static __rte_always_inline int
rte_ring_sc_dequeue(struct rte_ring *r, void **obj_p)
{
- return rte_ring_sc_dequeue_bulk(r, obj_p, 1, NULL) ? 0 : -ENOBUFS;
+ return rte_ring_sc_dequeue_bulk(r, obj_p, 1, NULL) ? 0 : -ENOENT;
}
/**
@@ -838,76 +851,71 @@ rte_ring_sc_dequeue(struct rte_ring *r, void **obj_p)
* - -ENOENT: Not enough entries in the ring to dequeue, no object is
* dequeued.
*/
-static inline int __attribute__((always_inline))
+static __rte_always_inline int
rte_ring_dequeue(struct rte_ring *r, void **obj_p)
{
return rte_ring_dequeue_bulk(r, obj_p, 1, NULL) ? 0 : -ENOENT;
}
/**
- * Test if a ring is full.
+ * Return the number of entries in a ring.
*
* @param r
* A pointer to the ring structure.
* @return
- * - 1: The ring is full.
- * - 0: The ring is not full.
+ * The number of entries in the ring.
*/
-static inline int
-rte_ring_full(const struct rte_ring *r)
+static inline unsigned
+rte_ring_count(const struct rte_ring *r)
{
uint32_t prod_tail = r->prod.tail;
uint32_t cons_tail = r->cons.tail;
- return ((cons_tail - prod_tail - 1) & r->mask) == 0;
+ uint32_t count = (prod_tail - cons_tail) & r->mask;
+ return (count > r->capacity) ? r->capacity : count;
}
/**
- * Test if a ring is empty.
+ * Return the number of free entries in a ring.
*
* @param r
* A pointer to the ring structure.
* @return
- * - 1: The ring is empty.
- * - 0: The ring is not empty.
+ * The number of free entries in the ring.
*/
-static inline int
-rte_ring_empty(const struct rte_ring *r)
+static inline unsigned
+rte_ring_free_count(const struct rte_ring *r)
{
- uint32_t prod_tail = r->prod.tail;
- uint32_t cons_tail = r->cons.tail;
- return !!(cons_tail == prod_tail);
+ return r->capacity - rte_ring_count(r);
}
/**
- * Return the number of entries in a ring.
+ * Test if a ring is full.
*
* @param r
* A pointer to the ring structure.
* @return
- * The number of entries in the ring.
+ * - 1: The ring is full.
+ * - 0: The ring is not full.
*/
-static inline unsigned
-rte_ring_count(const struct rte_ring *r)
+static inline int
+rte_ring_full(const struct rte_ring *r)
{
- uint32_t prod_tail = r->prod.tail;
- uint32_t cons_tail = r->cons.tail;
- return (prod_tail - cons_tail) & r->mask;
+ return rte_ring_free_count(r) == 0;
}
/**
- * Return the number of free entries in a ring.
+ * Test if a ring is empty.
*
* @param r
* A pointer to the ring structure.
* @return
- * The number of free entries in the ring.
+ * - 1: The ring is empty.
+ * - 0: The ring is not empty.
*/
-static inline unsigned
-rte_ring_free_count(const struct rte_ring *r)
+static inline int
+rte_ring_empty(const struct rte_ring *r)
{
- uint32_t prod_tail = r->prod.tail;
- uint32_t cons_tail = r->cons.tail;
- return (cons_tail - prod_tail - 1) & r->mask;
+ return rte_ring_count(r) == 0;
}
/**
@@ -916,7 +924,9 @@ rte_ring_free_count(const struct rte_ring *r)
* @param r
* A pointer to the ring structure.
* @return
- * The number of elements which can be stored in the ring.
+ * The size of the data store used by the ring.
+ * NOTE: this is not the same as the usable space in the ring. To query that
+ * use ``rte_ring_get_capacity()``.
*/
static inline unsigned int
rte_ring_get_size(const struct rte_ring *r)
@@ -925,6 +935,20 @@ rte_ring_get_size(const struct rte_ring *r)
}
/**
+ * Return the number of elements which can be stored in the ring.
+ *
+ * @param r
+ * A pointer to the ring structure.
+ * @return
+ * The usable size of the ring.
+ */
+static inline unsigned int
+rte_ring_get_capacity(const struct rte_ring *r)
+{
+ return r->capacity;
+}
+
+/**
* Dump the status of all rings on the console
*
* @param f
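To make the size/capacity split concrete, a query sketch for the 1000-entry RING_F_EXACT_SZ ring sketched earlier (helper name illustrative, values follow from that example):

    #include <stdio.h>
    #include <rte_ring.h>

    static void
    show_ring_geometry(const struct rte_ring *r)
    {
        /* For the exact-size ring above: size 1024, capacity 1000. */
        printf("size=%u capacity=%u\n",
               rte_ring_get_size(r), rte_ring_get_capacity(r));
    }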
@@ -962,7 +986,7 @@ struct rte_ring *rte_ring_lookup(const char *name);
* @return
* - n: Actual number of objects enqueued.
*/
-static inline unsigned __attribute__((always_inline))
+static __rte_always_inline unsigned
rte_ring_mp_enqueue_burst(struct rte_ring *r, void * const *obj_table,
unsigned int n, unsigned int *free_space)
{
@@ -985,7 +1009,7 @@ rte_ring_mp_enqueue_burst(struct rte_ring *r, void * const *obj_table,
* @return
* - n: Actual number of objects enqueued.
*/
-static inline unsigned __attribute__((always_inline))
+static __rte_always_inline unsigned
rte_ring_sp_enqueue_burst(struct rte_ring *r, void * const *obj_table,
unsigned int n, unsigned int *free_space)
{
@@ -1012,7 +1036,7 @@ rte_ring_sp_enqueue_burst(struct rte_ring *r, void * const *obj_table,
* @return
* - n: Actual number of objects enqueued.
*/
-static inline unsigned __attribute__((always_inline))
+static __rte_always_inline unsigned
rte_ring_enqueue_burst(struct rte_ring *r, void * const *obj_table,
unsigned int n, unsigned int *free_space)
{
@@ -1040,7 +1064,7 @@ rte_ring_enqueue_burst(struct rte_ring *r, void * const *obj_table,
* @return
* - n: Actual number of objects dequeued, 0 if ring is empty
*/
-static inline unsigned __attribute__((always_inline))
+static __rte_always_inline unsigned
rte_ring_mc_dequeue_burst(struct rte_ring *r, void **obj_table,
unsigned int n, unsigned int *available)
{
@@ -1065,7 +1089,7 @@ rte_ring_mc_dequeue_burst(struct rte_ring *r, void **obj_table,
* @return
* - n: Actual number of objects dequeued, 0 if ring is empty
*/
-static inline unsigned __attribute__((always_inline))
+static __rte_always_inline unsigned
rte_ring_sc_dequeue_burst(struct rte_ring *r, void **obj_table,
unsigned int n, unsigned int *available)
{
@@ -1092,7 +1116,7 @@ rte_ring_sc_dequeue_burst(struct rte_ring *r, void **obj_table,
* @return
* - Number of objects dequeued
*/
-static inline unsigned __attribute__((always_inline))
+static __rte_always_inline unsigned
rte_ring_dequeue_burst(struct rte_ring *r, void **obj_table,
unsigned int n, unsigned int *available)
{
diff --git a/lib/librte_sched/rte_sched.c b/lib/librte_sched/rte_sched.c
index 614705d8..b7cba110 100644
--- a/lib/librte_sched/rte_sched.c
+++ b/lib/librte_sched/rte_sched.c
@@ -56,8 +56,10 @@
#ifdef RTE_SCHED_VECTOR
#include <rte_vect.h>
-#if defined(__SSE4__)
+#ifdef RTE_ARCH_X86
#define SCHED_VECTOR_SSE4
+#elif defined(RTE_MACHINE_CPUFLAG_NEON)
+#define SCHED_VECTOR_NEON
#endif
#endif
@@ -1732,6 +1734,26 @@ grinder_pipe_exists(struct rte_sched_port *port, uint32_t base_pipe)
return 1;
}
+#elif defined(SCHED_VECTOR_NEON)
+
+static inline int
+grinder_pipe_exists(struct rte_sched_port *port, uint32_t base_pipe)
+{
+ uint32x4_t index, pipes;
+ uint32_t *pos = (uint32_t *)port->grinder_base_bmp_pos;
+
+ index = vmovq_n_u32(base_pipe);
+ pipes = vld1q_u32(pos);
+ if (!vminvq_u32(veorq_u32(pipes, index)))
+ return 1;
+
+ pipes = vld1q_u32(pos + 4);
+ if (!vminvq_u32(veorq_u32(pipes, index)))
+ return 1;
+
+ return 0;
+}
+
#else
static inline int
diff --git a/lib/librte_table/Makefile b/lib/librte_table/Makefile
index 0d06d36a..8ddc8804 100644
--- a/lib/librte_table/Makefile
+++ b/lib/librte_table/Makefile
@@ -69,6 +69,12 @@ SYMLINK-$(CONFIG_RTE_LIBRTE_TABLE)-include += rte_table_acl.h
endif
SYMLINK-$(CONFIG_RTE_LIBRTE_TABLE)-include += rte_table_hash.h
SYMLINK-$(CONFIG_RTE_LIBRTE_TABLE)-include += rte_lru.h
+ifeq ($(CONFIG_RTE_ARCH_X86),y)
+SYMLINK-$(CONFIG_RTE_LIBRTE_TABLE)-include += rte_lru_x86.h
+endif
+ifeq ($(CONFIG_RTE_ARCH_ARM64),y)
+SYMLINK-$(CONFIG_RTE_LIBRTE_TABLE)-include += rte_lru_arm64.h
+endif
SYMLINK-$(CONFIG_RTE_LIBRTE_TABLE)-include += rte_table_array.h
SYMLINK-$(CONFIG_RTE_LIBRTE_TABLE)-include += rte_table_stub.h
diff --git a/lib/librte_table/rte_lru.h b/lib/librte_table/rte_lru.h
index e87e062d..93258ef4 100644
--- a/lib/librte_table/rte_lru.h
+++ b/lib/librte_table/rte_lru.h
@@ -38,31 +38,15 @@
extern "C" {
#endif
-#include <stdint.h>
-
-#ifdef __INTEL_COMPILER
-#define GCC_VERSION (0)
+#ifdef RTE_ARCH_X86_64
+#include "rte_lru_x86.h"
+#elif defined(RTE_ARCH_ARM64)
+#include "rte_lru_arm64.h"
#else
-#define GCC_VERSION (__GNUC__ * 10000+__GNUC_MINOR__*100 + __GNUC_PATCHLEVEL__)
-#endif
-
-#ifndef RTE_TABLE_HASH_LRU_STRATEGY
-#ifdef __SSE4_2__
-#define RTE_TABLE_HASH_LRU_STRATEGY 2
-#else /* if no SSE, use simple scalar version */
-#define RTE_TABLE_HASH_LRU_STRATEGY 1
-#endif
-#endif
-
-#ifndef RTE_ARCH_X86_64
#undef RTE_TABLE_HASH_LRU_STRATEGY
#define RTE_TABLE_HASH_LRU_STRATEGY 1
#endif
-#if (RTE_TABLE_HASH_LRU_STRATEGY < 0) || (RTE_TABLE_HASH_LRU_STRATEGY > 3)
-#error Invalid value for RTE_TABLE_HASH_LRU_STRATEGY
-#endif
-
#if RTE_TABLE_HASH_LRU_STRATEGY == 0
#define lru_init(bucket) \
@@ -118,87 +102,11 @@ do { \
bucket->lru_list = x; \
} while (0)
-#elif RTE_TABLE_HASH_LRU_STRATEGY == 2
-
-#if GCC_VERSION > 40306
-#include <x86intrin.h>
-#else
-#include <emmintrin.h>
-#include <smmintrin.h>
-#include <xmmintrin.h>
-#endif
-
-#define lru_init(bucket) \
-do \
- bucket->lru_list = 0x0000000100020003LLU; \
-while (0)
-
-#define lru_pos(bucket) (bucket->lru_list & 0xFFFFLLU)
-
-#define lru_update(bucket, mru_val) \
-do { \
- /* set up the masks for all possible shuffles, depends on pos */\
- static uint64_t masks[10] = { \
- /* Shuffle order; Make Zero (see _mm_shuffle_epi8 manual) */\
- 0x0100070605040302, 0x8080808080808080, \
- 0x0302070605040100, 0x8080808080808080, \
- 0x0504070603020100, 0x8080808080808080, \
- 0x0706050403020100, 0x8080808080808080, \
- 0x0706050403020100, 0x8080808080808080}; \
- /* load up one register with repeats of mru-val */ \
- uint64_t mru2 = mru_val; \
- uint64_t mru3 = mru2 | (mru2 << 16); \
- uint64_t lru = bucket->lru_list; \
- /* XOR to cause the word we're looking for to go to zero */ \
- uint64_t mru = lru ^ ((mru3 << 32) | mru3); \
- __m128i c = _mm_cvtsi64_si128(mru); \
- __m128i b = _mm_cvtsi64_si128(lru); \
- /* Find the minimum value (first zero word, if it's in there) */\
- __m128i d = _mm_minpos_epu16(c); \
- /* Second word is the index to found word (first word is the value) */\
- unsigned pos = _mm_extract_epi16(d, 1); \
- /* move the recently used location to top of list */ \
- __m128i k = _mm_shuffle_epi8(b, *((__m128i *) &masks[2 * pos]));\
- /* Finally, update the original list with the reordered data */ \
- bucket->lru_list = _mm_extract_epi64(k, 0); \
- /* Phwew! */ \
-} while (0)
-
-#elif RTE_TABLE_HASH_LRU_STRATEGY == 3
+#elif (RTE_TABLE_HASH_LRU_STRATEGY == 2) || (RTE_TABLE_HASH_LRU_STRATEGY == 3)
-#if GCC_VERSION > 40306
-#include <x86intrin.h>
-#else
-#include <emmintrin.h>
-#include <smmintrin.h>
-#include <xmmintrin.h>
-#endif
-
-#define lru_init(bucket) \
-do \
- bucket->lru_list = ~0LLU; \
-while (0)
-
-
-static inline int
-f_lru_pos(uint64_t lru_list)
-{
- __m128i lst = _mm_set_epi64x((uint64_t)-1, lru_list);
- __m128i min = _mm_minpos_epu16(lst);
- return _mm_extract_epi16(min, 1);
-}
-#define lru_pos(bucket) f_lru_pos(bucket->lru_list)
-
-#define lru_update(bucket, mru_val) \
-do { \
- const uint64_t orvals[] = {0xFFFFLLU, 0xFFFFLLU << 16, \
- 0xFFFFLLU << 32, 0xFFFFLLU << 48, 0LLU}; \
- const uint64_t decs[] = {0x1000100010001LLU, 0}; \
- __m128i lru = _mm_cvtsi64_si128(bucket->lru_list); \
- __m128i vdec = _mm_cvtsi64_si128(decs[mru_val>>2]); \
- lru = _mm_subs_epu16(lru, vdec); \
- bucket->lru_list = _mm_extract_epi64(lru, 0) | orvals[mru_val]; \
-} while (0)
+/**
+ * These strategies are implemented in architecture-specific header files.
+ */
#else
diff --git a/lib/librte_table/rte_lru_arm64.h b/lib/librte_table/rte_lru_arm64.h
new file mode 100644
index 00000000..61735238
--- /dev/null
+++ b/lib/librte_table/rte_lru_arm64.h
@@ -0,0 +1,88 @@
+/*
+ * BSD LICENSE
+ *
+ * Copyright (C) Cavium, Inc. 2017.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Cavium, Inc nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef __RTE_LRU_ARM64_H__
+#define __RTE_LRU_ARM64_H__
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include <stdint.h>
+#include <rte_vect.h>
+
+#ifndef RTE_TABLE_HASH_LRU_STRATEGY
+#ifdef RTE_MACHINE_CPUFLAG_NEON
+#define RTE_TABLE_HASH_LRU_STRATEGY 3
+#else /* if no NEON, use simple scalar version */
+#define RTE_TABLE_HASH_LRU_STRATEGY 1
+#endif
+#endif
+
+#if RTE_TABLE_HASH_LRU_STRATEGY == 3
+
+#define lru_init(bucket) \
+ { bucket->lru_list = ~0LLU; }
+
+static inline int
+f_lru_pos(uint64_t lru_list)
+{
+	/* Build a mask of the lanes that hold the minimum value */
+ uint16x4_t lru_vec = vld1_u16((uint16_t *)&lru_list);
+ uint16x4_t min_vec = vmov_n_u16(vminv_u16(lru_vec));
+ uint64_t mask = vget_lane_u64(vreinterpret_u64_u16(
+ vceq_u16(min_vec, lru_vec)), 0);
+ return __builtin_clzl(mask) >> 4;
+}
+#define lru_pos(bucket) f_lru_pos(bucket->lru_list)
+
+#define lru_update(bucket, mru_val) \
+do { \
+ const uint64_t orvals[] = {0xFFFFLLU, 0xFFFFLLU << 16, \
+ 0xFFFFLLU << 32, 0xFFFFLLU << 48, 0LLU}; \
+ const uint64_t decs[] = {0x1000100010001LLU, 0}; \
+ uint64x1_t lru = vdup_n_u64(bucket->lru_list); \
+ uint64x1_t vdec = vdup_n_u64(decs[mru_val>>2]); \
+ bucket->lru_list = vget_lane_u64(vreinterpret_u64_u16( \
+ vsub_u16(vreinterpret_u16_u64(lru), \
+ vreinterpret_u16_u64(vdec))), \
+ 0); \
+ bucket->lru_list |= orvals[mru_val]; \
+} while (0)
+
+#endif
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
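
NEON has no direct counterpart to x86's _mm_minpos_epu16, so f_lru_pos broadcasts the horizontal minimum (vminv_u16), builds a per-lane equality mask (vceq_u16), and converts the mask into a lane number with a leading-zero count; note that __builtin_clzl counts from bit 63, so the result numbers lanes from the top of the register. A scalar walk-through of the same arithmetic (the function and sample value below are illustrative only):

	#include <stdint.h>
	#include <stdio.h>

	static int scalar_walkthrough(uint64_t lru_list)
	{
		uint16_t min = 0xFFFF;
		uint64_t mask = 0;
		int lane;

		/* vminv_u16: horizontal minimum over the four 16-bit lanes */
		for (lane = 0; lane < 4; lane++) {
			uint16_t v = (lru_list >> (16 * lane)) & 0xFFFF;
			if (v < min)
				min = v;
		}
		/* vceq_u16: lanes equal to the minimum become 0xFFFF */
		for (lane = 0; lane < 4; lane++) {
			uint16_t v = (lru_list >> (16 * lane)) & 0xFFFF;
			if (v == min)
				mask |= 0xFFFFULL << (16 * lane);
		}
		/* clz counts from bit 63 (lane 3) downwards; 16 bits per lane */
		return __builtin_clzll(mask) >> 4;
	}

	int main(void)
	{
		/* lane 1 holds the minimum: mask = 0x00000000FFFF0000,
		 * clz = 32, 32 >> 4 = 2 */
		printf("%d\n", scalar_walkthrough(0xFFFFFFFF0000FFFFULL));
		return 0;
	}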
diff --git a/lib/librte_table/rte_lru_x86.h b/lib/librte_table/rte_lru_x86.h
new file mode 100644
index 00000000..10f513cd
--- /dev/null
+++ b/lib/librte_table/rte_lru_x86.h
@@ -0,0 +1,130 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef __INCLUDE_RTE_LRU_X86_H__
+#define __INCLUDE_RTE_LRU_X86_H__
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include <stdint.h>
+
+#ifndef RTE_TABLE_HASH_LRU_STRATEGY
+#define RTE_TABLE_HASH_LRU_STRATEGY 2
+#endif
+
+#if RTE_TABLE_HASH_LRU_STRATEGY == 2
+
+#if defined(RTE_TOOLCHAIN_GCC) && (GCC_VERSION > 40306)
+#include <x86intrin.h>
+#else
+#include <emmintrin.h>
+#include <smmintrin.h>
+#include <xmmintrin.h>
+#endif
+
+#define lru_init(bucket) \
+ { bucket->lru_list = 0x0000000100020003LLU; }
+
+#define lru_pos(bucket) (bucket->lru_list & 0xFFFFLLU)
+
+#define lru_update(bucket, mru_val) \
+do { \
+ /* set up the masks for all possible shuffles, depends on pos */\
+ static uint64_t masks[10] = { \
+ /* Shuffle order; Make Zero (see _mm_shuffle_epi8 manual) */\
+ 0x0100070605040302, 0x8080808080808080, \
+ 0x0302070605040100, 0x8080808080808080, \
+ 0x0504070603020100, 0x8080808080808080, \
+ 0x0706050403020100, 0x8080808080808080, \
+ 0x0706050403020100, 0x8080808080808080}; \
+ /* load up one register with repeats of mru-val */ \
+ uint64_t mru2 = mru_val; \
+ uint64_t mru3 = mru2 | (mru2 << 16); \
+ uint64_t lru = bucket->lru_list; \
+ /* XOR to cause the word we're looking for to go to zero */ \
+ uint64_t mru = lru ^ ((mru3 << 32) | mru3); \
+ __m128i c = _mm_cvtsi64_si128(mru); \
+ __m128i b = _mm_cvtsi64_si128(lru); \
+ /* Find the minimum value (first zero word, if it's in there) */\
+ __m128i d = _mm_minpos_epu16(c); \
+	/* Second word is the index of the found word (first word is the value) */\
+ unsigned int pos = _mm_extract_epi16(d, 1); \
+ /* move the recently used location to top of list */ \
+ __m128i k = _mm_shuffle_epi8(b, *((__m128i *) &masks[2 * pos]));\
+ /* Finally, update the original list with the reordered data */ \
+ bucket->lru_list = _mm_extract_epi64(k, 0); \
+	/* Phew! */ \
+} while (0)
+
+#elif RTE_TABLE_HASH_LRU_STRATEGY == 3
+
+#if defined(RTE_TOOLCHAIN_GCC) && (GCC_VERSION > 40306)
+#include <x86intrin.h>
+#else
+#include <emmintrin.h>
+#include <smmintrin.h>
+#include <xmmintrin.h>
+#endif
+
+#define lru_init(bucket) \
+ { bucket->lru_list = ~0LLU; }
+
+static inline int
+f_lru_pos(uint64_t lru_list)
+{
+ __m128i lst = _mm_set_epi64x((uint64_t)-1, lru_list);
+ __m128i min = _mm_minpos_epu16(lst);
+ return _mm_extract_epi16(min, 1);
+}
+#define lru_pos(bucket) f_lru_pos(bucket->lru_list)
+
+#define lru_update(bucket, mru_val) \
+do { \
+ const uint64_t orvals[] = {0xFFFFLLU, 0xFFFFLLU << 16, \
+ 0xFFFFLLU << 32, 0xFFFFLLU << 48, 0LLU}; \
+ const uint64_t decs[] = {0x1000100010001LLU, 0}; \
+ __m128i lru = _mm_cvtsi64_si128(bucket->lru_list); \
+ __m128i vdec = _mm_cvtsi64_si128(decs[mru_val>>2]); \
+ lru = _mm_subs_epu16(lru, vdec); \
+ bucket->lru_list = _mm_extract_epi64(lru, 0) | orvals[mru_val]; \
+} while (0)
+
+#endif
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
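
A hedged usage sketch of the strategy-2 macros, assuming an SSE4.2 build (e.g. -msse4.2) in which rte_lru.h selects this header; toy_bucket is a stand-in for the real table bucket, which likewise exposes a 64-bit lru_list field:

	#include <stdint.h>
	#include <stdio.h>
	#include <rte_lru.h>	/* dispatches to rte_lru_x86.h on x86 builds */

	struct toy_bucket {	/* stand-in for the real hash-table bucket */
		uint64_t lru_list;
	};

	int main(void)
	{
		struct toy_bucket b, *bucket = &b;

		lru_init(bucket);	/* order (LRU..MRU): 3, 2, 1, 0 */
		lru_update(bucket, 3);	/* entry 3 becomes most recently used */
		printf("evict entry %u\n", (unsigned int)lru_pos(bucket)); /* 2 */
		return 0;
	}

The shuffle mask indexed by pos rotates the touched word to the MRU (top) position while preserving the relative order of the other three words, so entry 2 is now the eviction candidate.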
diff --git a/lib/librte_timer/rte_timer.c b/lib/librte_timer/rte_timer.c
index 18782fab..5ee08408 100644
--- a/lib/librte_timer/rte_timer.c
+++ b/lib/librte_timer/rte_timer.c
@@ -46,11 +46,11 @@
#include <rte_memzone.h>
#include <rte_launch.h>
#include <rte_eal.h>
-#include <rte_per_lcore.h>
#include <rte_lcore.h>
#include <rte_branch_prediction.h>
#include <rte_spinlock.h>
#include <rte_random.h>
+#include <rte_pause.h>
#include "rte_timer.h"
@@ -183,7 +183,7 @@ timer_set_running_state(struct rte_timer *tim)
return -1;
/* here, we know that timer is stopped or pending,
- * mark it atomically as beeing configured */
+ * mark it atomically as being configured */
status.state = RTE_TIMER_RUNNING;
status.owner = (int16_t)lcore_id;
success = rte_atomic32_cmpset(&tim->status.u32,
diff --git a/lib/librte_vhost/rte_vhost.h b/lib/librte_vhost/rte_vhost.h
index 605e47cb..8c974eb1 100644
--- a/lib/librte_vhost/rte_vhost.h
+++ b/lib/librte_vhost/rte_vhost.h
@@ -120,7 +120,7 @@ struct vhost_device_ops {
* @return
* the host virtual address on success, 0 on failure
*/
-static inline uint64_t __attribute__((always_inline))
+static __rte_always_inline uint64_t
rte_vhost_gpa_to_vva(struct rte_vhost_memory *mem, uint64_t gpa)
{
struct rte_vhost_mem_region *reg;
@@ -365,7 +365,7 @@ struct rte_mempool;
/**
* This function adds buffers to the virtio devices RX virtqueue. Buffers can
* be received from the physical port or from another virtual device. A packet
- * count is returned to indicate the number of packets that were succesfully
+ * count is returned to indicate the number of packets that were successfully
* added to the RX queue.
* @param vid
* vhost device ID
@@ -432,6 +432,18 @@ int rte_vhost_get_mem_table(int vid, struct rte_vhost_memory **mem);
int rte_vhost_get_vhost_vring(int vid, uint16_t vring_idx,
struct rte_vhost_vring *vring);
+/**
+ * Get vhost RX queue avail count.
+ *
+ * @param vid
+ * vhost device ID
+ * @param qid
+ * virtio queue index in mq case
+ * @return
+ *  number of descriptors available
+ */
+uint32_t rte_vhost_rx_queue_count(int vid, uint16_t qid);
+
#ifdef __cplusplus
}
#endif
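
A hedged sketch of how an application might use the new counter to skip needless dequeue attempts; poll_guest_tx is illustrative, and the queue index follows the implementation's convention that only odd virtqueue indexes are accepted (queue pair 0 exposes the guest TX ring at index 1):

	#include <stdint.h>
	#include <rte_vhost.h>
	#include <rte_mbuf.h>

	#define MAX_PKT_BURST 32

	static uint16_t
	poll_guest_tx(int vid, struct rte_mempool *mbuf_pool,
		      struct rte_mbuf **pkts)
	{
		/* cheap peek at the avail ring before the heavier dequeue */
		if (rte_vhost_rx_queue_count(vid, 1) == 0)
			return 0;

		return rte_vhost_dequeue_burst(vid, 1, mbuf_pool, pkts,
					       MAX_PKT_BURST);
	}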
diff --git a/lib/librte_vhost/rte_vhost_version.map b/lib/librte_vhost/rte_vhost_version.map
index 07858732..1e704953 100644
--- a/lib/librte_vhost/rte_vhost_version.map
+++ b/lib/librte_vhost/rte_vhost_version.map
@@ -45,3 +45,10 @@ DPDK_17.05 {
rte_vhost_log_write;
} DPDK_16.07;
+
+DPDK_17.08 {
+ global:
+
+ rte_vhost_rx_queue_count;
+
+} DPDK_17.05;
diff --git a/lib/librte_vhost/socket.c b/lib/librte_vhost/socket.c
index c7f99b08..41aa3f9b 100644
--- a/lib/librte_vhost/socket.c
+++ b/lib/librte_vhost/socket.c
@@ -445,13 +445,22 @@ vhost_user_reconnect_init(void)
{
int ret;
- pthread_mutex_init(&reconn_list.mutex, NULL);
+ ret = pthread_mutex_init(&reconn_list.mutex, NULL);
+ if (ret < 0) {
+		RTE_LOG(ERR, VHOST_CONFIG, "failed to initialize mutex\n");
+ return ret;
+ }
TAILQ_INIT(&reconn_list.head);
ret = pthread_create(&reconn_tid, NULL,
vhost_user_client_reconnect, NULL);
- if (ret < 0)
+ if (ret < 0) {
RTE_LOG(ERR, VHOST_CONFIG, "failed to create reconnect thread");
+ if (pthread_mutex_destroy(&reconn_list.mutex)) {
+ RTE_LOG(ERR, VHOST_CONFIG,
+ "failed to destroy reconnect mutex");
+ }
+ }
return ret;
}
@@ -613,8 +622,19 @@ rte_vhost_driver_register(const char *path, uint64_t flags)
goto out;
memset(vsocket, 0, sizeof(struct vhost_user_socket));
vsocket->path = strdup(path);
+ if (vsocket->path == NULL) {
+ RTE_LOG(ERR, VHOST_CONFIG,
+ "error: failed to copy socket path string\n");
+ free(vsocket);
+ goto out;
+ }
TAILQ_INIT(&vsocket->conn_list);
- pthread_mutex_init(&vsocket->conn_mutex, NULL);
+ ret = pthread_mutex_init(&vsocket->conn_mutex, NULL);
+ if (ret) {
+ RTE_LOG(ERR, VHOST_CONFIG,
+ "error: failed to init connection mutex\n");
+ goto out_free;
+ }
vsocket->dequeue_zero_copy = flags & RTE_VHOST_USER_DEQUEUE_ZERO_COPY;
/*
@@ -636,9 +656,7 @@ rte_vhost_driver_register(const char *path, uint64_t flags)
vsocket->reconnect = !(flags & RTE_VHOST_USER_NO_RECONNECT);
if (vsocket->reconnect && reconn_tid == 0) {
if (vhost_user_reconnect_init() < 0) {
- free(vsocket->path);
- free(vsocket);
- goto out;
+ goto out_mutex;
}
}
} else {
@@ -646,13 +664,22 @@ rte_vhost_driver_register(const char *path, uint64_t flags)
}
ret = create_unix_socket(vsocket);
if (ret < 0) {
- free(vsocket->path);
- free(vsocket);
- goto out;
+ goto out_mutex;
}
vhost_user.vsockets[vhost_user.vsocket_cnt++] = vsocket;
+ pthread_mutex_unlock(&vhost_user.mutex);
+ return ret;
+
+out_mutex:
+ if (pthread_mutex_destroy(&vsocket->conn_mutex)) {
+ RTE_LOG(ERR, VHOST_CONFIG,
+ "error: failed to destroy connection mutex\n");
+ }
+out_free:
+ free(vsocket->path);
+ free(vsocket);
out:
pthread_mutex_unlock(&vhost_user.mutex);
@@ -724,6 +751,7 @@ rte_vhost_driver_unregister(const char *path)
}
pthread_mutex_unlock(&vsocket->conn_mutex);
+ pthread_mutex_destroy(&vsocket->conn_mutex);
free(vsocket->path);
free(vsocket);
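
The register path now unwinds through dedicated labels instead of duplicating free() pairs at every failure site. A minimal sketch of the pattern with stand-in names, where each label releases exactly what was acquired before the failing step:

	#include <pthread.h>
	#include <stdlib.h>
	#include <string.h>

	struct thing {
		char *path;
		pthread_mutex_t lock;
	};

	static struct thing *thing_create(const char *path)
	{
		struct thing *t = malloc(sizeof(*t));

		if (t == NULL)
			goto out;
		t->path = strdup(path);
		if (t->path == NULL)
			goto out_free;
		if (pthread_mutex_init(&t->lock, NULL) != 0)
			goto out_path;
		return t;		/* success: caller owns everything */

	out_path:
		free(t->path);
	out_free:
		free(t);
	out:
		return NULL;
	}

	int main(void)
	{
		struct thing *t = thing_create("/tmp/example");

		if (t != NULL) {
			pthread_mutex_destroy(&t->lock);
			free(t->path);
			free(t);
		}
		return 0;
	}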
diff --git a/lib/librte_vhost/vhost.c b/lib/librte_vhost/vhost.c
index 1f565fbb..0b6aa1cc 100644
--- a/lib/librte_vhost/vhost.c
+++ b/lib/librte_vhost/vhost.c
@@ -40,6 +40,7 @@
#include <numaif.h>
#endif
+#include <rte_errno.h>
#include <rte_ethdev.h>
#include <rte_log.h>
#include <rte_string_fns.h>
@@ -272,7 +273,7 @@ rte_vhost_get_mtu(int vid, uint16_t *mtu)
if (!(dev->flags & VIRTIO_DEV_READY))
return -EAGAIN;
- if (!(dev->features & VIRTIO_NET_F_MTU))
+ if (!(dev->features & (1ULL << VIRTIO_NET_F_MTU)))
return -ENOTSUP;
*mtu = dev->mtu;
@@ -295,7 +296,8 @@ rte_vhost_get_numa_node(int vid)
MPOL_F_NODE | MPOL_F_ADDR);
if (ret < 0) {
RTE_LOG(ERR, VHOST_CONFIG,
- "(%d) failed to query numa node: %d\n", vid, ret);
+ "(%d) failed to query numa node: %s\n",
+ vid, rte_strerror(errno));
return -1;
}
@@ -475,3 +477,29 @@ rte_vhost_log_used_vring(int vid, uint16_t vring_idx,
vhost_log_used_vring(dev, vq, offset, len);
}
+
+uint32_t
+rte_vhost_rx_queue_count(int vid, uint16_t qid)
+{
+ struct virtio_net *dev;
+ struct vhost_virtqueue *vq;
+
+ dev = get_device(vid);
+ if (dev == NULL)
+ return 0;
+
+ if (unlikely(qid >= dev->nr_vring || (qid & 1) == 0)) {
+ RTE_LOG(ERR, VHOST_DATA, "(%d) %s: invalid virtqueue idx %d.\n",
+ dev->vid, __func__, qid);
+ return 0;
+ }
+
+ vq = dev->virtqueue[qid];
+ if (vq == NULL)
+ return 0;
+
+ if (unlikely(vq->enabled == 0 || vq->avail == NULL))
+ return 0;
+
+ return *((volatile uint16_t *)&vq->avail->idx) - vq->last_avail_idx;
+}
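
The MTU check above is one of two places in this patch (the other is virtio_net_with_host_offload() in virtio_net.c) where a direct mask test is replaced by `1ULL << VIRTIO_NET_F_*`: the virtio feature constants are bit positions, not masks, so testing them directly silently checks the wrong bits. A self-contained illustration (the macro value matches the virtio spec):

	#include <stdint.h>
	#include <stdio.h>

	#define VIRTIO_NET_F_MTU 3	/* a bit position, not a mask */

	int main(void)
	{
		uint64_t features = 1ULL << VIRTIO_NET_F_MTU;

		/* buggy: 8 & 3 tests bits 0 and 1, missing bit 3 entirely */
		printf("buggy: %d\n", !!(features & VIRTIO_NET_F_MTU));
		/* fixed: build the mask from the position first */
		printf("fixed: %d\n",
		       !!(features & (1ULL << VIRTIO_NET_F_MTU)));
		return 0;
	}

Here the buggy form prints 0 even though the feature was negotiated, which is exactly the false -ENOTSUP that the rte_vhost_get_mtu() fix addresses.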
diff --git a/lib/librte_vhost/vhost.h b/lib/librte_vhost/vhost.h
index ddd8a9c4..6fe72aeb 100644
--- a/lib/librte_vhost/vhost.h
+++ b/lib/librte_vhost/vhost.h
@@ -201,13 +201,22 @@ struct virtio_net {
#define VHOST_LOG_PAGE 4096
-static inline void __attribute__((always_inline))
+/*
+ * Atomically set a bit in memory.
+ */
+static __rte_always_inline void
+vhost_set_bit(unsigned int nr, volatile uint8_t *addr)
+{
+	__sync_fetch_and_or_1(addr, (1U << nr));
+}
+
+static __rte_always_inline void
vhost_log_page(uint8_t *log_base, uint64_t page)
{
- log_base[page / 8] |= 1 << (page % 8);
+ vhost_set_bit(page % 8, &log_base[page / 8]);
}
-static inline void __attribute__((always_inline))
+static __rte_always_inline void
vhost_log_write(struct virtio_net *dev, uint64_t addr, uint64_t len)
{
uint64_t page;
@@ -229,7 +238,7 @@ vhost_log_write(struct virtio_net *dev, uint64_t addr, uint64_t len)
}
}
-static inline void __attribute__((always_inline))
+static __rte_always_inline void
vhost_log_used_vring(struct virtio_net *dev, struct vhost_virtqueue *vq,
uint64_t offset, uint64_t len)
{
@@ -272,7 +281,7 @@ extern uint64_t VHOST_FEATURES;
extern struct virtio_net *vhost_devices[MAX_VHOST_DEVICE];
/* Convert guest physical address to host physical address */
-static inline phys_addr_t __attribute__((always_inline))
+static __rte_always_inline phys_addr_t
gpa_to_hpa(struct virtio_net *dev, uint64_t gpa, uint64_t size)
{
uint32_t i;
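
vhost_log_page() previously marked dirty pages with a plain read-modify-write OR, which can drop a concurrent lcore's bit when two pages share a log byte. A minimal sketch contrasting the two forms (helper names are illustrative; the patch itself uses the size-suffixed __sync builtin on the log byte):

	#include <stdint.h>

	static inline void set_bit_racy(volatile uint8_t *addr, unsigned int nr)
	{
		*addr |= (uint8_t)(1U << nr);	/* load, OR, store: not atomic */
	}

	static inline void set_bit_atomic(volatile uint8_t *addr, unsigned int nr)
	{
		/* one indivisible read-modify-write on the shared byte */
		__sync_fetch_and_or(addr, (uint8_t)(1U << nr));
	}

With the racy form, two lcores can both load the old byte, each OR in their own bit, and the second store erases the first lcore's bit, losing a dirty-page record during live migration.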
diff --git a/lib/librte_vhost/vhost_user.c b/lib/librte_vhost/vhost_user.c
index 28808815..ad2e8d38 100644
--- a/lib/librte_vhost/vhost_user.c
+++ b/lib/librte_vhost/vhost_user.c
@@ -168,8 +168,12 @@ vhost_user_set_features(struct virtio_net *dev, uint64_t features)
uint64_t vhost_features = 0;
rte_vhost_driver_get_features(dev->ifname, &vhost_features);
- if (features & ~vhost_features)
+ if (features & ~vhost_features) {
+ RTE_LOG(ERR, VHOST_CONFIG,
+ "(%d) received invalid negotiated features.\n",
+ dev->vid);
return -1;
+ }
if ((dev->flags & VIRTIO_DEV_RUNNING) && dev->features != features) {
if (dev->notify_ops->features_changed)
@@ -197,11 +201,11 @@ vhost_user_set_features(struct virtio_net *dev, uint64_t features)
*/
static int
vhost_user_set_vring_num(struct virtio_net *dev,
- struct vhost_vring_state *state)
+ VhostUserMsg *msg)
{
- struct vhost_virtqueue *vq = dev->virtqueue[state->index];
+ struct vhost_virtqueue *vq = dev->virtqueue[msg->payload.state.index];
- vq->size = state->num;
+ vq->size = msg->payload.state.num;
if (dev->dequeue_zero_copy) {
vq->nr_zmbuf = 0;
@@ -332,7 +336,7 @@ qva_to_vva(struct virtio_net *dev, uint64_t qva)
* This function then converts these to our address space.
*/
static int
-vhost_user_set_vring_addr(struct virtio_net *dev, struct vhost_vring_addr *addr)
+vhost_user_set_vring_addr(struct virtio_net *dev, VhostUserMsg *msg)
{
struct vhost_virtqueue *vq;
@@ -340,11 +344,11 @@ vhost_user_set_vring_addr(struct virtio_net *dev, struct vhost_vring_addr *addr)
return -1;
/* addr->index refers to the queue index. The txq 1, rxq is 0. */
- vq = dev->virtqueue[addr->index];
+ vq = dev->virtqueue[msg->payload.addr.index];
/* The addresses are converted from QEMU virtual to Vhost virtual. */
vq->desc = (struct vring_desc *)(uintptr_t)qva_to_vva(dev,
- addr->desc_user_addr);
+ msg->payload.addr.desc_user_addr);
if (vq->desc == 0) {
RTE_LOG(ERR, VHOST_CONFIG,
"(%d) failed to find desc ring address.\n",
@@ -352,11 +356,11 @@ vhost_user_set_vring_addr(struct virtio_net *dev, struct vhost_vring_addr *addr)
return -1;
}
- dev = numa_realloc(dev, addr->index);
- vq = dev->virtqueue[addr->index];
+ dev = numa_realloc(dev, msg->payload.addr.index);
+ vq = dev->virtqueue[msg->payload.addr.index];
vq->avail = (struct vring_avail *)(uintptr_t)qva_to_vva(dev,
- addr->avail_user_addr);
+ msg->payload.addr.avail_user_addr);
if (vq->avail == 0) {
RTE_LOG(ERR, VHOST_CONFIG,
"(%d) failed to find avail ring address.\n",
@@ -365,7 +369,7 @@ vhost_user_set_vring_addr(struct virtio_net *dev, struct vhost_vring_addr *addr)
}
vq->used = (struct vring_used *)(uintptr_t)qva_to_vva(dev,
- addr->used_user_addr);
+ msg->payload.addr.used_user_addr);
if (vq->used == 0) {
RTE_LOG(ERR, VHOST_CONFIG,
"(%d) failed to find used ring address.\n",
@@ -382,7 +386,7 @@ vhost_user_set_vring_addr(struct virtio_net *dev, struct vhost_vring_addr *addr)
vq->last_avail_idx = vq->used->idx;
}
- vq->log_guest_addr = addr->log_guest_addr;
+ vq->log_guest_addr = msg->payload.addr.log_guest_addr;
LOG_DEBUG(VHOST_CONFIG, "(%d) mapped address desc: %p\n",
dev->vid, vq->desc);
@@ -401,10 +405,12 @@ vhost_user_set_vring_addr(struct virtio_net *dev, struct vhost_vring_addr *addr)
*/
static int
vhost_user_set_vring_base(struct virtio_net *dev,
- struct vhost_vring_state *state)
+ VhostUserMsg *msg)
{
- dev->virtqueue[state->index]->last_used_idx = state->num;
- dev->virtqueue[state->index]->last_avail_idx = state->num;
+ dev->virtqueue[msg->payload.state.index]->last_used_idx =
+ msg->payload.state.num;
+ dev->virtqueue[msg->payload.state.index]->last_avail_idx =
+ msg->payload.state.num;
return 0;
}
@@ -517,6 +523,13 @@ vhost_user_set_mem_table(struct virtio_net *dev, struct VhostUserMsg *pmsg)
dev->max_guest_pages = 8;
dev->guest_pages = malloc(dev->max_guest_pages *
sizeof(struct guest_page));
+ if (dev->guest_pages == NULL) {
+ RTE_LOG(ERR, VHOST_CONFIG,
+ "(%d) failed to allocate memory "
+ "for dev->guest_pages\n",
+ dev->vid);
+ return -1;
+ }
}
dev->mem = rte_zmalloc("vhost-mem-table", sizeof(struct rte_vhost_memory) +
@@ -695,9 +708,9 @@ free_zmbufs(struct vhost_virtqueue *vq)
*/
static int
vhost_user_get_vring_base(struct virtio_net *dev,
- struct vhost_vring_state *state)
+ VhostUserMsg *msg)
{
- struct vhost_virtqueue *vq = dev->virtqueue[state->index];
+ struct vhost_virtqueue *vq = dev->virtqueue[msg->payload.state.index];
/* We have to stop the queue (virtio) if it is running. */
if (dev->flags & VIRTIO_DEV_RUNNING) {
@@ -708,10 +721,11 @@ vhost_user_get_vring_base(struct virtio_net *dev,
dev->flags &= ~VIRTIO_DEV_READY;
/* Here we are safe to get the last used index */
- state->num = vq->last_used_idx;
+ msg->payload.state.num = vq->last_used_idx;
RTE_LOG(INFO, VHOST_CONFIG,
- "vring base idx:%d file:%d\n", state->index, state->num);
+ "vring base idx:%d file:%d\n", msg->payload.state.index,
+ msg->payload.state.num);
/*
* Based on current qemu vhost-user implementation, this message is
* sent and only sent in vhost_vring_stop.
@@ -736,18 +750,19 @@ vhost_user_get_vring_base(struct virtio_net *dev,
*/
static int
vhost_user_set_vring_enable(struct virtio_net *dev,
- struct vhost_vring_state *state)
+ VhostUserMsg *msg)
{
- int enable = (int)state->num;
+ int enable = (int)msg->payload.state.num;
RTE_LOG(INFO, VHOST_CONFIG,
"set queue enable: %d to qp idx: %d\n",
- enable, state->index);
+ enable, msg->payload.state.index);
if (dev->notify_ops->vring_state_changed)
- dev->notify_ops->vring_state_changed(dev->vid, state->index, enable);
+ dev->notify_ops->vring_state_changed(dev->vid,
+ msg->payload.state.index, enable);
- dev->virtqueue[state->index]->enabled = enable;
+ dev->virtqueue[msg->payload.state.index]->enabled = enable;
return 0;
}
@@ -1036,17 +1051,17 @@ vhost_user_msg_handler(int vid, int fd)
break;
case VHOST_USER_SET_VRING_NUM:
- vhost_user_set_vring_num(dev, &msg.payload.state);
+ vhost_user_set_vring_num(dev, &msg);
break;
case VHOST_USER_SET_VRING_ADDR:
- vhost_user_set_vring_addr(dev, &msg.payload.addr);
+ vhost_user_set_vring_addr(dev, &msg);
break;
case VHOST_USER_SET_VRING_BASE:
- vhost_user_set_vring_base(dev, &msg.payload.state);
+ vhost_user_set_vring_base(dev, &msg);
break;
case VHOST_USER_GET_VRING_BASE:
- vhost_user_get_vring_base(dev, &msg.payload.state);
+ vhost_user_get_vring_base(dev, &msg);
msg.size = sizeof(msg.payload.state);
send_vhost_message(fd, &msg);
break;
@@ -1071,7 +1086,7 @@ vhost_user_msg_handler(int vid, int fd)
break;
case VHOST_USER_SET_VRING_ENABLE:
- vhost_user_set_vring_enable(dev, &msg.payload.state);
+ vhost_user_set_vring_enable(dev, &msg);
break;
case VHOST_USER_SEND_RARP:
vhost_user_send_rarp(dev, &msg);
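
The vhost_user.c refactor widens the handler signatures from per-payload pointers to the whole VhostUserMsg, so request-reply handlers such as VHOST_USER_GET_VRING_BASE can write their answer into the very buffer that send_vhost_message() then returns to the master. A toy sketch of that shape (types and names are stand-ins, not the vhost-user ABI):

	#include <stdint.h>
	#include <stdio.h>

	struct toy_msg {
		int request;
		struct { uint16_t index; uint16_t num; } state;
	};

	enum { TOY_GET_VRING_BASE = 11 };

	static int toy_get_vring_base(uint16_t last_used_idx, struct toy_msg *msg)
	{
		msg->state.num = last_used_idx;	/* reply overwrites the request */
		return 0;
	}

	int main(void)
	{
		struct toy_msg msg = { TOY_GET_VRING_BASE, { 0, 0 } };

		switch (msg.request) {
		case TOY_GET_VRING_BASE:
			toy_get_vring_base(42, &msg);
			printf("reply num = %u\n", msg.state.num);
			break;
		}
		return 0;
	}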
diff --git a/lib/librte_vhost/virtio_net.c b/lib/librte_vhost/virtio_net.c
index 48219e05..a5f0eeba 100644
--- a/lib/librte_vhost/virtio_net.c
+++ b/lib/librte_vhost/virtio_net.c
@@ -55,7 +55,7 @@ is_valid_virt_queue_idx(uint32_t idx, int is_tx, uint32_t nr_vring)
return (is_tx ^ (idx & 1)) == 0 && idx < nr_vring;
}
-static inline void __attribute__((always_inline))
+static __rte_always_inline void
do_flush_shadow_used_ring(struct virtio_net *dev, struct vhost_virtqueue *vq,
uint16_t to, uint16_t from, uint16_t size)
{
@@ -67,7 +67,7 @@ do_flush_shadow_used_ring(struct virtio_net *dev, struct vhost_virtqueue *vq,
size * sizeof(struct vring_used_elem));
}
-static inline void __attribute__((always_inline))
+static __rte_always_inline void
flush_shadow_used_ring(struct virtio_net *dev, struct vhost_virtqueue *vq)
{
uint16_t used_idx = vq->last_used_idx & (vq->size - 1);
@@ -95,7 +95,7 @@ flush_shadow_used_ring(struct virtio_net *dev, struct vhost_virtqueue *vq)
sizeof(vq->used->idx));
}
-static inline void __attribute__((always_inline))
+static __rte_always_inline void
update_shadow_used_ring(struct vhost_virtqueue *vq,
uint16_t desc_idx, uint16_t len)
{
@@ -114,11 +114,16 @@ update_shadow_used_ring(struct vhost_virtqueue *vq,
static void
virtio_enqueue_offload(struct rte_mbuf *m_buf, struct virtio_net_hdr *net_hdr)
{
- if (m_buf->ol_flags & PKT_TX_L4_MASK) {
+ uint64_t csum_l4 = m_buf->ol_flags & PKT_TX_L4_MASK;
+
+ if (m_buf->ol_flags & PKT_TX_TCP_SEG)
+ csum_l4 |= PKT_TX_TCP_CKSUM;
+
+ if (csum_l4) {
net_hdr->flags = VIRTIO_NET_HDR_F_NEEDS_CSUM;
net_hdr->csum_start = m_buf->l2_len + m_buf->l3_len;
- switch (m_buf->ol_flags & PKT_TX_L4_MASK) {
+ switch (csum_l4) {
case PKT_TX_TCP_CKSUM:
net_hdr->csum_offset = (offsetof(struct tcp_hdr,
cksum));
@@ -138,6 +143,15 @@ virtio_enqueue_offload(struct rte_mbuf *m_buf, struct virtio_net_hdr *net_hdr)
ASSIGN_UNLESS_EQUAL(net_hdr->flags, 0);
}
+	/* IP cksum verification cannot be bypassed, so calculate it here */
+ if (m_buf->ol_flags & PKT_TX_IP_CKSUM) {
+ struct ipv4_hdr *ipv4_hdr;
+
+ ipv4_hdr = rte_pktmbuf_mtod_offset(m_buf, struct ipv4_hdr *,
+ m_buf->l2_len);
+ ipv4_hdr->hdr_checksum = rte_ipv4_cksum(ipv4_hdr);
+ }
+
if (m_buf->ol_flags & PKT_TX_TCP_SEG) {
if (m_buf->ol_flags & PKT_TX_IPV4)
net_hdr->gso_type = VIRTIO_NET_HDR_GSO_TCPV4;
@@ -153,7 +167,7 @@ virtio_enqueue_offload(struct rte_mbuf *m_buf, struct virtio_net_hdr *net_hdr)
}
}
-static inline int __attribute__((always_inline))
+static __rte_always_inline int
copy_mbuf_to_desc(struct virtio_net *dev, struct vring_desc *descs,
struct rte_mbuf *m, uint16_t desc_idx, uint32_t size)
{
@@ -233,11 +247,11 @@ copy_mbuf_to_desc(struct virtio_net *dev, struct vring_desc *descs,
/**
* This function adds buffers to the virtio devices RX virtqueue. Buffers can
* be received from the physical port or from another virtio device. A packet
- * count is returned to indicate the number of packets that are succesfully
+ * count is returned to indicate the number of packets that are successfully
* added to the RX queue. This function works when the mbuf is scattered, but
* it doesn't support the mergeable feature.
*/
-static inline uint32_t __attribute__((always_inline))
+static __rte_always_inline uint32_t
virtio_dev_rx(struct virtio_net *dev, uint16_t queue_id,
struct rte_mbuf **pkts, uint32_t count)
{
@@ -335,7 +349,7 @@ virtio_dev_rx(struct virtio_net *dev, uint16_t queue_id,
return count;
}
-static inline int __attribute__((always_inline))
+static __rte_always_inline int
fill_vec_buf(struct virtio_net *dev, struct vhost_virtqueue *vq,
uint32_t avail_idx, uint32_t *vec_idx,
struct buf_vector *buf_vec, uint16_t *desc_chain_head,
@@ -424,7 +438,7 @@ reserve_avail_buf_mergeable(struct virtio_net *dev, struct vhost_virtqueue *vq,
return 0;
}
-static inline int __attribute__((always_inline))
+static __rte_always_inline int
copy_mbuf_to_desc_mergeable(struct virtio_net *dev, struct rte_mbuf *m,
struct buf_vector *buf_vec, uint16_t num_buffers)
{
@@ -512,7 +526,7 @@ copy_mbuf_to_desc_mergeable(struct virtio_net *dev, struct rte_mbuf *m,
return 0;
}
-static inline uint32_t __attribute__((always_inline))
+static __rte_always_inline uint32_t
virtio_dev_merge_rx(struct virtio_net *dev, uint16_t queue_id,
struct rte_mbuf **pkts, uint32_t count)
{
@@ -601,9 +615,11 @@ static inline bool
virtio_net_with_host_offload(struct virtio_net *dev)
{
if (dev->features &
- (VIRTIO_NET_F_CSUM | VIRTIO_NET_F_HOST_ECN |
- VIRTIO_NET_F_HOST_TSO4 | VIRTIO_NET_F_HOST_TSO6 |
- VIRTIO_NET_F_HOST_UFO))
+ ((1ULL << VIRTIO_NET_F_CSUM) |
+ (1ULL << VIRTIO_NET_F_HOST_ECN) |
+ (1ULL << VIRTIO_NET_F_HOST_TSO4) |
+ (1ULL << VIRTIO_NET_F_HOST_TSO6) |
+ (1ULL << VIRTIO_NET_F_HOST_UFO)))
return true;
return false;
@@ -655,7 +671,7 @@ parse_ethernet(struct rte_mbuf *m, uint16_t *l4_proto, void **l4_hdr)
}
}
-static inline void __attribute__((always_inline))
+static __rte_always_inline void
vhost_dequeue_offload(struct virtio_net_hdr *hdr, struct rte_mbuf *m)
{
uint16_t l4_proto = 0;
@@ -743,13 +759,13 @@ make_rarp_packet(struct rte_mbuf *rarp_mbuf, const struct ether_addr *mac)
return 0;
}
-static inline void __attribute__((always_inline))
+static __rte_always_inline void
put_zmbuf(struct zcopy_mbuf *zmbuf)
{
zmbuf->in_use = 0;
}
-static inline int __attribute__((always_inline))
+static __rte_always_inline int
copy_desc_to_mbuf(struct virtio_net *dev, struct vring_desc *descs,
uint16_t max_desc, struct rte_mbuf *m, uint16_t desc_idx,
struct rte_mempool *mbuf_pool)
@@ -899,7 +915,7 @@ copy_desc_to_mbuf(struct virtio_net *dev, struct vring_desc *descs,
return 0;
}
-static inline void __attribute__((always_inline))
+static __rte_always_inline void
update_used_ring(struct virtio_net *dev, struct vhost_virtqueue *vq,
uint32_t used_idx, uint32_t desc_idx)
{
@@ -910,7 +926,7 @@ update_used_ring(struct virtio_net *dev, struct vhost_virtqueue *vq,
sizeof(vq->used->ring[used_idx]));
}
-static inline void __attribute__((always_inline))
+static __rte_always_inline void
update_used_idx(struct virtio_net *dev, struct vhost_virtqueue *vq,
uint32_t count)
{
@@ -930,7 +946,7 @@ update_used_idx(struct virtio_net *dev, struct vhost_virtqueue *vq,
eventfd_write(vq->callfd, (eventfd_t)1);
}
-static inline struct zcopy_mbuf *__attribute__((always_inline))
+static __rte_always_inline struct zcopy_mbuf *
get_zmbuf(struct vhost_virtqueue *vq)
{
uint16_t i;
@@ -961,7 +977,7 @@ again:
return NULL;
}
-static inline bool __attribute__((always_inline))
+static __rte_always_inline bool
mbuf_is_consumed(struct rte_mbuf *m)
{
while (m) {