author     Luca Boccassi <luca.boccassi@gmail.com>  2017-11-08 14:15:11 +0000
committer  Luca Boccassi <luca.boccassi@gmail.com>  2017-11-08 14:45:54 +0000
commit     055c52583a2794da8ba1e85a48cce3832372b12f (patch)
tree       8ceb1cb78fbb46a0f341f8ee24feb3c6b5540013 /lib
parent     f239aed5e674965691846e8ce3f187dd47523689 (diff)
New upstream version 17.11-rc3
Change-Id: I6a5baa40612fe0c20f30b5fa773a6cbbac63a685
Signed-off-by: Luca Boccassi <luca.boccassi@gmail.com>
Diffstat (limited to 'lib')
-rw-r--r--lib/Makefile18
-rw-r--r--lib/librte_acl/Makefile1
-rw-r--r--lib/librte_acl/rte_acl.c3
-rw-r--r--lib/librte_acl/rte_acl_osdep.h1
-rw-r--r--lib/librte_bitratestats/Makefile3
-rw-r--r--lib/librte_bitratestats/rte_bitrate.c2
-rw-r--r--lib/librte_bitratestats/rte_bitrate.h2
-rw-r--r--lib/librte_cfgfile/Makefile1
-rw-r--r--lib/librte_cfgfile/rte_cfgfile.c441
-rw-r--r--lib/librte_cfgfile/rte_cfgfile.h96
-rw-r--r--lib/librte_cfgfile/rte_cfgfile_version.map11
-rw-r--r--lib/librte_cmdline/Makefile1
-rw-r--r--lib/librte_cmdline/cmdline.c3
-rw-r--r--lib/librte_cmdline/cmdline_parse.c2
-rw-r--r--lib/librte_cryptodev/Makefile6
-rw-r--r--lib/librte_cryptodev/rte_crypto.h6
-rw-r--r--lib/librte_cryptodev/rte_crypto_sym.h42
-rw-r--r--lib/librte_cryptodev/rte_cryptodev.c50
-rw-r--r--lib/librte_cryptodev/rte_cryptodev.h72
-rw-r--r--lib/librte_cryptodev/rte_cryptodev_pci.h92
-rw-r--r--lib/librte_cryptodev/rte_cryptodev_pmd.c212
-rw-r--r--lib/librte_cryptodev/rte_cryptodev_pmd.h116
-rw-r--r--lib/librte_cryptodev/rte_cryptodev_vdev.h100
-rw-r--r--lib/librte_cryptodev/rte_cryptodev_version.map16
-rw-r--r--lib/librte_distributor/Makefile1
-rw-r--r--lib/librte_distributor/rte_distributor.c2
-rw-r--r--lib/librte_distributor/rte_distributor_v20.c2
-rw-r--r--lib/librte_eal/bsdapp/eal/Makefile11
-rw-r--r--lib/librte_eal/bsdapp/eal/eal.c112
-rw-r--r--lib/librte_eal/bsdapp/eal/eal_interrupts.c35
-rw-r--r--lib/librte_eal/bsdapp/eal/eal_memory.c21
-rw-r--r--lib/librte_eal/bsdapp/eal/eal_pci.c670
-rw-r--r--lib/librte_eal/bsdapp/eal/eal_thread.c1
-rw-r--r--lib/librte_eal/bsdapp/eal/eal_timer.c1
-rw-r--r--lib/librte_eal/bsdapp/eal/include/exec-env/rte_dom0_common.h107
-rw-r--r--lib/librte_eal/bsdapp/eal/include/exec-env/rte_interrupts.h137
-rw-r--r--lib/librte_eal/bsdapp/eal/rte_eal_version.map239
-rw-r--r--lib/librte_eal/common/Makefile10
-rw-r--r--lib/librte_eal/common/arch/arm/rte_cpuflags.c2
-rw-r--r--lib/librte_eal/common/arch/arm/rte_cycles.c45
-rw-r--r--lib/librte_eal/common/arch/ppc_64/rte_cpuflags.c2
-rw-r--r--lib/librte_eal/common/arch/ppc_64/rte_cycles.c52
-rw-r--r--lib/librte_eal/common/arch/x86/rte_cpuflags.c40
-rw-r--r--lib/librte_eal/common/arch/x86/rte_cycles.c152
-rw-r--r--lib/librte_eal/common/arch/x86/rte_memcpy.c58
-rw-r--r--lib/librte_eal/common/arch/x86/rte_spinlock.c3
-rw-r--r--lib/librte_eal/common/eal_common_bus.c49
-rw-r--r--lib/librte_eal/common/eal_common_dev.c22
-rw-r--r--lib/librte_eal/common/eal_common_errno.c22
-rw-r--r--lib/librte_eal/common/eal_common_launch.c1
-rw-r--r--lib/librte_eal/common/eal_common_log.c43
-rw-r--r--lib/librte_eal/common/eal_common_memory.c5
-rw-r--r--lib/librte_eal/common/eal_common_memzone.c6
-rw-r--r--lib/librte_eal/common/eal_common_options.c11
-rw-r--r--lib/librte_eal/common/eal_common_pci.c580
-rw-r--r--lib/librte_eal/common/eal_common_pci_uio.c233
-rw-r--r--lib/librte_eal/common/eal_common_tailqs.c1
-rw-r--r--lib/librte_eal/common/eal_common_thread.c14
-rw-r--r--lib/librte_eal/common/eal_common_timer.c8
-rw-r--r--lib/librte_eal/common/eal_common_vdev.c342
-rw-r--r--lib/librte_eal/common/eal_internal_cfg.h3
-rw-r--r--lib/librte_eal/common/eal_options.h4
-rw-r--r--lib/librte_eal/common/eal_private.h155
-rw-r--r--lib/librte_eal/common/include/arch/arm/rte_vect.h2
-rw-r--r--lib/librte_eal/common/include/arch/x86/rte_atomic_32.h2
-rw-r--r--lib/librte_eal/common/include/rte_bitmap.h (renamed from lib/librte_sched/rte_bitmap.h)0
-rw-r--r--lib/librte_eal/common/include/rte_bus.h42
-rw-r--r--lib/librte_eal/common/include/rte_common.h23
-rw-r--r--lib/librte_eal/common/include/rte_debug.h2
-rw-r--r--lib/librte_eal/common/include/rte_dev.h31
-rw-r--r--lib/librte_eal/common/include/rte_eal.h52
-rw-r--r--lib/librte_eal/common/include/rte_eal_interrupts.h (renamed from lib/librte_eal/linuxapp/eal/include/exec-env/rte_interrupts.h)25
-rw-r--r--lib/librte_eal/common/include/rte_interrupts.h2
-rw-r--r--lib/librte_eal/common/include/rte_lcore.h14
-rw-r--r--lib/librte_eal/common/include/rte_log.h30
-rw-r--r--lib/librte_eal/common/include/rte_malloc.h17
-rw-r--r--lib/librte_eal/common/include/rte_memory.h103
-rw-r--r--lib/librte_eal/common/include/rte_memzone.h6
-rw-r--r--lib/librte_eal/common/include/rte_pci.h598
-rw-r--r--lib/librte_eal/common/include/rte_service.h197
-rw-r--r--lib/librte_eal/common/include/rte_service_component.h36
-rw-r--r--lib/librte_eal/common/include/rte_vdev.h131
-rw-r--r--lib/librte_eal/common/include/rte_version.h6
-rw-r--r--lib/librte_eal/common/include/rte_vfio.h153
-rw-r--r--lib/librte_eal/common/malloc_elem.c8
-rw-r--r--lib/librte_eal/common/malloc_elem.h4
-rw-r--r--lib/librte_eal/common/rte_malloc.c21
-rw-r--r--lib/librte_eal/common/rte_service.c362
-rw-r--r--lib/librte_eal/linuxapp/Makefile2
-rw-r--r--lib/librte_eal/linuxapp/eal/Makefile20
-rw-r--r--lib/librte_eal/linuxapp/eal/eal.c117
-rw-r--r--lib/librte_eal/linuxapp/eal/eal_alarm.c1
-rw-r--r--lib/librte_eal/linuxapp/eal/eal_hugepage_info.c1
-rw-r--r--lib/librte_eal/linuxapp/eal/eal_interrupts.c21
-rw-r--r--lib/librte_eal/linuxapp/eal/eal_log.c1
-rw-r--r--lib/librte_eal/linuxapp/eal/eal_memory.c99
-rw-r--r--lib/librte_eal/linuxapp/eal/eal_pci.c722
-rw-r--r--lib/librte_eal/linuxapp/eal/eal_pci_init.h97
-rw-r--r--lib/librte_eal/linuxapp/eal/eal_pci_uio.c567
-rw-r--r--lib/librte_eal/linuxapp/eal/eal_pci_vfio.c674
-rw-r--r--lib/librte_eal/linuxapp/eal/eal_thread.c1
-rw-r--r--lib/librte_eal/linuxapp/eal/eal_timer.c1
-rw-r--r--lib/librte_eal/linuxapp/eal/eal_vfio.c75
-rw-r--r--lib/librte_eal/linuxapp/eal/eal_vfio.h49
-rw-r--r--lib/librte_eal/linuxapp/eal/eal_vfio_mp_sync.c7
-rw-r--r--lib/librte_eal/linuxapp/eal/eal_xen_memory.c381
-rw-r--r--lib/librte_eal/linuxapp/eal/include/exec-env/rte_dom0_common.h108
-rw-r--r--lib/librte_eal/linuxapp/igb_uio/compat.h21
-rw-r--r--lib/librte_eal/linuxapp/igb_uio/igb_uio.c315
-rw-r--r--lib/librte_eal/linuxapp/kni/compat.h31
-rw-r--r--lib/librte_eal/linuxapp/kni/ethtool/igb/kcompat.h24
-rw-r--r--lib/librte_eal/linuxapp/xen_dom0/compat.h15
-rw-r--r--lib/librte_eal/linuxapp/xen_dom0/dom0_mm_dev.h107
-rw-r--r--lib/librte_eal/linuxapp/xen_dom0/dom0_mm_misc.c780
-rw-r--r--lib/librte_eal/rte_eal_version.map (renamed from lib/librte_eal/linuxapp/eal/rte_eal_version.map)72
-rw-r--r--lib/librte_efd/Makefile1
-rw-r--r--lib/librte_efd/rte_efd.c3
-rw-r--r--lib/librte_ether/Makefile10
-rw-r--r--lib/librte_ether/ethdev_profile.c164
-rw-r--r--lib/librte_ether/ethdev_profile.h56
-rw-r--r--lib/librte_ether/rte_ethdev.c960
-rw-r--r--lib/librte_ether/rte_ethdev.h434
-rw-r--r--lib/librte_ether/rte_ethdev_pci.h1
-rw-r--r--lib/librte_ether/rte_ethdev_vdev.h2
-rw-r--r--lib/librte_ether/rte_ethdev_version.map (renamed from lib/librte_ether/rte_ether_version.map)28
-rw-r--r--lib/librte_ether/rte_flow.c63
-rw-r--r--lib/librte_ether/rte_flow.h175
-rw-r--r--lib/librte_ether/rte_flow_driver.h40
-rw-r--r--lib/librte_ether/rte_mtr.c229
-rw-r--r--lib/librte_ether/rte_mtr.h730
-rw-r--r--lib/librte_ether/rte_mtr_driver.h221
-rw-r--r--lib/librte_ether/rte_tm.c62
-rw-r--r--lib/librte_ether/rte_tm.h60
-rw-r--r--lib/librte_ether/rte_tm_driver.h2
-rw-r--r--lib/librte_eventdev/Makefile5
-rw-r--r--lib/librte_eventdev/rte_event_eth_rx_adapter.c1240
-rw-r--r--lib/librte_eventdev/rte_event_eth_rx_adapter.h444
-rw-r--r--lib/librte_eventdev/rte_eventdev.c286
-rw-r--r--lib/librte_eventdev/rte_eventdev.h312
-rw-r--r--lib/librte_eventdev/rte_eventdev_pmd.h182
-rw-r--r--lib/librte_eventdev/rte_eventdev_pmd_pci.h1
-rw-r--r--lib/librte_eventdev/rte_eventdev_pmd_vdev.h2
-rw-r--r--lib/librte_eventdev/rte_eventdev_version.map27
-rw-r--r--lib/librte_flow_classify/Makefile53
-rw-r--r--lib/librte_flow_classify/rte_flow_classify.c691
-rw-r--r--lib/librte_flow_classify/rte_flow_classify.h289
-rw-r--r--lib/librte_flow_classify/rte_flow_classify_parse.c546
-rw-r--r--lib/librte_flow_classify/rte_flow_classify_parse.h74
-rw-r--r--lib/librte_flow_classify/rte_flow_classify_version.map12
-rw-r--r--lib/librte_gro/Makefile1
-rw-r--r--lib/librte_gro/rte_gro_version.map4
-rw-r--r--lib/librte_gso/Makefile54
-rw-r--r--lib/librte_gso/gso_common.c153
-rw-r--r--lib/librte_gso/gso_common.h171
-rw-r--r--lib/librte_gso/gso_tcp4.c102
-rw-r--r--lib/librte_gso/gso_tcp4.h74
-rw-r--r--lib/librte_gso/gso_tunnel_tcp4.c126
-rw-r--r--lib/librte_gso/gso_tunnel_tcp4.h75
-rw-r--r--lib/librte_gso/rte_gso.c110
-rw-r--r--lib/librte_gso/rte_gso.h148
-rw-r--r--lib/librte_gso/rte_gso_version.map7
-rw-r--r--lib/librte_hash/Makefile1
-rw-r--r--lib/librte_hash/rte_crc_arm64.h3
-rw-r--r--lib/librte_hash/rte_cuckoo_hash.c14
-rw-r--r--lib/librte_hash/rte_fbk_hash.c1
-rw-r--r--lib/librte_hash/rte_hash_crc.h3
-rw-r--r--lib/librte_hash/rte_jhash.h5
-rw-r--r--lib/librte_hash/rte_thash.h22
-rw-r--r--lib/librte_ip_frag/Makefile4
-rw-r--r--lib/librte_ip_frag/rte_ip_frag_version.map (renamed from lib/librte_ip_frag/rte_ipfrag_version.map)0
-rw-r--r--lib/librte_jobstats/Makefile1
-rw-r--r--lib/librte_jobstats/rte_jobstats.h2
-rw-r--r--lib/librte_kni/Makefile1
-rw-r--r--lib/librte_kni/rte_kni.c2
-rw-r--r--lib/librte_kni/rte_kni.h8
-rw-r--r--lib/librte_kvargs/Makefile1
-rw-r--r--lib/librte_latencystats/Makefile1
-rw-r--r--lib/librte_latencystats/rte_latencystats.c12
-rw-r--r--lib/librte_lpm/Makefile1
-rw-r--r--lib/librte_lpm/rte_lpm.c7
-rw-r--r--lib/librte_lpm/rte_lpm6.c6
-rw-r--r--lib/librte_mbuf/Makefile1
-rw-r--r--lib/librte_mbuf/rte_mbuf.c29
-rw-r--r--lib/librte_mbuf/rte_mbuf.h125
-rw-r--r--lib/librte_mbuf/rte_mbuf_ptype.c3
-rw-r--r--lib/librte_mbuf/rte_mbuf_ptype.h43
-rw-r--r--lib/librte_member/Makefile (renamed from lib/librte_eal/linuxapp/xen_dom0/Makefile)33
-rw-r--r--lib/librte_member/rte_member.c336
-rw-r--r--lib/librte_member/rte_member.h513
-rw-r--r--lib/librte_member/rte_member_ht.c586
-rw-r--r--lib/librte_member/rte_member_ht.h94
-rw-r--r--lib/librte_member/rte_member_vbf.c350
-rw-r--r--lib/librte_member/rte_member_vbf.h82
-rw-r--r--lib/librte_member/rte_member_version.map16
-rw-r--r--lib/librte_member/rte_member_x86.h107
-rw-r--r--lib/librte_mempool/Makefile3
-rw-r--r--lib/librte_mempool/rte_mempool.c177
-rw-r--r--lib/librte_mempool/rte_mempool.h194
-rw-r--r--lib/librte_mempool/rte_mempool_ops.c29
-rw-r--r--lib/librte_mempool/rte_mempool_version.map10
-rw-r--r--lib/librte_meter/Makefile1
-rw-r--r--lib/librte_metrics/Makefile1
-rw-r--r--lib/librte_metrics/rte_metrics.c2
-rw-r--r--lib/librte_net/Makefile3
-rw-r--r--lib/librte_net/rte_esp.h60
-rw-r--r--lib/librte_net/rte_ether.h2
-rw-r--r--lib/librte_net/rte_net.c1
-rw-r--r--lib/librte_net/rte_net_crc.c3
-rw-r--r--lib/librte_pci/Makefile49
-rw-r--r--lib/librte_pci/rte_pci.c212
-rw-r--r--lib/librte_pci/rte_pci.h263
-rw-r--r--lib/librte_pci/rte_pci_version.map15
-rw-r--r--lib/librte_pdump/Makefile3
-rw-r--r--lib/librte_pdump/rte_pdump.c16
-rw-r--r--lib/librte_pdump/rte_pdump.h4
-rw-r--r--lib/librte_pipeline/Makefile2
-rw-r--r--lib/librte_pipeline/rte_pipeline.c1
-rw-r--r--lib/librte_port/Makefile5
-rw-r--r--lib/librte_port/rte_port_ethdev.c6
-rw-r--r--lib/librte_port/rte_port_ethdev.h6
-rw-r--r--lib/librte_power/Makefile1
-rw-r--r--lib/librte_power/channel_commands.h44
-rw-r--r--lib/librte_power/guest_channel.c7
-rw-r--r--lib/librte_power/guest_channel.h15
-rw-r--r--lib/librte_power/rte_power.c9
-rw-r--r--lib/librte_power/rte_power.h41
-rw-r--r--lib/librte_power/rte_power_acpi_cpufreq.c111
-rw-r--r--lib/librte_power/rte_power_acpi_cpufreq.h40
-rw-r--r--lib/librte_power/rte_power_kvm_vm.c19
-rw-r--r--lib/librte_power/rte_power_kvm_vm.h35
-rw-r--r--lib/librte_power/rte_power_version.map10
-rw-r--r--lib/librte_reorder/Makefile1
-rw-r--r--lib/librte_reorder/rte_reorder.c1
-rw-r--r--lib/librte_ring/Makefile1
-rw-r--r--lib/librte_ring/rte_ring.h2
-rw-r--r--lib/librte_sched/Makefile4
-rw-r--r--lib/librte_sched/rte_sched.c2
-rw-r--r--lib/librte_security/Makefile54
-rw-r--r--lib/librte_security/rte_security.c149
-rw-r--r--lib/librte_security/rte_security.h529
-rw-r--r--lib/librte_security/rte_security_driver.h156
-rw-r--r--lib/librte_security/rte_security_version.map14
-rw-r--r--lib/librte_table/Makefile7
-rw-r--r--lib/librte_table/rte_table_hash.h305
-rw-r--r--lib/librte_table/rte_table_hash_cuckoo.c205
-rw-r--r--lib/librte_table/rte_table_hash_ext.c417
-rw-r--r--lib/librte_table/rte_table_hash_key16.c750
-rw-r--r--lib/librte_table/rte_table_hash_key32.c436
-rw-r--r--lib/librte_table/rte_table_hash_key8.c716
-rw-r--r--lib/librte_table/rte_table_hash_lru.c516
-rw-r--r--lib/librte_table/rte_table_version.map26
-rw-r--r--lib/librte_timer/Makefile1
-rw-r--r--lib/librte_timer/rte_timer.c6
-rw-r--r--lib/librte_vhost/Makefile5
-rw-r--r--lib/librte_vhost/fd_man.c5
-rw-r--r--lib/librte_vhost/iotlb.c350
-rw-r--r--lib/librte_vhost/iotlb.h76
-rw-r--r--lib/librte_vhost/rte_vhost.h6
-rw-r--r--lib/librte_vhost/socket.c37
-rw-r--r--lib/librte_vhost/vhost.c131
-rw-r--r--lib/librte_vhost/vhost.h76
-rw-r--r--lib/librte_vhost/vhost_user.c370
-rw-r--r--lib/librte_vhost/vhost_user.h25
-rw-r--r--lib/librte_vhost/virtio_net.c339
264 files changed, 16931 insertions, 11705 deletions
diff --git a/lib/Makefile b/lib/Makefile
index 86caba17..dc4e8df7 100644
--- a/lib/Makefile
+++ b/lib/Makefile
@@ -33,6 +33,8 @@ include $(RTE_SDK)/mk/rte.vars.mk
DIRS-y += librte_compat
DIRS-$(CONFIG_RTE_LIBRTE_EAL) += librte_eal
+DIRS-$(CONFIG_RTE_LIBRTE_PCI) += librte_pci
+DEPDIRS-librte_pci := librte_eal
DIRS-$(CONFIG_RTE_LIBRTE_RING) += librte_ring
DEPDIRS-librte_ring := librte_eal
DIRS-$(CONFIG_RTE_LIBRTE_MEMPOOL) += librte_mempool
@@ -42,7 +44,6 @@ DEPDIRS-librte_mbuf := librte_eal librte_mempool
DIRS-$(CONFIG_RTE_LIBRTE_TIMER) += librte_timer
DEPDIRS-librte_timer := librte_eal
DIRS-$(CONFIG_RTE_LIBRTE_CFGFILE) += librte_cfgfile
-DEPDIRS-librte_cfgfile := librte_eal
DIRS-$(CONFIG_RTE_LIBRTE_CMDLINE) += librte_cmdline
DEPDIRS-librte_cmdline := librte_eal
DIRS-$(CONFIG_RTE_LIBRTE_ETHER) += librte_ether
@@ -51,8 +52,12 @@ DEPDIRS-librte_ether += librte_mbuf
DIRS-$(CONFIG_RTE_LIBRTE_CRYPTODEV) += librte_cryptodev
DEPDIRS-librte_cryptodev := librte_eal librte_mempool librte_ring librte_mbuf
DEPDIRS-librte_cryptodev += librte_kvargs
+DIRS-$(CONFIG_RTE_LIBRTE_SECURITY) += librte_security
+DEPDIRS-librte_security := librte_eal librte_mempool librte_ring librte_mbuf
+DEPDIRS-librte_security += librte_ether
+DEPDIRS-librte_security += librte_cryptodev
DIRS-$(CONFIG_RTE_LIBRTE_EVENTDEV) += librte_eventdev
-DEPDIRS-librte_eventdev := librte_eal librte_ring
+DEPDIRS-librte_eventdev := librte_eal librte_ring librte_ether librte_hash
DIRS-$(CONFIG_RTE_LIBRTE_VHOST) += librte_vhost
DEPDIRS-librte_vhost := librte_eal librte_mempool librte_mbuf librte_ether
DIRS-$(CONFIG_RTE_LIBRTE_HASH) += librte_hash
@@ -63,6 +68,8 @@ DIRS-$(CONFIG_RTE_LIBRTE_LPM) += librte_lpm
DEPDIRS-librte_lpm := librte_eal
DIRS-$(CONFIG_RTE_LIBRTE_ACL) += librte_acl
DEPDIRS-librte_acl := librte_eal
+DIRS-$(CONFIG_RTE_LIBRTE_MEMBER) += librte_member
+DEPDIRS-librte_member := librte_eal librte_hash
DIRS-$(CONFIG_RTE_LIBRTE_NET) += librte_net
DEPDIRS-librte_net := librte_mbuf librte_eal
DIRS-$(CONFIG_RTE_LIBRTE_IP_FRAG) += librte_ip_frag
@@ -82,6 +89,8 @@ DIRS-$(CONFIG_RTE_LIBRTE_POWER) += librte_power
DEPDIRS-librte_power := librte_eal
DIRS-$(CONFIG_RTE_LIBRTE_METER) += librte_meter
DEPDIRS-librte_meter := librte_eal
+DIRS-$(CONFIG_RTE_LIBRTE_FLOW_CLASSIFY) += librte_flow_classify
+DEPDIRS-librte_flow_classify := librte_net librte_table librte_acl
DIRS-$(CONFIG_RTE_LIBRTE_SCHED) += librte_sched
DEPDIRS-librte_sched := librte_eal librte_mempool librte_mbuf librte_net
DEPDIRS-librte_sched += librte_timer
@@ -108,10 +117,13 @@ DIRS-$(CONFIG_RTE_LIBRTE_REORDER) += librte_reorder
DEPDIRS-librte_reorder := librte_eal librte_mempool librte_mbuf
DIRS-$(CONFIG_RTE_LIBRTE_PDUMP) += librte_pdump
DEPDIRS-librte_pdump := librte_eal librte_mempool librte_mbuf librte_ether
+DIRS-$(CONFIG_RTE_LIBRTE_GSO) += librte_gso
+DEPDIRS-librte_gso := librte_eal librte_mbuf librte_ether librte_net
+DEPDIRS-librte_gso += librte_mempool
ifeq ($(CONFIG_RTE_EXEC_ENV_LINUXAPP),y)
DIRS-$(CONFIG_RTE_LIBRTE_KNI) += librte_kni
endif
-DEPDIRS-librte_kni:= librte_eal librte_mempool librte_mbuf librte_ether
+DEPDIRS-librte_kni := librte_eal librte_mempool librte_mbuf librte_ether
include $(RTE_SDK)/mk/rte.subdir.mk
diff --git a/lib/librte_acl/Makefile b/lib/librte_acl/Makefile
index 59767920..e7e3c91d 100644
--- a/lib/librte_acl/Makefile
+++ b/lib/librte_acl/Makefile
@@ -36,6 +36,7 @@ LIB = librte_acl.a
CFLAGS += -O3
CFLAGS += $(WERROR_FLAGS) -I$(SRCDIR)
+LDLIBS += -lrte_eal
EXPORT_MAP := rte_acl_version.map
diff --git a/lib/librte_acl/rte_acl.c b/lib/librte_acl/rte_acl.c
index d1f40bef..67f41f3d 100644
--- a/lib/librte_acl/rte_acl.c
+++ b/lib/librte_acl/rte_acl.c
@@ -120,8 +120,7 @@ rte_acl_set_ctx_classify(struct rte_acl_ctx *ctx, enum rte_acl_classify_alg alg)
* if both conditions are met:
* at build time compiler supports AVX2 and target cpu supports AVX2.
*/
-static void __attribute__((constructor))
-rte_acl_init(void)
+RTE_INIT(rte_acl_init)
{
enum rte_acl_classify_alg alg = RTE_ACL_CLASSIFY_DEFAULT;
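The hunk above only swaps the open-coded constructor attribute for the RTE_INIT() helper; behaviour is unchanged. As a rough sketch of what the helper resolves to (assumed definition from rte_eal.h in this release, shown for context only, not part of the patch):

/* Assumed definition of the helper (see rte_eal.h); the tagged function
 * still runs as a constructor before main(). */
#define RTE_INIT(func) \
static void __attribute__((constructor, used)) func(void)

/* so the new form above is equivalent to the old open-coded one: */
RTE_INIT(rte_acl_init)
{
	/* select the best classify method for the running CPU */
}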
diff --git a/lib/librte_acl/rte_acl_osdep.h b/lib/librte_acl/rte_acl_osdep.h
index 9e4af530..ac712bfa 100644
--- a/lib/librte_acl/rte_acl_osdep.h
+++ b/lib/librte_acl/rte_acl_osdep.h
@@ -66,7 +66,6 @@
#include <rte_prefetch.h>
#include <rte_byteorder.h>
#include <rte_branch_prediction.h>
-#include <rte_memzone.h>
#include <rte_malloc.h>
#include <rte_eal.h>
#include <rte_eal_memconfig.h>
diff --git a/lib/librte_bitratestats/Makefile b/lib/librte_bitratestats/Makefile
index 58a20ea0..5054b679 100644
--- a/lib/librte_bitratestats/Makefile
+++ b/lib/librte_bitratestats/Makefile
@@ -35,10 +35,11 @@ include $(RTE_SDK)/mk/rte.vars.mk
LIB = librte_bitratestats.a
CFLAGS += $(WERROR_FLAGS) -I$(SRCDIR) -O3
+LDLIBS += -lrte_eal -lrte_metrics -lrte_ethdev
EXPORT_MAP := rte_bitratestats_version.map
-LIBABIVER := 1
+LIBABIVER := 2
# all source are stored in SRCS-y
SRCS-$(CONFIG_RTE_LIBRTE_BITRATE) := rte_bitrate.c
diff --git a/lib/librte_bitratestats/rte_bitrate.c b/lib/librte_bitratestats/rte_bitrate.c
index 3ceb3516..f373697a 100644
--- a/lib/librte_bitratestats/rte_bitrate.c
+++ b/lib/librte_bitratestats/rte_bitrate.c
@@ -84,7 +84,7 @@ rte_stats_bitrate_reg(struct rte_stats_bitrates *bitrate_data)
int
rte_stats_bitrate_calc(struct rte_stats_bitrates *bitrate_data,
- uint8_t port_id)
+ uint16_t port_id)
{
struct rte_stats_bitrate *port_data;
struct rte_eth_stats eth_stats;
diff --git a/lib/librte_bitratestats/rte_bitrate.h b/lib/librte_bitratestats/rte_bitrate.h
index 15fc270a..16467221 100644
--- a/lib/librte_bitratestats/rte_bitrate.h
+++ b/lib/librte_bitratestats/rte_bitrate.h
@@ -85,7 +85,7 @@ int rte_stats_bitrate_reg(struct rte_stats_bitrates *bitrate_data);
* - Negative value on error
*/
int rte_stats_bitrate_calc(struct rte_stats_bitrates *bitrate_data,
- uint8_t port_id);
+ uint16_t port_id);
#ifdef __cplusplus
}
diff --git a/lib/librte_cfgfile/Makefile b/lib/librte_cfgfile/Makefile
index 755ef11f..0bee43e2 100644
--- a/lib/librte_cfgfile/Makefile
+++ b/lib/librte_cfgfile/Makefile
@@ -38,6 +38,7 @@ LIB = librte_cfgfile.a
CFLAGS += -O3
CFLAGS += $(WERROR_FLAGS)
+CFLAGS += -I$(SRCDIR)/../librte_eal/common/include
EXPORT_MAP := rte_cfgfile_version.map
diff --git a/lib/librte_cfgfile/rte_cfgfile.c b/lib/librte_cfgfile/rte_cfgfile.c
index b54a523d..eacf93a8 100644
--- a/lib/librte_cfgfile/rte_cfgfile.c
+++ b/lib/librte_cfgfile/rte_cfgfile.c
@@ -35,21 +35,23 @@
#include <stdlib.h>
#include <string.h>
#include <ctype.h>
+#include <errno.h>
#include <rte_common.h>
-#include <rte_string_fns.h>
#include "rte_cfgfile.h"
struct rte_cfgfile_section {
char name[CFG_NAME_LEN];
int num_entries;
- struct rte_cfgfile_entry *entries[0];
+ int allocated_entries;
+ struct rte_cfgfile_entry *entries;
};
struct rte_cfgfile {
int flags;
int num_sections;
- struct rte_cfgfile_section *sections[0];
+ int allocated_sections;
+ struct rte_cfgfile_section *sections;
};
/** when we resize a file structure, how many extra entries
@@ -105,6 +107,49 @@ _strip(char *str, unsigned len)
return newlen;
}
+static struct rte_cfgfile_section *
+_get_section(struct rte_cfgfile *cfg, const char *sectionname)
+{
+ int i;
+
+ for (i = 0; i < cfg->num_sections; i++) {
+ if (strncmp(cfg->sections[i].name, sectionname,
+ sizeof(cfg->sections[0].name)) == 0)
+ return &cfg->sections[i];
+ }
+ return NULL;
+}
+
+static int
+_add_entry(struct rte_cfgfile_section *section, const char *entryname,
+ const char *entryvalue)
+{
+ /* resize entry structure if we don't have room for more entries */
+ if (section->num_entries == section->allocated_entries) {
+ struct rte_cfgfile_entry *n_entries = realloc(
+ section->entries,
+ sizeof(struct rte_cfgfile_entry) *
+ ((section->allocated_entries) +
+ CFG_ALLOC_ENTRY_BATCH));
+
+ if (n_entries == NULL)
+ return -ENOMEM;
+
+ section->entries = n_entries;
+ section->allocated_entries += CFG_ALLOC_ENTRY_BATCH;
+ }
+ /* fill up entry fields with key name and value */
+ struct rte_cfgfile_entry *curr_entry =
+ &section->entries[section->num_entries];
+
+ snprintf(curr_entry->name, sizeof(curr_entry->name), "%s", entryname);
+ snprintf(curr_entry->value,
+ sizeof(curr_entry->value), "%s", entryvalue);
+ section->num_entries++;
+
+ return 0;
+}
+
static int
rte_cfgfile_check_params(const struct rte_cfgfile_parameters *params)
{
@@ -144,10 +189,6 @@ struct rte_cfgfile *
rte_cfgfile_load_with_params(const char *filename, int flags,
const struct rte_cfgfile_parameters *params)
{
- int allocated_sections = CFG_ALLOC_SECTION_BATCH;
- int allocated_entries = 0;
- int curr_section = -1;
- int curr_entry = -1;
char buffer[CFG_NAME_LEN + CFG_VALUE_LEN + 4] = {0};
int lineno = 0;
struct rte_cfgfile *cfg = NULL;
@@ -159,28 +200,7 @@ rte_cfgfile_load_with_params(const char *filename, int flags,
if (f == NULL)
return NULL;
- cfg = malloc(sizeof(*cfg) + sizeof(cfg->sections[0]) *
- allocated_sections);
- if (cfg == NULL)
- goto error2;
-
- memset(cfg->sections, 0, sizeof(cfg->sections[0]) * allocated_sections);
-
- if (flags & CFG_FLAG_GLOBAL_SECTION) {
- curr_section = 0;
- allocated_entries = CFG_ALLOC_ENTRY_BATCH;
- cfg->sections[curr_section] = malloc(
- sizeof(*cfg->sections[0]) +
- sizeof(cfg->sections[0]->entries[0]) *
- allocated_entries);
- if (cfg->sections[curr_section] == NULL) {
- printf("Error - no memory for global section\n");
- goto error1;
- }
-
- snprintf(cfg->sections[curr_section]->name,
- sizeof(cfg->sections[0]->name), "GLOBAL");
- }
+ cfg = rte_cfgfile_create(flags);
while (fgets(buffer, sizeof(buffer), f) != NULL) {
char *pos = NULL;
@@ -191,13 +211,15 @@ rte_cfgfile_load_with_params(const char *filename, int flags,
"Check if line too long\n", lineno);
goto error1;
}
+ /* skip parsing if comment character found */
pos = memchr(buffer, params->comment_character, len);
- if (pos != NULL) {
+ if (pos != NULL && (*(pos-1) != '\\')) {
*pos = '\0';
len = pos - buffer;
}
len = _strip(buffer, len);
+ /* skip lines without useful content */
if (buffer[0] != '[' && memchr(buffer, '=', len) == NULL)
continue;
@@ -205,151 +227,252 @@ rte_cfgfile_load_with_params(const char *filename, int flags,
/* section heading line */
char *end = memchr(buffer, ']', len);
if (end == NULL) {
- printf("Error line %d - no terminating '['"
+ printf("Error line %d - no terminating ']'"
"character found\n", lineno);
goto error1;
}
*end = '\0';
_strip(&buffer[1], end - &buffer[1]);
- /* close off old section and add start new one */
- if (curr_section >= 0)
- cfg->sections[curr_section]->num_entries =
- curr_entry + 1;
- curr_section++;
-
- /* resize overall struct if we don't have room for more
- sections */
- if (curr_section == allocated_sections) {
- allocated_sections += CFG_ALLOC_SECTION_BATCH;
- struct rte_cfgfile *n_cfg = realloc(cfg,
- sizeof(*cfg) + sizeof(cfg->sections[0])
- * allocated_sections);
- if (n_cfg == NULL) {
- curr_section--;
- printf("Error - no more memory\n");
- goto error1;
- }
- cfg = n_cfg;
- }
-
- /* allocate space for new section */
- allocated_entries = CFG_ALLOC_ENTRY_BATCH;
- curr_entry = -1;
- cfg->sections[curr_section] = malloc(
- sizeof(*cfg->sections[0]) +
- sizeof(cfg->sections[0]->entries[0]) *
- allocated_entries);
- if (cfg->sections[curr_section] == NULL) {
- printf("Error - no more memory\n");
- goto error1;
- }
-
- snprintf(cfg->sections[curr_section]->name,
- sizeof(cfg->sections[0]->name),
- "%s", &buffer[1]);
+ rte_cfgfile_add_section(cfg, &buffer[1]);
} else {
- /* value line */
- if (curr_section < 0) {
- printf("Error line %d - value outside of"
- "section\n", lineno);
+ /* key and value line */
+ char *split[2] = {NULL};
+
+ split[0] = buffer;
+ split[1] = memchr(buffer, '=', len);
+ if (split[1] == NULL) {
+ printf("Error line %d - no '='"
+ "character found\n", lineno);
goto error1;
}
-
- struct rte_cfgfile_section *sect =
- cfg->sections[curr_section];
- int n;
- char *split[2] = {NULL};
- n = rte_strsplit(buffer, sizeof(buffer), split, 2, '=');
- if (flags & CFG_FLAG_EMPTY_VALUES) {
- if ((n < 1) || (n > 2)) {
- printf("Error at line %d - cannot split string, n=%d\n",
- lineno, n);
- goto error1;
- }
- } else {
- if (n != 2) {
- printf("Error at line %d - cannot split string, n=%d\n",
- lineno, n);
- goto error1;
- }
+ *split[1] = '\0';
+ split[1]++;
+
+ _strip(split[0], strlen(split[0]));
+ _strip(split[1], strlen(split[1]));
+ char *end = memchr(split[1], '\\', strlen(split[1]));
+
+ while (end != NULL) {
+ if (*(end+1) == params->comment_character) {
+ *end = '\0';
+ strcat(split[1], end+1);
+ } else
+ end++;
+ end = memchr(end, '\\', strlen(end));
}
- curr_entry++;
- if (curr_entry == allocated_entries) {
- allocated_entries += CFG_ALLOC_ENTRY_BATCH;
- struct rte_cfgfile_section *n_sect = realloc(
- sect, sizeof(*sect) +
- sizeof(sect->entries[0]) *
- allocated_entries);
- if (n_sect == NULL) {
- curr_entry--;
- printf("Error - no more memory\n");
- goto error1;
- }
- sect = cfg->sections[curr_section] = n_sect;
+ if (!(flags & CFG_FLAG_EMPTY_VALUES) &&
+ (*split[1] == '\0')) {
+ printf("Error at line %d - cannot use empty "
+ "values\n", lineno);
+ goto error1;
}
- sect->entries[curr_entry] = malloc(
- sizeof(*sect->entries[0]));
- if (sect->entries[curr_entry] == NULL) {
- printf("Error - no more memory\n");
+ if (cfg->num_sections == 0)
goto error1;
- }
- struct rte_cfgfile_entry *entry = sect->entries[
- curr_entry];
- snprintf(entry->name, sizeof(entry->name), "%s",
- split[0]);
- snprintf(entry->value, sizeof(entry->value), "%s",
- split[1] ? split[1] : "");
- _strip(entry->name, strnlen(entry->name,
- sizeof(entry->name)));
- _strip(entry->value, strnlen(entry->value,
- sizeof(entry->value)));
+ _add_entry(&cfg->sections[cfg->num_sections - 1],
+ split[0], split[1]);
}
}
fclose(f);
- cfg->flags = flags;
- cfg->num_sections = curr_section + 1;
- /* curr_section will still be -1 if we have an empty file */
- if (curr_section >= 0)
- cfg->sections[curr_section]->num_entries = curr_entry + 1;
return cfg;
-
error1:
- cfg->num_sections = curr_section + 1;
- if (curr_section >= 0)
- cfg->sections[curr_section]->num_entries = curr_entry + 1;
rte_cfgfile_close(cfg);
-error2:
fclose(f);
return NULL;
}
+struct rte_cfgfile *
+rte_cfgfile_create(int flags)
+{
+ int i;
+ struct rte_cfgfile *cfg = NULL;
-int rte_cfgfile_close(struct rte_cfgfile *cfg)
+ cfg = malloc(sizeof(*cfg));
+
+ if (cfg == NULL)
+ return NULL;
+
+ cfg->flags = flags;
+ cfg->num_sections = 0;
+
+ /* allocate first batch of sections and entries */
+ cfg->sections = malloc(sizeof(struct rte_cfgfile_section) *
+ CFG_ALLOC_SECTION_BATCH);
+
+ if (cfg->sections == NULL)
+ goto error1;
+
+ cfg->allocated_sections = CFG_ALLOC_SECTION_BATCH;
+
+ for (i = 0; i < CFG_ALLOC_SECTION_BATCH; i++) {
+ cfg->sections[i].entries = malloc(sizeof(
+ struct rte_cfgfile_entry) * CFG_ALLOC_ENTRY_BATCH);
+
+ if (cfg->sections[i].entries == NULL)
+ goto error1;
+
+ cfg->sections[i].num_entries = 0;
+ cfg->sections[i].allocated_entries = CFG_ALLOC_ENTRY_BATCH;
+ }
+
+ if (flags & CFG_FLAG_GLOBAL_SECTION)
+ rte_cfgfile_add_section(cfg, "GLOBAL");
+
+ return cfg;
+error1:
+ if (cfg->sections != NULL) {
+ for (i = 0; i < cfg->allocated_sections; i++) {
+ if (cfg->sections[i].entries != NULL) {
+ free(cfg->sections[i].entries);
+ cfg->sections[i].entries = NULL;
+ }
+ }
+ free(cfg->sections);
+ cfg->sections = NULL;
+ }
+ free(cfg);
+ return NULL;
+}
+
+int
+rte_cfgfile_add_section(struct rte_cfgfile *cfg, const char *sectionname)
+{
+ int i;
+
+ if (cfg == NULL)
+ return -EINVAL;
+
+ if (sectionname == NULL)
+ return -EINVAL;
+
+ /* resize overall struct if we don't have room for more sections */
+ if (cfg->num_sections == cfg->allocated_sections) {
+
+ struct rte_cfgfile_section *n_sections =
+ realloc(cfg->sections,
+ sizeof(struct rte_cfgfile_section) *
+ ((cfg->allocated_sections) +
+ CFG_ALLOC_SECTION_BATCH));
+
+ if (n_sections == NULL)
+ return -ENOMEM;
+
+ for (i = 0; i < CFG_ALLOC_SECTION_BATCH; i++) {
+ n_sections[i + cfg->allocated_sections].num_entries = 0;
+ n_sections[i +
+ cfg->allocated_sections].allocated_entries = 0;
+ n_sections[i + cfg->allocated_sections].entries = NULL;
+ }
+ cfg->sections = n_sections;
+ cfg->allocated_sections += CFG_ALLOC_SECTION_BATCH;
+ }
+
+ snprintf(cfg->sections[cfg->num_sections].name,
+ sizeof(cfg->sections[0].name), "%s", sectionname);
+ cfg->sections[cfg->num_sections].num_entries = 0;
+ cfg->num_sections++;
+
+ return 0;
+}
+
+int rte_cfgfile_add_entry(struct rte_cfgfile *cfg,
+ const char *sectionname, const char *entryname,
+ const char *entryvalue)
+{
+ int ret;
+
+ if ((cfg == NULL) || (sectionname == NULL) || (entryname == NULL)
+ || (entryvalue == NULL))
+ return -EINVAL;
+
+ if (rte_cfgfile_has_entry(cfg, sectionname, entryname) != 0)
+ return -EEXIST;
+
+ /* search for section pointer by sectionname */
+ struct rte_cfgfile_section *curr_section = _get_section(cfg,
+ sectionname);
+ if (curr_section == NULL)
+ return -EINVAL;
+
+ ret = _add_entry(curr_section, entryname, entryvalue);
+
+ return ret;
+}
+
+int rte_cfgfile_set_entry(struct rte_cfgfile *cfg, const char *sectionname,
+ const char *entryname, const char *entryvalue)
+{
+ int i;
+
+ if ((cfg == NULL) || (sectionname == NULL) || (entryname == NULL))
+ return -EINVAL;
+
+ /* search for section pointer by sectionname */
+ struct rte_cfgfile_section *curr_section = _get_section(cfg,
+ sectionname);
+ if (curr_section == NULL)
+ return -EINVAL;
+
+ if (entryvalue == NULL)
+ entryvalue = "";
+
+ for (i = 0; i < curr_section->num_entries; i++)
+ if (!strcmp(curr_section->entries[i].name, entryname)) {
+ snprintf(curr_section->entries[i].value,
+ sizeof(curr_section->entries[i].value),
+ "%s", entryvalue);
+ return 0;
+ }
+ printf("Error - entry name doesn't exist\n");
+ return -EINVAL;
+}
+
+int rte_cfgfile_save(struct rte_cfgfile *cfg, const char *filename)
{
int i, j;
+ if ((cfg == NULL) || (filename == NULL))
+ return -EINVAL;
+
+ FILE *f = fopen(filename, "w");
+
+ if (f == NULL)
+ return -EINVAL;
+
+ for (i = 0; i < cfg->num_sections; i++) {
+ fprintf(f, "[%s]\n", cfg->sections[i].name);
+
+ for (j = 0; j < cfg->sections[i].num_entries; j++) {
+ fprintf(f, "%s=%s\n",
+ cfg->sections[i].entries[j].name,
+ cfg->sections[i].entries[j].value);
+ }
+ }
+ return fclose(f);
+}
+
+int rte_cfgfile_close(struct rte_cfgfile *cfg)
+{
+ int i;
+
if (cfg == NULL)
return -1;
- for (i = 0; i < cfg->num_sections; i++) {
- if (cfg->sections[i] != NULL) {
- if (cfg->sections[i]->num_entries) {
- for (j = 0; j < cfg->sections[i]->num_entries;
- j++) {
- if (cfg->sections[i]->entries[j] !=
- NULL)
- free(cfg->sections[i]->
- entries[j]);
- }
+ if (cfg->sections != NULL) {
+ for (i = 0; i < cfg->allocated_sections; i++) {
+ if (cfg->sections[i].entries != NULL) {
+ free(cfg->sections[i].entries);
+ cfg->sections[i].entries = NULL;
}
- free(cfg->sections[i]);
}
+ free(cfg->sections);
+ cfg->sections = NULL;
}
free(cfg);
+ cfg = NULL;
return 0;
}
@@ -361,7 +484,7 @@ size_t length)
int i;
int num_sections = 0;
for (i = 0; i < cfg->num_sections; i++) {
- if (strncmp(cfg->sections[i]->name, sectionname, length) == 0)
+ if (strncmp(cfg->sections[i].name, sectionname, length) == 0)
num_sections++;
}
return num_sections;
@@ -375,23 +498,11 @@ rte_cfgfile_sections(struct rte_cfgfile *cfg, char *sections[],
for (i = 0; i < cfg->num_sections && i < max_sections; i++)
snprintf(sections[i], CFG_NAME_LEN, "%s",
- cfg->sections[i]->name);
+ cfg->sections[i].name);
return i;
}
-static const struct rte_cfgfile_section *
-_get_section(struct rte_cfgfile *cfg, const char *sectionname)
-{
- int i;
- for (i = 0; i < cfg->num_sections; i++) {
- if (strncmp(cfg->sections[i]->name, sectionname,
- sizeof(cfg->sections[0]->name)) == 0)
- return cfg->sections[i];
- }
- return NULL;
-}
-
int
rte_cfgfile_has_section(struct rte_cfgfile *cfg, const char *sectionname)
{
@@ -408,7 +519,18 @@ rte_cfgfile_section_num_entries(struct rte_cfgfile *cfg,
return s->num_entries;
}
+int
+rte_cfgfile_section_num_entries_by_index(struct rte_cfgfile *cfg,
+ char *sectionname, int index)
+{
+ if (index < 0 || index >= cfg->num_sections)
+ return -1;
+ const struct rte_cfgfile_section *sect = &(cfg->sections[index]);
+
+ snprintf(sectionname, CFG_NAME_LEN, "%s", sect->name);
+ return sect->num_entries;
+}
int
rte_cfgfile_section_entries(struct rte_cfgfile *cfg, const char *sectionname,
struct rte_cfgfile_entry *entries, int max_entries)
@@ -418,7 +540,7 @@ rte_cfgfile_section_entries(struct rte_cfgfile *cfg, const char *sectionname,
if (sect == NULL)
return -1;
for (i = 0; i < max_entries && i < sect->num_entries; i++)
- entries[i] = *sect->entries[i];
+ entries[i] = sect->entries[i];
return i;
}
@@ -432,11 +554,10 @@ rte_cfgfile_section_entries_by_index(struct rte_cfgfile *cfg, int index,
if (index < 0 || index >= cfg->num_sections)
return -1;
-
- sect = cfg->sections[index];
+ sect = &cfg->sections[index];
snprintf(sectionname, CFG_NAME_LEN, "%s", sect->name);
for (i = 0; i < max_entries && i < sect->num_entries; i++)
- entries[i] = *sect->entries[i];
+ entries[i] = sect->entries[i];
return i;
}
@@ -449,9 +570,9 @@ rte_cfgfile_get_entry(struct rte_cfgfile *cfg, const char *sectionname,
if (sect == NULL)
return NULL;
for (i = 0; i < sect->num_entries; i++)
- if (strncmp(sect->entries[i]->name, entryname, CFG_NAME_LEN)
- == 0)
- return sect->entries[i]->value;
+ if (strncmp(sect->entries[i].name, entryname, CFG_NAME_LEN)
+ == 0)
+ return sect->entries[i].value;
return NULL;
}
diff --git a/lib/librte_cfgfile/rte_cfgfile.h b/lib/librte_cfgfile/rte_cfgfile.h
index fa10d408..17f72757 100644
--- a/lib/librte_cfgfile/rte_cfgfile.h
+++ b/lib/librte_cfgfile/rte_cfgfile.h
@@ -121,6 +121,82 @@ struct rte_cfgfile *rte_cfgfile_load_with_params(const char *filename,
int flags, const struct rte_cfgfile_parameters *params);
/**
+ * Create new cfgfile instance with empty sections and entries
+ *
+ * @param flags
+ * - CFG_FLAG_GLOBAL_SECTION
+ * Indicates that the file supports key value entries before the first
+ * defined section. These entries can be accessed in the "GLOBAL"
+ * section.
+ * - CFG_FLAG_EMPTY_VALUES
+ * Indicates that file supports key value entries where the value can
+ * be zero length (e.g., "key=").
+ * @return
+ * Handle to cfgfile instance on success, NULL otherwise
+ */
+struct rte_cfgfile *rte_cfgfile_create(int flags);
+
+/**
+ * Add section in cfgfile instance.
+ *
+ * @param cfg
+ * Pointer to the cfgfile structure.
+ * @param sectionname
+ * Section name which will be add to cfgfile.
+ * @return
+ * 0 on success, -ENOMEM if can't add section
+ */
+int
+rte_cfgfile_add_section(struct rte_cfgfile *cfg, const char *sectionname);
+
+/**
+ * Add entry to specified section in cfgfile instance.
+ *
+ * @param cfg
+ * Pointer to the cfgfile structure.
+ * @param sectionname
+ * Given section name to add an entry.
+ * @param entryname
+ * Entry name to add.
+ * @param entryvalue
+ * Entry value to add.
+ * @return
+ * 0 on success, -EEXIST if entry already exist, -EINVAL if bad argument
+ */
+int rte_cfgfile_add_entry(struct rte_cfgfile *cfg,
+ const char *sectionname, const char *entryname,
+ const char *entryvalue);
+
+/**
+ * Update value of specified entry name in given section in config file
+ *
+ * @param cfg
+ * Config file
+ * @param sectionname
+ * Section name
+ * @param entryname
+ * Entry name to look for the value change
+ * @param entryvalue
+ * New entry value. Can be also an empty string if CFG_FLAG_EMPTY_VALUES = 1
+ * @return
+ * 0 on success, -EINVAL if bad argument
+ */
+int rte_cfgfile_set_entry(struct rte_cfgfile *cfg, const char *sectionname,
+ const char *entryname, const char *entryvalue);
+
+/**
+ * Save object cfgfile to file on disc
+ *
+ * @param cfg
+ * Config file structure
+ * @param filename
+ * File name to save data
+ * @return
+ * 0 on success, errno otherwise
+ */
+int rte_cfgfile_save(struct rte_cfgfile *cfg, const char *filename);
+
+/**
* Get number of sections in config file
*
* @param cfg
@@ -184,6 +260,26 @@ int rte_cfgfile_section_num_entries(struct rte_cfgfile *cfg,
const char *sectionname);
/**
+* Get number of entries in given config file section
+*
+* The index of a section is the same as the index of its name in the
+* result of rte_cfgfile_sections. This API can be used when there are
+* multiple sections with the same name.
+*
+* @param cfg
+* Config file
+* @param sectionname
+* Section name
+* @param index
+* Section index
+* @return
+* Number of entries in section on success, -1 otherwise
+*/
+int rte_cfgfile_section_num_entries_by_index(struct rte_cfgfile *cfg,
+ char *sectionname,
+ int index);
+
+/**
* Get section entries as key-value pairs
*
* If multiple sections have the given name this function operates on the
diff --git a/lib/librte_cfgfile/rte_cfgfile_version.map b/lib/librte_cfgfile/rte_cfgfile_version.map
index 5fe60f72..cc4a11f6 100644
--- a/lib/librte_cfgfile/rte_cfgfile_version.map
+++ b/lib/librte_cfgfile/rte_cfgfile_version.map
@@ -27,3 +27,14 @@ DPDK_17.05 {
rte_cfgfile_load_with_params;
} DPDK_16.04;
+
+DPDK_17.11 {
+ global:
+
+ rte_cfgfile_add_entry;
+ rte_cfgfile_add_section;
+ rte_cfgfile_create;
+ rte_cfgfile_save;
+ rte_cfgfile_set_entry;
+
+} DPDK_17.05;
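The cfgfile calls exported above (rte_cfgfile_create(), rte_cfgfile_add_section(), rte_cfgfile_add_entry(), rte_cfgfile_set_entry(), rte_cfgfile_save()) combine roughly as in the following sketch. It is illustrative only and not part of the patch; the section name, entry values and target path are made up:

#include <rte_cfgfile.h>

/* Build a config in memory with the new API and flush it to disk. */
static int
write_sample_cfg(const char *path)
{
	struct rte_cfgfile *cfg = rte_cfgfile_create(CFG_FLAG_EMPTY_VALUES);

	if (cfg == NULL)
		return -1;

	rte_cfgfile_add_section(cfg, "PORT0");
	rte_cfgfile_add_entry(cfg, "PORT0", "mode", "rx");
	/* overwrite an existing value in place */
	rte_cfgfile_set_entry(cfg, "PORT0", "mode", "rxtx");

	if (rte_cfgfile_save(cfg, path) != 0) {
		rte_cfgfile_close(cfg);
		return -1;
	}
	return rte_cfgfile_close(cfg);
}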
diff --git a/lib/librte_cmdline/Makefile b/lib/librte_cmdline/Makefile
index 644f68e4..2c48e62b 100644
--- a/lib/librte_cmdline/Makefile
+++ b/lib/librte_cmdline/Makefile
@@ -54,6 +54,7 @@ SRCS-$(CONFIG_RTE_LIBRTE_CMDLINE) += cmdline_socket.c
SRCS-$(CONFIG_RTE_LIBRTE_CMDLINE) += cmdline_parse_portlist.c
CFLAGS += -D_GNU_SOURCE
+LDLIBS += -lrte_eal
# install includes
INCS := cmdline.h cmdline_parse.h cmdline_parse_num.h cmdline_parse_ipaddr.h
diff --git a/lib/librte_cmdline/cmdline.c b/lib/librte_cmdline/cmdline.c
index a9c47be3..d7491651 100644
--- a/lib/librte_cmdline/cmdline.c
+++ b/lib/librte_cmdline/cmdline.c
@@ -205,7 +205,8 @@ cmdline_printf(const struct cmdline *cl, const char *fmt, ...)
}
if (ret >= BUFSIZ)
ret = BUFSIZ - 1;
- write(cl->s_out, buf, ret);
+ ret = write(cl->s_out, buf, ret);
+ (void)ret;
free(buf);
#endif
}
diff --git a/lib/librte_cmdline/cmdline_parse.c b/lib/librte_cmdline/cmdline_parse.c
index 56491eac..3e12ee54 100644
--- a/lib/librte_cmdline/cmdline_parse.c
+++ b/lib/librte_cmdline/cmdline_parse.c
@@ -163,7 +163,7 @@ static int
match_inst(cmdline_parse_inst_t *inst, const char *buf,
unsigned int nb_match_token, void *resbuf, unsigned resbuf_size)
{
- cmdline_parse_token_hdr_t * token_p;
+ cmdline_parse_token_hdr_t *token_p = NULL;
unsigned int i=0;
int n = 0;
struct cmdline_token_hdr token_hdr;
diff --git a/lib/librte_cryptodev/Makefile b/lib/librte_cryptodev/Makefile
index 6ac331bc..8e780b83 100644
--- a/lib/librte_cryptodev/Makefile
+++ b/lib/librte_cryptodev/Makefile
@@ -34,11 +34,13 @@ include $(RTE_SDK)/mk/rte.vars.mk
LIB = librte_cryptodev.a
# library version
-LIBABIVER := 3
+LIBABIVER := 4
# build flags
CFLAGS += -O3
CFLAGS += $(WERROR_FLAGS)
+LDLIBS += -lrte_eal -lrte_mempool -lrte_ring -lrte_mbuf
+LDLIBS += -lrte_kvargs
# library source files
SRCS-y += rte_cryptodev.c rte_cryptodev_pmd.c
@@ -48,8 +50,6 @@ SYMLINK-y-include += rte_crypto.h
SYMLINK-y-include += rte_crypto_sym.h
SYMLINK-y-include += rte_cryptodev.h
SYMLINK-y-include += rte_cryptodev_pmd.h
-SYMLINK-y-include += rte_cryptodev_vdev.h
-SYMLINK-y-include += rte_cryptodev_pci.h
# versioning export map
EXPORT_MAP := rte_cryptodev_version.map
diff --git a/lib/librte_cryptodev/rte_crypto.h b/lib/librte_cryptodev/rte_crypto.h
index 10fe0804..3d672fe7 100644
--- a/lib/librte_cryptodev/rte_crypto.h
+++ b/lib/librte_cryptodev/rte_crypto.h
@@ -86,7 +86,8 @@ enum rte_crypto_op_status {
*/
enum rte_crypto_op_sess_type {
RTE_CRYPTO_OP_WITH_SESSION, /**< Session based crypto operation */
- RTE_CRYPTO_OP_SESSIONLESS /**< Session-less crypto operation */
+ RTE_CRYPTO_OP_SESSIONLESS, /**< Session-less crypto operation */
+ RTE_CRYPTO_OP_SECURITY_SESSION /**< Security session crypto operation */
};
/**
@@ -117,7 +118,7 @@ struct rte_crypto_op {
struct rte_mempool *mempool;
/**< crypto operation mempool which operation is allocated from */
- phys_addr_t phys_addr;
+ rte_iova_t phys_addr;
/**< physical address of crypto operation */
RTE_STD_C11
@@ -144,6 +145,7 @@ __rte_crypto_op_reset(struct rte_crypto_op *op, enum rte_crypto_op_type type)
case RTE_CRYPTO_OP_TYPE_SYMMETRIC:
__rte_crypto_sym_op_reset(op->sym);
break;
+ case RTE_CRYPTO_OP_TYPE_UNDEFINED:
default:
break;
}
diff --git a/lib/librte_cryptodev/rte_crypto_sym.h b/lib/librte_cryptodev/rte_crypto_sym.h
index 0ceaa917..c981f0b9 100644
--- a/lib/librte_cryptodev/rte_crypto_sym.h
+++ b/lib/librte_cryptodev/rte_crypto_sym.h
@@ -160,9 +160,6 @@ struct rte_crypto_cipher_xform {
* Cipher key length is in bytes. For AES it can be 128 bits (16 bytes),
* 192 bits (24 bytes) or 256 bits (32 bytes).
*
- * For the CCM mode of operation, the only supported key length is 128
- * bits (16 bytes).
- *
* For the RTE_CRYPTO_CIPHER_AES_F8 mode of operation, key.length
* should be set to the combined length of the encryption key and the
* keymask. Since the keymask and the encryption key are the same size,
@@ -196,7 +193,9 @@ struct rte_crypto_cipher_xform {
* space for the implementation to write in the flags
* in the first byte). Note that a full 16 bytes should
* be allocated, even though the length field will
- * have a value less than this.
+ * have a value less than this. Note that the PMDs may
+ * modify the memory reserved (the first byte and the
+ * final padding)
*
* - For AES-XTS, this is the 128bit tweak, i, from
* IEEE Std 1619-2007.
@@ -427,7 +426,11 @@ struct rte_crypto_aead_xform {
uint16_t digest_length;
uint16_t aad_length;
- /**< The length of the additional authenticated data (AAD) in bytes. */
+ /**< The length of the additional authenticated data (AAD) in bytes.
+ * For CCM mode, this is the length of the actual AAD, even though
+ * it is required to reserve 18 bytes before the AAD and padding
+ * at the end of it, so a multiple of 16 bytes is allocated.
+ */
};
/** Crypto transformation types */
@@ -505,6 +508,8 @@ struct rte_crypto_sym_op {
/**< Handle for the initialised session context */
struct rte_crypto_sym_xform *xform;
/**< Session-less API crypto operation parameters */
+ struct rte_security_session *sec_session;
+ /**< Handle for the initialised security session context */
};
RTE_STD_C11
@@ -543,7 +548,7 @@ struct rte_crypto_sym_op {
* For GCM (@ref RTE_CRYPTO_AEAD_AES_GCM), for
* "digest result" read "authentication tag T".
*/
- phys_addr_t phys_addr;
+ rte_iova_t phys_addr;
/**< Physical address of digest */
} digest; /**< Digest parameters */
struct {
@@ -555,20 +560,19 @@ struct rte_crypto_sym_op {
* Specifically for CCM (@ref RTE_CRYPTO_AEAD_AES_CCM),
* the caller should setup this field as follows:
*
- * - the nonce should be written starting at an offset
- * of one byte into the array, leaving room for the
- * implementation to write in the flags to the first
- * byte.
- *
- * - the additional authentication data itself should
+ * - the additional authentication data itself should
* be written starting at an offset of 18 bytes into
- * the array, leaving room for the length encoding in
- * the first two bytes of the second block.
+ * the array, leaving room for the first block (16 bytes)
+ * and the length encoding in the first two bytes of the
+ * second block.
*
* - the array should be big enough to hold the above
- * fields, plus any padding to round this up to the
- * nearest multiple of the block size (16 bytes).
- * Padding will be added by the implementation.
+ * fields, plus any padding to round this up to the
+ * nearest multiple of the block size (16 bytes).
+ * Padding will be added by the implementation.
+ *
+ * - Note that PMDs may modify the memory reserved
+ * (first 18 bytes and the final padding).
*
* Finally, for GCM (@ref RTE_CRYPTO_AEAD_AES_GCM), the
* caller should setup this field as follows:
@@ -579,7 +583,7 @@ struct rte_crypto_sym_op {
* of the block size (16 bytes).
*
*/
- phys_addr_t phys_addr; /**< physical address */
+ rte_iova_t phys_addr; /**< physical address */
} aad;
/**< Additional authentication parameters */
} aead;
@@ -676,7 +680,7 @@ struct rte_crypto_sym_op {
* will overwrite any data at this location.
*
*/
- phys_addr_t phys_addr;
+ rte_iova_t phys_addr;
/**< Physical address of digest */
} digest; /**< Digest parameters */
} auth;
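The updated CCM comments above change the caller-visible AAD layout: the nonce is no longer copied into aad.data, the real AAD starts at an 18-byte offset (16-byte first block plus two length-encoding bytes), the allocation is rounded up to a 16-byte multiple, and PMDs may overwrite the reserved bytes. A minimal caller-side sketch, assuming hypothetical real_aad/aad_len inputs; the use of rte_zmalloc() is an illustrative choice, not mandated by the patch:

#include <string.h>
#include <rte_common.h>
#include <rte_malloc.h>
#include <rte_crypto_sym.h>

#define CCM_AAD_OFFSET 18 /* 16-byte first block + 2-byte length field */

static uint8_t *
ccm_prepare_aad(const uint8_t *real_aad, uint16_t aad_len,
		struct rte_crypto_sym_xform *xform)
{
	/* total allocation padded to a multiple of the 16-byte block size */
	uint32_t alloc_len = RTE_ALIGN_CEIL(CCM_AAD_OFFSET + aad_len, 16);
	uint8_t *buf = rte_zmalloc(NULL, alloc_len, 16);

	if (buf == NULL)
		return NULL;

	/* the AAD itself starts at offset 18; bytes 0-17 are reserved for
	 * the PMD and may be overwritten by it */
	memcpy(buf + CCM_AAD_OFFSET, real_aad, aad_len);

	/* aad_length carries only the real AAD size, not the padding */
	xform->aead.aad_length = aad_len;
	return buf;
}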
diff --git a/lib/librte_cryptodev/rte_cryptodev.c b/lib/librte_cryptodev/rte_cryptodev.c
index 327d7e84..b40c0282 100644
--- a/lib/librte_cryptodev/rte_cryptodev.c
+++ b/lib/librte_cryptodev/rte_cryptodev.c
@@ -377,12 +377,6 @@ rte_cryptodev_get_feature_name(uint64_t flag)
}
}
-int
-rte_cryptodev_create_vdev(const char *name, const char *args)
-{
- return rte_vdev_init(name, args);
-}
-
struct rte_cryptodev *
rte_cryptodev_pmd_get_dev(uint8_t dev_id)
{
@@ -488,6 +482,16 @@ rte_cryptodev_devices_get(const char *driver_name, uint8_t *devices,
return count;
}
+void *
+rte_cryptodev_get_sec_ctx(uint8_t dev_id)
+{
+ if (rte_crypto_devices[dev_id].feature_flags &
+ RTE_CRYPTODEV_FF_SECURITY)
+ return rte_crypto_devices[dev_id].security_ctx;
+
+ return NULL;
+}
+
int
rte_cryptodev_socket_id(uint8_t dev_id)
{
@@ -583,6 +587,9 @@ rte_cryptodev_pmd_allocate(const char *name, int socket_id)
cryptodev->data->socket_id = socket_id;
cryptodev->data->dev_started = 0;
+ /* init user callbacks */
+ TAILQ_INIT(&(cryptodev->link_intr_cbs));
+
cryptodev->attached = RTE_CRYPTODEV_ATTACHED;
cryptodev_globals.nb_devs++;
@@ -1271,7 +1278,7 @@ rte_crypto_op_init(struct rte_mempool *mempool,
__rte_crypto_op_reset(op, type);
- op->phys_addr = rte_mem_virt2phy(_op_data);
+ op->phys_addr = rte_mem_virt2iova(_op_data);
op->mempool = mempool;
}
@@ -1362,12 +1369,6 @@ TAILQ_HEAD(cryptodev_driver_list, cryptodev_driver);
static struct cryptodev_driver_list cryptodev_driver_list =
TAILQ_HEAD_INITIALIZER(cryptodev_driver_list);
-struct cryptodev_driver {
- TAILQ_ENTRY(cryptodev_driver) next; /**< Next in list. */
- const struct rte_driver *driver;
- uint8_t id;
-};
-
int
rte_cryptodev_driver_id_get(const char *name)
{
@@ -1388,6 +1389,17 @@ rte_cryptodev_driver_id_get(const char *name)
}
const char *
+rte_cryptodev_name_get(uint8_t dev_id)
+{
+ struct rte_cryptodev *dev = rte_cryptodev_pmd_get_dev(dev_id);
+
+ if (dev == NULL)
+ return NULL;
+
+ return dev->data->name;
+}
+
+const char *
rte_cryptodev_driver_name_get(uint8_t driver_id)
{
struct cryptodev_driver *driver;
@@ -1399,15 +1411,13 @@ rte_cryptodev_driver_name_get(uint8_t driver_id)
}
uint8_t
-rte_cryptodev_allocate_driver(const struct rte_driver *drv)
+rte_cryptodev_allocate_driver(struct cryptodev_driver *crypto_drv,
+ const struct rte_driver *drv)
{
- struct cryptodev_driver *driver;
-
- driver = malloc(sizeof(*driver));
- driver->driver = drv;
- driver->id = nb_drivers;
+ crypto_drv->driver = drv;
+ crypto_drv->id = nb_drivers;
- TAILQ_INSERT_TAIL(&cryptodev_driver_list, driver, next);
+ TAILQ_INSERT_TAIL(&cryptodev_driver_list, crypto_drv, next);
return nb_drivers++;
}
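rte_cryptodev_get_sec_ctx() added above only returns the device's rte_security context when the new RTE_CRYPTODEV_FF_SECURITY feature flag is set. A hedged usage sketch (dev_id is assumed to refer to a configured crypto device):

#include <rte_cryptodev.h>

/* Return the rte_security context of dev_id, or NULL when the device
 * does not advertise protocol-offload support. */
static void *
crypto_dev_sec_ctx(uint8_t dev_id)
{
	struct rte_cryptodev_info info;

	rte_cryptodev_info_get(dev_id, &info);
	if (info.feature_flags & RTE_CRYPTODEV_FF_SECURITY)
		return rte_cryptodev_get_sec_ctx(dev_id);
	return NULL;
}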
diff --git a/lib/librte_cryptodev/rte_cryptodev.h b/lib/librte_cryptodev/rte_cryptodev.h
index 7ec9c4bc..dade5548 100644
--- a/lib/librte_cryptodev/rte_cryptodev.h
+++ b/lib/librte_cryptodev/rte_cryptodev.h
@@ -49,7 +49,6 @@ extern "C" {
#include "rte_crypto.h"
#include "rte_dev.h"
#include <rte_common.h>
-#include <rte_vdev.h>
extern const char **rte_cyptodev_names;
@@ -60,10 +59,10 @@ extern const char **rte_cyptodev_names;
RTE_FMT("%s() line %u: " RTE_FMT_HEAD(__VA_ARGS__,) "\n", \
__func__, __LINE__, RTE_FMT_TAIL(__VA_ARGS__,)))
-#define CDEV_PMD_LOG_ERR(dev, ...) \
- RTE_LOG(ERR, CRYPTODEV, \
- RTE_FMT("[%s] %s() line %u: " RTE_FMT_HEAD(__VA_ARGS__,) "\n", \
- dev, __func__, __LINE__, RTE_FMT_TAIL(__VA_ARGS__,)))
+#define CDEV_LOG_INFO(...) \
+ RTE_LOG(INFO, CRYPTODEV, \
+ RTE_FMT(RTE_FMT_HEAD(__VA_ARGS__,) "\n", \
+ RTE_FMT_TAIL(__VA_ARGS__,)))
#ifdef RTE_LIBRTE_CRYPTODEV_DEBUG
#define CDEV_LOG_DEBUG(...) \
@@ -111,7 +110,7 @@ extern const char **rte_cyptodev_names;
* to calculate address from.
*/
#define rte_crypto_op_ctophys_offset(c, o) \
- (phys_addr_t)((c)->phys_addr + (o))
+ (rte_iova_t)((c)->phys_addr + (o))
/**
* Crypto parameters range description
@@ -351,6 +350,8 @@ rte_cryptodev_get_aead_algo_enum(enum rte_crypto_aead_algorithm *algo_enum,
/**< Utilises CPU NEON instructions */
#define RTE_CRYPTODEV_FF_CPU_ARM_CE (1ULL << 11)
/**< Utilises ARM CPU Cryptographic Extensions */
+#define RTE_CRYPTODEV_FF_SECURITY (1ULL << 12)
+/**< Support Security Protocol Processing */
/**
@@ -434,33 +435,29 @@ struct rte_cryptodev_stats {
/**< Max length of name of crypto PMD */
/**
- * @deprecated
- *
- * Create a virtual crypto device
+ * Get the device identifier for the named crypto device.
*
- * @param name Cryptodev PMD name of device to be created.
- * @param args Options arguments for device.
+ * @param name device name to select the device structure.
*
* @return
- * - On successful creation of the cryptodev the device index is returned,
- * which will be between 0 and rte_cryptodev_count().
- * - In the case of a failure, returns -1.
+ * - Returns crypto device identifier on success.
+ * - Return -1 on failure to find named crypto device.
*/
-__rte_deprecated
extern int
-rte_cryptodev_create_vdev(const char *name, const char *args);
+rte_cryptodev_get_dev_id(const char *name);
/**
- * Get the device identifier for the named crypto device.
+ * Get the crypto device name given a device identifier.
*
- * @param name device name to select the device structure.
+ * @param dev_id
+ * The identifier of the device
*
* @return
- * - Returns crypto device identifier on success.
- * - Return -1 on failure to find named crypto device.
+ * - Returns crypto device name.
+ * - Returns NULL if crypto device is not present.
*/
-extern int
-rte_cryptodev_get_dev_id(const char *name);
+extern const char *
+rte_cryptodev_name_get(uint8_t dev_id);
/**
* Get the total number of crypto devices that have been successfully
@@ -676,6 +673,11 @@ rte_cryptodev_stats_reset(uint8_t dev_id);
* @param dev_info A pointer to a structure of type
* *rte_cryptodev_info* to be filled with the
* contextual information of the device.
+ *
+ * @note The capabilities field of dev_info is set to point to the first
+ * element of an array of struct rte_cryptodev_capabilities. The element after
+ * the last valid element has it's op field set to
+ * RTE_CRYPTO_OP_TYPE_UNDEFINED.
*/
extern void
rte_cryptodev_info_get(uint8_t dev_id, struct rte_cryptodev_info *dev_info);
@@ -756,11 +758,17 @@ struct rte_cryptodev {
struct rte_cryptodev_cb_list link_intr_cbs;
/**< User application callback for interrupts if present */
+ void *security_ctx;
+ /**< Context for security ops */
+
__extension__
uint8_t attached : 1;
/**< Flag indicating the device is attached */
} __rte_cache_aligned;
+void *
+rte_cryptodev_get_sec_ctx(uint8_t dev_id);
+
/**
*
* The data part, with no function pointers, associated with each device.
@@ -1025,26 +1033,6 @@ int rte_cryptodev_driver_id_get(const char *name);
*/
const char *rte_cryptodev_driver_name_get(uint8_t driver_id);
-/**
- * @internal
- * Allocate Cryptodev driver.
- *
- * @param driver
- * Pointer to rte_driver.
- * @return
- * The driver type identifier
- */
-uint8_t rte_cryptodev_allocate_driver(const struct rte_driver *driver);
-
-
-#define RTE_PMD_REGISTER_CRYPTO_DRIVER(drv, driver_id)\
-RTE_INIT(init_ ##driver_id);\
-static void init_ ##driver_id(void)\
-{\
- driver_id = rte_cryptodev_allocate_driver(&(drv).driver);\
-}
-
-
#ifdef __cplusplus
}
#endif
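The new note on rte_cryptodev_info_get() above says the capabilities array is terminated by an element whose op field is RTE_CRYPTO_OP_TYPE_UNDEFINED. A minimal walk of that array could look like this (sketch only, not from the patch):

#include <rte_cryptodev.h>

/* Count the symmetric-crypto capabilities advertised by a device. */
static unsigned int
count_sym_capabilities(uint8_t dev_id)
{
	struct rte_cryptodev_info info;
	const struct rte_cryptodev_capabilities *cap;
	unsigned int n = 0;

	rte_cryptodev_info_get(dev_id, &info);

	/* the array ends with an RTE_CRYPTO_OP_TYPE_UNDEFINED entry */
	for (cap = info.capabilities;
	     cap->op != RTE_CRYPTO_OP_TYPE_UNDEFINED; cap++)
		if (cap->op == RTE_CRYPTO_OP_TYPE_SYMMETRIC)
			n++;

	return n;
}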
diff --git a/lib/librte_cryptodev/rte_cryptodev_pci.h b/lib/librte_cryptodev/rte_cryptodev_pci.h
deleted file mode 100644
index 67eda96a..00000000
--- a/lib/librte_cryptodev/rte_cryptodev_pci.h
+++ /dev/null
@@ -1,92 +0,0 @@
-/*-
- * BSD LICENSE
- *
- * Copyright(c) 2017 Intel Corporation. All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- *
- * * Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in
- * the documentation and/or other materials provided with the
- * distribution.
- * * Neither the name of the copyright holder nor the names of its
- * contributors may be used to endorse or promote products derived
- * from this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- */
-
-#ifndef _RTE_CRYPTODEV_PCI_H_
-#define _RTE_CRYPTODEV_PCI_H_
-
-#include <rte_pci.h>
-#include "rte_cryptodev.h"
-
-/**
- * Initialisation function of a crypto driver invoked for each matching
- * crypto PCI device detected during the PCI probing phase.
- *
- * @param dev The dev pointer is the address of the *rte_cryptodev*
- * structure associated with the matching device and which
- * has been [automatically] allocated in the
- * *rte_crypto_devices* array.
- *
- * @return
- * - 0: Success, the device is properly initialised by the driver.
- * In particular, the driver MUST have set up the *dev_ops* pointer
- * of the *dev* structure.
- * - <0: Error code of the device initialisation failure.
- */
-typedef int (*cryptodev_pci_init_t)(struct rte_cryptodev *dev);
-
-/**
- * Finalisation function of a driver invoked for each matching
- * PCI device detected during the PCI closing phase.
- *
- * @param dev The dev pointer is the address of the *rte_cryptodev*
- * structure associated with the matching device and which
- * has been [automatically] allocated in the
- * *rte_crypto_devices* array.
- *
- * * @return
- * - 0: Success, the device is properly finalised by the driver.
- * In particular, the driver MUST free the *dev_ops* pointer
- * of the *dev* structure.
- * - <0: Error code of the device initialisation failure.
- */
-typedef int (*cryptodev_pci_uninit_t)(struct rte_cryptodev *dev);
-
-/**
- * @internal
- * Wrapper for use by pci drivers as a .probe function to attach to a crypto
- * interface.
- */
-int
-rte_cryptodev_pci_generic_probe(struct rte_pci_device *pci_dev,
- size_t private_data_size,
- cryptodev_pci_init_t dev_init);
-
-/**
- * @internal
- * Wrapper for use by pci drivers as a .remove function to detach a crypto
- * interface.
- */
-int
-rte_cryptodev_pci_generic_remove(struct rte_pci_device *pci_dev,
- cryptodev_pci_uninit_t dev_uninit);
-
-#endif /* _RTE_CRYPTODEV_PCI_H_ */
diff --git a/lib/librte_cryptodev/rte_cryptodev_pmd.c b/lib/librte_cryptodev/rte_cryptodev_pmd.c
index a57faadc..b4eeb448 100644
--- a/lib/librte_cryptodev/rte_cryptodev_pmd.c
+++ b/lib/librte_cryptodev/rte_cryptodev_pmd.c
@@ -32,84 +32,48 @@
#include <rte_malloc.h>
-#include "rte_cryptodev_vdev.h"
-#include "rte_cryptodev_pci.h"
#include "rte_cryptodev_pmd.h"
/**
* Parse name from argument
*/
static int
-rte_cryptodev_vdev_parse_name_arg(const char *key __rte_unused,
+rte_cryptodev_pmd_parse_name_arg(const char *key __rte_unused,
const char *value, void *extra_args)
{
- struct rte_crypto_vdev_init_params *params = extra_args;
+ struct rte_cryptodev_pmd_init_params *params = extra_args;
+ int n;
- if (strlen(value) >= RTE_CRYPTODEV_NAME_MAX_LEN - 1) {
- CDEV_LOG_ERR("Invalid name %s, should be less than "
- "%u bytes", value,
- RTE_CRYPTODEV_NAME_MAX_LEN - 1);
- return -1;
- }
-
- strncpy(params->name, value, RTE_CRYPTODEV_NAME_MAX_LEN);
+ n = snprintf(params->name, RTE_CRYPTODEV_NAME_MAX_LEN, "%s", value);
+ if (n >= RTE_CRYPTODEV_NAME_MAX_LEN)
+ return -EINVAL;
return 0;
}
/**
- * Parse integer from argument
+ * Parse unsigned integer from argument
*/
static int
-rte_cryptodev_vdev_parse_integer_arg(const char *key __rte_unused,
+rte_cryptodev_pmd_parse_uint_arg(const char *key __rte_unused,
const char *value, void *extra_args)
{
- int *i = extra_args;
+ int i;
+ char *end;
+ errno = 0;
- *i = atoi(value);
- if (*i < 0) {
- CDEV_LOG_ERR("Argument has to be positive.");
- return -1;
- }
+ i = strtol(value, &end, 10);
+ if (*end != 0 || errno != 0 || i < 0)
+ return -EINVAL;
+ *((uint32_t *)extra_args) = i;
return 0;
}
-struct rte_cryptodev *
-rte_cryptodev_vdev_pmd_init(const char *name, size_t dev_private_size,
- int socket_id, struct rte_vdev_device *vdev)
-{
- struct rte_cryptodev *cryptodev;
-
- /* allocate device structure */
- cryptodev = rte_cryptodev_pmd_allocate(name, socket_id);
- if (cryptodev == NULL)
- return NULL;
-
- /* allocate private device structure */
- if (rte_eal_process_type() == RTE_PROC_PRIMARY) {
- cryptodev->data->dev_private =
- rte_zmalloc_socket("cryptodev device private",
- dev_private_size,
- RTE_CACHE_LINE_SIZE,
- socket_id);
-
- if (cryptodev->data->dev_private == NULL)
- rte_panic("Cannot allocate memzone for private device"
- " data");
- }
-
- cryptodev->device = &vdev->device;
-
- /* initialise user call-back tail queue */
- TAILQ_INIT(&(cryptodev->link_intr_cbs));
-
- return cryptodev;
-}
-
int
-rte_cryptodev_vdev_parse_init_params(struct rte_crypto_vdev_init_params *params,
- const char *input_args)
+rte_cryptodev_pmd_parse_input_args(
+ struct rte_cryptodev_pmd_init_params *params,
+ const char *args)
{
struct rte_kvargs *kvlist = NULL;
int ret = 0;
@@ -117,35 +81,36 @@ rte_cryptodev_vdev_parse_init_params(struct rte_crypto_vdev_init_params *params,
if (params == NULL)
return -EINVAL;
- if (input_args) {
- kvlist = rte_kvargs_parse(input_args,
- cryptodev_vdev_valid_params);
+ if (args) {
+ kvlist = rte_kvargs_parse(args, cryptodev_pmd_valid_params);
if (kvlist == NULL)
- return -1;
+ return -EINVAL;
ret = rte_kvargs_process(kvlist,
- RTE_CRYPTODEV_VDEV_MAX_NB_QP_ARG,
- &rte_cryptodev_vdev_parse_integer_arg,
- &params->max_nb_queue_pairs);
+ RTE_CRYPTODEV_PMD_MAX_NB_QP_ARG,
+ &rte_cryptodev_pmd_parse_uint_arg,
+ &params->max_nb_queue_pairs);
if (ret < 0)
goto free_kvlist;
ret = rte_kvargs_process(kvlist,
- RTE_CRYPTODEV_VDEV_MAX_NB_SESS_ARG,
- &rte_cryptodev_vdev_parse_integer_arg,
- &params->max_nb_sessions);
+ RTE_CRYPTODEV_PMD_MAX_NB_SESS_ARG,
+ &rte_cryptodev_pmd_parse_uint_arg,
+ &params->max_nb_sessions);
if (ret < 0)
goto free_kvlist;
- ret = rte_kvargs_process(kvlist, RTE_CRYPTODEV_VDEV_SOCKET_ID,
- &rte_cryptodev_vdev_parse_integer_arg,
- &params->socket_id);
+ ret = rte_kvargs_process(kvlist,
+ RTE_CRYPTODEV_PMD_SOCKET_ID_ARG,
+ &rte_cryptodev_pmd_parse_uint_arg,
+ &params->socket_id);
if (ret < 0)
goto free_kvlist;
- ret = rte_kvargs_process(kvlist, RTE_CRYPTODEV_VDEV_NAME,
- &rte_cryptodev_vdev_parse_name_arg,
- params);
+ ret = rte_kvargs_process(kvlist,
+ RTE_CRYPTODEV_PMD_NAME_ARG,
+ &rte_cryptodev_pmd_parse_name_arg,
+ params);
if (ret < 0)
goto free_kvlist;
}
@@ -155,93 +120,80 @@ free_kvlist:
return ret;
}
-int
-rte_cryptodev_pci_generic_probe(struct rte_pci_device *pci_dev,
- size_t private_data_size,
- cryptodev_pci_init_t dev_init)
+struct rte_cryptodev *
+rte_cryptodev_pmd_create(const char *name,
+ struct rte_device *device,
+ struct rte_cryptodev_pmd_init_params *params)
{
struct rte_cryptodev *cryptodev;
- char cryptodev_name[RTE_CRYPTODEV_NAME_MAX_LEN];
+ if (params->name[0] != '\0') {
+ CDEV_LOG_INFO("[%s] User specified device name = %s\n",
+ device->driver->name, params->name);
+ name = params->name;
+ }
- int retval;
+ CDEV_LOG_INFO("[%s] - Creating cryptodev %s\n",
+ device->driver->name, name);
- rte_pci_device_name(&pci_dev->addr, cryptodev_name,
- sizeof(cryptodev_name));
+ CDEV_LOG_INFO("[%s] - Initialisation parameters - name: %s,"
+ "socket id: %d, max queue pairs: %u, max sessions: %u",
+ device->driver->name, name,
+ params->socket_id, params->max_nb_queue_pairs,
+ params->max_nb_sessions);
- cryptodev = rte_cryptodev_pmd_allocate(cryptodev_name, rte_socket_id());
- if (cryptodev == NULL)
- return -ENOMEM;
+ /* allocate device structure */
+ cryptodev = rte_cryptodev_pmd_allocate(name, params->socket_id);
+ if (cryptodev == NULL) {
+ CDEV_LOG_ERR("[%s] Failed to allocate crypto device for %s",
+ device->driver->name, name);
+ return NULL;
+ }
+ /* allocate private device structure */
if (rte_eal_process_type() == RTE_PROC_PRIMARY) {
cryptodev->data->dev_private =
- rte_zmalloc_socket(
- "cryptodev private structure",
- private_data_size,
+ rte_zmalloc_socket("cryptodev device private",
+ params->private_data_size,
RTE_CACHE_LINE_SIZE,
- rte_socket_id());
+ params->socket_id);
+
+ if (cryptodev->data->dev_private == NULL) {
+ CDEV_LOG_ERR("[%s] Cannot allocate memory for "
+ "cryptodev %s private data",
+ device->driver->name, name);
- if (cryptodev->data->dev_private == NULL)
- rte_panic("Cannot allocate memzone for private "
- "device data");
+ rte_cryptodev_pmd_release_device(cryptodev);
+ return NULL;
+ }
}
- cryptodev->device = &pci_dev->device;
+ cryptodev->device = device;
- /* init user callbacks */
+ /* initialise user call-back tail queue */
TAILQ_INIT(&(cryptodev->link_intr_cbs));
- /* Invoke PMD device initialization function */
- RTE_FUNC_PTR_OR_ERR_RET(*dev_init, -EINVAL);
- retval = dev_init(cryptodev);
- if (retval == 0)
- return 0;
-
- CDEV_LOG_ERR("driver %s: crypto_dev_init(vendor_id=0x%x device_id=0x%x)"
- " failed", pci_dev->device.driver->name,
- (unsigned int) pci_dev->id.vendor_id,
- (unsigned int) pci_dev->id.device_id);
-
- if (rte_eal_process_type() == RTE_PROC_PRIMARY)
- rte_free(cryptodev->data->dev_private);
-
- /* free crypto device */
- rte_cryptodev_pmd_release_device(cryptodev);
-
- return -ENXIO;
+ return cryptodev;
}
int
-rte_cryptodev_pci_generic_remove(struct rte_pci_device *pci_dev,
- cryptodev_pci_uninit_t dev_uninit)
+rte_cryptodev_pmd_destroy(struct rte_cryptodev *cryptodev)
{
- struct rte_cryptodev *cryptodev;
- char cryptodev_name[RTE_CRYPTODEV_NAME_MAX_LEN];
- int ret;
-
- if (pci_dev == NULL)
- return -EINVAL;
-
- rte_pci_device_name(&pci_dev->addr, cryptodev_name,
- sizeof(cryptodev_name));
-
- cryptodev = rte_cryptodev_pmd_get_named_dev(cryptodev_name);
- if (cryptodev == NULL)
- return -ENODEV;
+ int retval;
- /* Invoke PMD device uninit function */
- if (dev_uninit) {
- ret = dev_uninit(cryptodev);
- if (ret)
- return ret;
- }
+ CDEV_LOG_INFO("[%s] Closing crypto device %s",
+ cryptodev->device->driver->name,
+ cryptodev->device->name);
/* free crypto device */
- rte_cryptodev_pmd_release_device(cryptodev);
+ retval = rte_cryptodev_pmd_release_device(cryptodev);
+ if (retval)
+ return retval;
if (rte_eal_process_type() == RTE_PROC_PRIMARY)
rte_free(cryptodev->data->dev_private);
+
cryptodev->device = NULL;
cryptodev->data = NULL;
diff --git a/lib/librte_cryptodev/rte_cryptodev_pmd.h b/lib/librte_cryptodev/rte_cryptodev_pmd.h
index c983eb21..744405e2 100644
--- a/lib/librte_cryptodev/rte_cryptodev_pmd.h
+++ b/lib/librte_cryptodev/rte_cryptodev_pmd.h
@@ -56,6 +56,35 @@ extern "C" {
#include "rte_crypto.h"
#include "rte_cryptodev.h"
+
+#define RTE_CRYPTODEV_PMD_DEFAULT_MAX_NB_QUEUE_PAIRS 8
+#define RTE_CRYPTODEV_PMD_DEFAULT_MAX_NB_SESSIONS 2048
+
+#define RTE_CRYPTODEV_PMD_NAME_ARG ("name")
+#define RTE_CRYPTODEV_PMD_MAX_NB_QP_ARG ("max_nb_queue_pairs")
+#define RTE_CRYPTODEV_PMD_MAX_NB_SESS_ARG ("max_nb_sessions")
+#define RTE_CRYPTODEV_PMD_SOCKET_ID_ARG ("socket_id")
+
+
+static const char * const cryptodev_pmd_valid_params[] = {
+ RTE_CRYPTODEV_PMD_NAME_ARG,
+ RTE_CRYPTODEV_PMD_MAX_NB_QP_ARG,
+ RTE_CRYPTODEV_PMD_MAX_NB_SESS_ARG,
+ RTE_CRYPTODEV_PMD_SOCKET_ID_ARG
+};
+
+/**
+ * @internal
+ * Initialisation parameters for crypto devices
+ */
+struct rte_cryptodev_pmd_init_params {
+ char name[RTE_CRYPTODEV_NAME_MAX_LEN];
+ size_t private_data_size;
+ int socket_id;
+ unsigned int max_nb_queue_pairs;
+ unsigned int max_nb_sessions;
+};
+
/** Global structure used for maintaining state of allocated crypto devices */
struct rte_cryptodev_global {
struct rte_cryptodev *devs; /**< Device information array */
@@ -65,6 +94,13 @@ struct rte_cryptodev_global {
uint8_t max_devs; /**< Max number of devices */
};
+/* Cryptodev driver, containing the driver ID */
+struct cryptodev_driver {
+ TAILQ_ENTRY(cryptodev_driver) next; /**< Next in list. */
+ const struct rte_driver *driver;
+ uint8_t id;
+};
+
/** pointer to global crypto devices data structure. */
extern struct rte_cryptodev_global *rte_cryptodev_globals;
@@ -385,6 +421,63 @@ rte_cryptodev_pmd_allocate(const char *name, int socket_id);
extern int
rte_cryptodev_pmd_release_device(struct rte_cryptodev *cryptodev);
+
+/**
+ * @internal
+ *
+ * PMD assist function to parse initialisation arguments for a crypto driver
+ * when creating a new crypto PMD device instance.
+ *
+ * The PMD should set default values for its parameters before calling this
+ * function; these defaults are overwritten by any values successfully parsed
+ * from the args string.
+ *
+ * @param params parsed PMD initialisation parameters
+ * @param args input argument string to parse
+ *
+ * @return
+ * - 0 on success
+ * - errno on failure
+ */
+int
+rte_cryptodev_pmd_parse_input_args(
+ struct rte_cryptodev_pmd_init_params *params,
+ const char *args);
+
+/**
+ * @internal
+ *
+ * PMD assist function providing boilerplate code for a crypto driver to
+ * create and allocate resources for a new crypto PMD device instance.
+ *
+ * @param name crypto device name.
+ * @param device base device instance
+ * @param params PMD initialisation parameters
+ *
+ * @return
+ * - crypto device instance on success
+ * - NULL on creation failure
+ */
+struct rte_cryptodev *
+rte_cryptodev_pmd_create(const char *name,
+ struct rte_device *device,
+ struct rte_cryptodev_pmd_init_params *params);
+
+/**
+ * @internal
+ *
+ * PMD assist function providing boilerplate code for a crypto driver to
+ * destroy and free the resources associated with a crypto PMD device instance.
+ *
+ * @param cryptodev crypto device handle.
+ *
+ * @return
+ * - 0 on success
+ * - errno on failure
+ */
+int
+rte_cryptodev_pmd_destroy(struct rte_cryptodev *cryptodev);
+
/**
* Executes all the user application registered callbacks for the specific
* device.
@@ -405,6 +498,29 @@ void rte_cryptodev_pmd_callback_process(struct rte_cryptodev *dev,
int
rte_cryptodev_pmd_create_dev_name(char *name, const char *dev_name_prefix);
+/**
+ * @internal
+ * Allocate Cryptodev driver.
+ *
+ * @param crypto_drv
+ * Pointer to cryptodev_driver.
+ * @param drv
+ * Pointer to rte_driver.
+ *
+ * @return
+ * The driver type identifier
+ */
+uint8_t rte_cryptodev_allocate_driver(struct cryptodev_driver *crypto_drv,
+ const struct rte_driver *drv);
+
+
+#define RTE_PMD_REGISTER_CRYPTO_DRIVER(crypto_drv, drv, driver_id)\
+RTE_INIT(init_ ##driver_id);\
+static void init_ ##driver_id(void)\
+{\
+ driver_id = rte_cryptodev_allocate_driver(&crypto_drv, &(drv).driver);\
+}
+
static inline void *
get_session_private_data(const struct rte_cryptodev_sym_session *sess,
uint8_t driver_id) {
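The assist functions and the RTE_PMD_REGISTER_CRYPTO_DRIVER macro declared above replace the removed rte_cryptodev_vdev_* and rte_cryptodev_pci_generic_* helpers. Below is a minimal sketch of how a virtual crypto PMD could wire them together after this change; the "crypto_dummy" name, the empty ops table and the private-data size are illustrative assumptions, not part of the patch.

	#include <errno.h>
	#include <rte_bus_vdev.h>            /* vdev bus header as of 17.11 */
	#include <rte_lcore.h>
	#include <rte_cryptodev_pmd.h>

	static uint8_t dummy_driver_id;
	static struct cryptodev_driver dummy_crypto_drv_wrapper;
	/* a real PMD fills in its queue-pair/session callbacks here */
	static struct rte_cryptodev_ops dummy_crypto_ops;

	static int
	dummy_crypto_probe(struct rte_vdev_device *vdev)
	{
		/* set defaults first; parsed devargs overwrite them below */
		struct rte_cryptodev_pmd_init_params init_params = {
			"",
			256,                     /* assumed private data size */
			rte_socket_id(),
			RTE_CRYPTODEV_PMD_DEFAULT_MAX_NB_QUEUE_PAIRS,
			RTE_CRYPTODEV_PMD_DEFAULT_MAX_NB_SESSIONS
		};
		struct rte_cryptodev *dev;

		rte_cryptodev_pmd_parse_input_args(&init_params,
				rte_vdev_device_args(vdev));

		dev = rte_cryptodev_pmd_create(rte_vdev_device_name(vdev),
				&vdev->device, &init_params);
		if (dev == NULL)
			return -ENODEV;

		dev->driver_id = dummy_driver_id;
		dev->dev_ops = &dummy_crypto_ops;
		return 0;
	}

	static int
	dummy_crypto_remove(struct rte_vdev_device *vdev)
	{
		struct rte_cryptodev *dev =
			rte_cryptodev_pmd_get_named_dev(rte_vdev_device_name(vdev));

		return (dev != NULL) ? rte_cryptodev_pmd_destroy(dev) : -ENODEV;
	}

	static struct rte_vdev_driver dummy_crypto_pmd = {
		.probe = dummy_crypto_probe,
		.remove = dummy_crypto_remove,
	};

	RTE_PMD_REGISTER_VDEV(crypto_dummy, dummy_crypto_pmd);
	RTE_PMD_REGISTER_CRYPTO_DRIVER(dummy_crypto_drv_wrapper, dummy_crypto_pmd,
			dummy_driver_id);

Such a device could then be created with devargs like --vdev 'crypto_dummy,max_nb_queue_pairs=4,socket_id=0', which rte_cryptodev_pmd_parse_input_args() folds into the init parameters on top of the defaults set in the probe function.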
diff --git a/lib/librte_cryptodev/rte_cryptodev_vdev.h b/lib/librte_cryptodev/rte_cryptodev_vdev.h
deleted file mode 100644
index 94ab9d33..00000000
--- a/lib/librte_cryptodev/rte_cryptodev_vdev.h
+++ /dev/null
@@ -1,100 +0,0 @@
-/*-
- * BSD LICENSE
- *
- * Copyright(c) 2017 Intel Corporation. All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- *
- * * Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in
- * the documentation and/or other materials provided with the
- * distribution.
- * * Neither the name of the copyright holder nor the names of its
- * contributors may be used to endorse or promote products derived
- * from this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- */
-
-#ifndef _RTE_CRYPTODEV_VDEV_H_
-#define _RTE_CRYPTODEV_VDEV_H_
-
-#include <rte_vdev.h>
-#include <inttypes.h>
-
-#include "rte_cryptodev.h"
-
-#define RTE_CRYPTODEV_VDEV_DEFAULT_MAX_NB_QUEUE_PAIRS 8
-#define RTE_CRYPTODEV_VDEV_DEFAULT_MAX_NB_SESSIONS 2048
-
-#define RTE_CRYPTODEV_VDEV_NAME ("name")
-#define RTE_CRYPTODEV_VDEV_MAX_NB_QP_ARG ("max_nb_queue_pairs")
-#define RTE_CRYPTODEV_VDEV_MAX_NB_SESS_ARG ("max_nb_sessions")
-#define RTE_CRYPTODEV_VDEV_SOCKET_ID ("socket_id")
-
-static const char * const cryptodev_vdev_valid_params[] = {
- RTE_CRYPTODEV_VDEV_NAME,
- RTE_CRYPTODEV_VDEV_MAX_NB_QP_ARG,
- RTE_CRYPTODEV_VDEV_MAX_NB_SESS_ARG,
- RTE_CRYPTODEV_VDEV_SOCKET_ID
-};
-
-/**
- * @internal
- * Initialisation parameters for virtual crypto devices
- */
-struct rte_crypto_vdev_init_params {
- unsigned int max_nb_queue_pairs;
- unsigned int max_nb_sessions;
- uint8_t socket_id;
- char name[RTE_CRYPTODEV_NAME_MAX_LEN];
-};
-
-/**
- * @internal
- * Creates a new virtual crypto device and returns the pointer
- * to that device.
- *
- * @param name PMD type name
- * @param dev_private_size Size of crypto PMDs private data
- * @param socket_id Socket to allocate resources on.
- * @param vdev Pointer to virtual device structure.
- *
- * @return
- * - Cryptodev pointer if device is successfully created.
- * - NULL if device cannot be created.
- */
-struct rte_cryptodev *
-rte_cryptodev_vdev_pmd_init(const char *name, size_t dev_private_size,
- int socket_id, struct rte_vdev_device *vdev);
-
-/**
- * @internal
- * Parse virtual device initialisation parameters input arguments
- *
- * @params params Initialisation parameters with defaults set.
- * @params input_args Command line arguments
- *
- * @return
- * 0 on successful parse
- * <0 on failure to parse
- */
-int
-rte_cryptodev_vdev_parse_init_params(struct rte_crypto_vdev_init_params *params,
- const char *input_args);
-
-#endif /* _RTE_CRYPTODEV_VDEV_H_ */
diff --git a/lib/librte_cryptodev/rte_cryptodev_version.map b/lib/librte_cryptodev/rte_cryptodev_version.map
index e9ba88ac..eb47308b 100644
--- a/lib/librte_cryptodev/rte_cryptodev_version.map
+++ b/lib/librte_cryptodev/rte_cryptodev_version.map
@@ -7,7 +7,6 @@ DPDK_16.04 {
rte_cryptodev_close;
rte_cryptodev_count;
rte_cryptodev_configure;
- rte_cryptodev_create_vdev;
rte_cryptodev_get_dev_id;
rte_cryptodev_get_feature_name;
rte_cryptodev_info_get;
@@ -68,14 +67,21 @@ DPDK_17.08 {
rte_cryptodev_get_aead_algo_enum;
rte_cryptodev_get_header_session_size;
rte_cryptodev_get_private_session_size;
- rte_cryptodev_pci_generic_probe;
- rte_cryptodev_pci_generic_remove;
rte_cryptodev_sym_capability_check_aead;
rte_cryptodev_sym_session_init;
rte_cryptodev_sym_session_clear;
- rte_cryptodev_vdev_parse_init_params;
- rte_cryptodev_vdev_pmd_init;
rte_crypto_aead_algorithm_strings;
rte_crypto_aead_operation_strings;
} DPDK_17.05;
+
+DPDK_17.11 {
+ global:
+
+ rte_cryptodev_get_sec_ctx;
+ rte_cryptodev_name_get;
+ rte_cryptodev_pmd_create;
+ rte_cryptodev_pmd_destroy;
+ rte_cryptodev_pmd_parse_input_args;
+
+} DPDK_17.08;
diff --git a/lib/librte_distributor/Makefile b/lib/librte_distributor/Makefile
index b417ee7b..fee00121 100644
--- a/lib/librte_distributor/Makefile
+++ b/lib/librte_distributor/Makefile
@@ -36,6 +36,7 @@ LIB = librte_distributor.a
CFLAGS += -O3
CFLAGS += $(WERROR_FLAGS) -I$(SRCDIR)
+LDLIBS += -lrte_eal -lrte_mbuf -lrte_ethdev
EXPORT_MAP := rte_distributor_version.map
diff --git a/lib/librte_distributor/rte_distributor.c b/lib/librte_distributor/rte_distributor.c
index 20ba9ffb..57ad3397 100644
--- a/lib/librte_distributor/rte_distributor.c
+++ b/lib/librte_distributor/rte_distributor.c
@@ -432,7 +432,7 @@ rte_distributor_process_v1705(struct rte_distributor *d,
next_value = (((int64_t)(uintptr_t)next_mb) <<
RTE_DISTRIB_FLAG_BITS);
/*
- * User is advocated to set tag vaue for each
+ * User is advised to set the tag value for each
* mbuf before calling rte_distributor_process.
* User defined tags are used to identify flows,
* or sessions.
diff --git a/lib/librte_distributor/rte_distributor_v20.c b/lib/librte_distributor/rte_distributor_v20.c
index b09abecd..9adda52b 100644
--- a/lib/librte_distributor/rte_distributor_v20.c
+++ b/lib/librte_distributor/rte_distributor_v20.c
@@ -244,7 +244,7 @@ rte_distributor_process_v20(struct rte_distributor_v20 *d,
next_value = (((int64_t)(uintptr_t)next_mb)
<< RTE_DISTRIB_FLAG_BITS);
/*
- * User is advocated to set tag vaue for each
+ * User is advised to set the tag value for each
* mbuf before calling rte_distributor_process.
* User defined tags are used to identify flows,
* or sessions.
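The comment corrected here (and in the current implementation above) reminds applications to tag every mbuf before handing a burst to rte_distributor_process(). A short sketch of that pattern, assuming the tag is carried in mbuf->hash.usr and using a placeholder flow classifier:

	#include <rte_mbuf.h>
	#include <rte_distributor.h>

	/* illustrative flow classifier: hash a couple of payload bytes */
	static inline uint32_t
	flow_id_of(struct rte_mbuf *m)
	{
		const uint8_t *p = rte_pktmbuf_mtod(m, const uint8_t *);

		return (uint32_t)p[0] | ((uint32_t)p[1] << 8);
	}

	static void
	tag_and_distribute(struct rte_distributor *d, struct rte_mbuf **bufs,
			unsigned int n)
	{
		unsigned int i;

		for (i = 0; i < n; i++)
			bufs[i]->hash.usr = flow_id_of(bufs[i]);

		rte_distributor_process(d, bufs, n);
	}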
diff --git a/lib/librte_eal/bsdapp/eal/Makefile b/lib/librte_eal/bsdapp/eal/Makefile
index 005019ed..afa117de 100644
--- a/lib/librte_eal/bsdapp/eal/Makefile
+++ b/lib/librte_eal/bsdapp/eal/Makefile
@@ -46,16 +46,15 @@ LDLIBS += -lexecinfo
LDLIBS += -lpthread
LDLIBS += -lgcc_s
-EXPORT_MAP := rte_eal_version.map
+EXPORT_MAP := ../../rte_eal_version.map
-LIBABIVER := 5
+LIBABIVER := 6
# specific to bsdapp exec-env
SRCS-$(CONFIG_RTE_EXEC_ENV_BSDAPP) := eal.c
SRCS-$(CONFIG_RTE_EXEC_ENV_BSDAPP) += eal_memory.c
SRCS-$(CONFIG_RTE_EXEC_ENV_BSDAPP) += eal_hugepage_info.c
SRCS-$(CONFIG_RTE_EXEC_ENV_BSDAPP) += eal_thread.c
-SRCS-$(CONFIG_RTE_EXEC_ENV_BSDAPP) += eal_pci.c
SRCS-$(CONFIG_RTE_EXEC_ENV_BSDAPP) += eal_debug.c
SRCS-$(CONFIG_RTE_EXEC_ENV_BSDAPP) += eal_lcore.c
SRCS-$(CONFIG_RTE_EXEC_ENV_BSDAPP) += eal_timer.c
@@ -68,9 +67,6 @@ SRCS-$(CONFIG_RTE_EXEC_ENV_BSDAPP) += eal_common_timer.c
SRCS-$(CONFIG_RTE_EXEC_ENV_BSDAPP) += eal_common_memzone.c
SRCS-$(CONFIG_RTE_EXEC_ENV_BSDAPP) += eal_common_log.c
SRCS-$(CONFIG_RTE_EXEC_ENV_BSDAPP) += eal_common_launch.c
-SRCS-$(CONFIG_RTE_EXEC_ENV_BSDAPP) += eal_common_vdev.c
-SRCS-$(CONFIG_RTE_EXEC_ENV_BSDAPP) += eal_common_pci.c
-SRCS-$(CONFIG_RTE_EXEC_ENV_BSDAPP) += eal_common_pci_uio.c
SRCS-$(CONFIG_RTE_EXEC_ENV_BSDAPP) += eal_common_memory.c
SRCS-$(CONFIG_RTE_EXEC_ENV_BSDAPP) += eal_common_tailqs.c
SRCS-$(CONFIG_RTE_EXEC_ENV_BSDAPP) += eal_common_errno.c
@@ -92,6 +88,7 @@ SRCS-$(CONFIG_RTE_EXEC_ENV_BSDAPP) += rte_service.c
# from arch dir
SRCS-$(CONFIG_RTE_EXEC_ENV_BSDAPP) += rte_cpuflags.c
SRCS-$(CONFIG_RTE_ARCH_X86) += rte_spinlock.c
+SRCS-y += rte_cycles.c
CFLAGS_eal_common_cpuflags.o := $(CPUFLAGS_LIST)
@@ -107,7 +104,7 @@ CFLAGS_eal_thread.o += -Wno-return-type
CFLAGS_eal_hpet.o += -Wno-return-type
endif
-INC := rte_interrupts.h
+INC := # no bsdapp specific headers
SYMLINK-$(CONFIG_RTE_EXEC_ENV_BSDAPP)-include/exec-env := \
$(addprefix include/exec-env/,$(INC))
diff --git a/lib/librte_eal/bsdapp/eal/eal.c b/lib/librte_eal/bsdapp/eal/eal.c
index 5fa59884..369a682a 100644
--- a/lib/librte_eal/bsdapp/eal/eal.c
+++ b/lib/librte_eal/bsdapp/eal/eal.c
@@ -51,7 +51,6 @@
#include <rte_common.h>
#include <rte_debug.h>
#include <rte_memory.h>
-#include <rte_memzone.h>
#include <rte_launch.h>
#include <rte_eal.h>
#include <rte_eal_memconfig.h>
@@ -66,7 +65,6 @@
#include <rte_cpuflags.h>
#include <rte_interrupts.h>
#include <rte_bus.h>
-#include <rte_pci.h>
#include <rte_dev.h>
#include <rte_devargs.h>
#include <rte_version.h>
@@ -112,6 +110,13 @@ struct internal_config internal_config;
/* used by rte_rdtsc() */
int rte_cycles_vmware_tsc_map;
+/* Return mbuf pool ops name */
+const char *
+rte_eal_mbuf_default_mempool_ops(void)
+{
+ return internal_config.mbuf_pool_ops_name;
+}
+
/* Return a pointer to the configuration structure */
struct rte_config *
rte_eal_get_configuration(void)
@@ -119,6 +124,12 @@ rte_eal_get_configuration(void)
return &rte_config;
}
+enum rte_iova_mode
+rte_eal_iova_mode(void)
+{
+ return rte_eal_get_configuration()->iova_mode;
+}
+
/* parse a sysfs (or other) file containing one integer value */
int
eal_parse_sysfs_value(const char *filename, unsigned long *val)
@@ -385,6 +396,9 @@ eal_parse_args(int argc, char **argv)
continue;
switch (opt) {
+ case OPT_MBUF_POOL_OPS_NAME_NUM:
+ internal_config.mbuf_pool_ops_name = optarg;
+ break;
case 'h':
eal_usage(prgname);
exit(EXIT_SUCCESS);
@@ -535,6 +549,29 @@ rte_eal_init(int argc, char **argv)
return -1;
}
+ if (eal_plugins_init() < 0) {
+ rte_eal_init_alert("Cannot init plugins\n");
+ rte_errno = EINVAL;
+ rte_atomic32_clear(&run_once);
+ return -1;
+ }
+
+ if (eal_option_device_parse()) {
+ rte_errno = ENODEV;
+ rte_atomic32_clear(&run_once);
+ return -1;
+ }
+
+ if (rte_bus_scan()) {
+ rte_eal_init_alert("Cannot scan the buses for devices\n");
+ rte_errno = ENODEV;
+ rte_atomic32_clear(&run_once);
+ return -1;
+ }
+
+ /* autodetect the iova mapping mode (default is iova_pa) */
+ rte_eal_get_configuration()->iova_mode = rte_bus_get_iommu_class();
+
if (internal_config.no_hugetlbfs == 0 &&
internal_config.process_type != RTE_PROC_SECONDARY &&
eal_hugepage_info_init() < 0) {
@@ -603,9 +640,6 @@ rte_eal_init(int argc, char **argv)
eal_check_mem_on_local_socket();
- if (eal_plugins_init() < 0)
- rte_eal_init_alert("Cannot init plugins\n");
-
eal_thread_init_master(rte_config.master_lcore);
ret = eal_thread_dump_affinity(cpuset, RTE_CPU_AFFINITY_STR_LEN);
@@ -614,17 +648,6 @@ rte_eal_init(int argc, char **argv)
rte_config.master_lcore, thread_id, cpuset,
ret == 0 ? "" : "...");
- if (eal_option_device_parse()) {
- rte_errno = ENODEV;
- return -1;
- }
-
- if (rte_bus_scan()) {
- rte_eal_init_alert("Cannot scan the buses for devices\n");
- rte_errno = ENODEV;
- return -1;
- }
-
RTE_LCORE_FOREACH_SLAVE(i) {
/*
@@ -698,3 +721,60 @@ rte_eal_process_type(void)
{
return rte_config.process_type;
}
+
+int rte_eal_has_pci(void)
+{
+ return !internal_config.no_pci;
+}
+
+int rte_eal_create_uio_dev(void)
+{
+ return internal_config.create_uio_dev;
+}
+
+enum rte_intr_mode
+rte_eal_vfio_intr_mode(void)
+{
+ return RTE_INTR_MODE_NONE;
+}
+
+/* dummy forward declaration. */
+struct vfio_device_info;
+
+/* dummy prototypes. */
+int rte_vfio_setup_device(const char *sysfs_base, const char *dev_addr,
+ int *vfio_dev_fd, struct vfio_device_info *device_info);
+int rte_vfio_release_device(const char *sysfs_base, const char *dev_addr, int fd);
+int rte_vfio_enable(const char *modname);
+int rte_vfio_is_enabled(const char *modname);
+int rte_vfio_noiommu_is_enabled(void);
+
+int rte_vfio_setup_device(__rte_unused const char *sysfs_base,
+ __rte_unused const char *dev_addr,
+ __rte_unused int *vfio_dev_fd,
+ __rte_unused struct vfio_device_info *device_info)
+{
+ return -1;
+}
+
+int rte_vfio_release_device(__rte_unused const char *sysfs_base,
+ __rte_unused const char *dev_addr,
+ __rte_unused int fd)
+{
+ return -1;
+}
+
+int rte_vfio_enable(__rte_unused const char *modname)
+{
+ return -1;
+}
+
+int rte_vfio_is_enabled(__rte_unused const char *modname)
+{
+ return 0;
+}
+
+int rte_vfio_noiommu_is_enabled(void)
+{
+ return 0;
+}
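The FreeBSD EAL gains several small accessors here (default mbuf pool ops name, IOVA mode, PCI and UIO queries) plus no-op VFIO stubs, and the bus scan is moved ahead of memory initialisation so the IOVA mode can be autodetected. A brief usage sketch, assuming a normal rte_eal_init() start-up:

	#include <stdio.h>
	#include <rte_bus.h>
	#include <rte_eal.h>

	int
	main(int argc, char **argv)
	{
		if (rte_eal_init(argc, argv) < 0)
			return -1;

		printf("IOVA mode: %s\n",
			rte_eal_iova_mode() == RTE_IOVA_VA ? "VA" : "PA");
		printf("default mempool ops: %s\n",
			rte_eal_mbuf_default_mempool_ops());
		printf("PCI bus enabled: %d\n", rte_eal_has_pci());

		return 0;
	}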
diff --git a/lib/librte_eal/bsdapp/eal/eal_interrupts.c b/lib/librte_eal/bsdapp/eal/eal_interrupts.c
index ea2afff4..deba8770 100644
--- a/lib/librte_eal/bsdapp/eal/eal_interrupts.c
+++ b/lib/librte_eal/bsdapp/eal/eal_interrupts.c
@@ -125,3 +125,38 @@ rte_intr_cap_multiple(struct rte_intr_handle *intr_handle)
RTE_SET_USED(intr_handle);
return 0;
}
+
+int
+rte_epoll_wait(int epfd, struct rte_epoll_event *events,
+ int maxevents, int timeout)
+{
+ RTE_SET_USED(epfd);
+ RTE_SET_USED(events);
+ RTE_SET_USED(maxevents);
+ RTE_SET_USED(timeout);
+
+ return -ENOTSUP;
+}
+
+int
+rte_epoll_ctl(int epfd, int op, int fd, struct rte_epoll_event *event)
+{
+ RTE_SET_USED(epfd);
+ RTE_SET_USED(op);
+ RTE_SET_USED(fd);
+ RTE_SET_USED(event);
+
+ return -ENOTSUP;
+}
+
+int
+rte_intr_tls_epfd(void)
+{
+ return -ENOTSUP;
+}
+
+void
+rte_intr_free_epoll_fd(struct rte_intr_handle *intr_handle)
+{
+ RTE_SET_USED(intr_handle);
+}
diff --git a/lib/librte_eal/bsdapp/eal/eal_memory.c b/lib/librte_eal/bsdapp/eal/eal_memory.c
index 3614da8d..6ba05857 100644
--- a/lib/librte_eal/bsdapp/eal/eal_memory.c
+++ b/lib/librte_eal/bsdapp/eal/eal_memory.c
@@ -54,9 +54,14 @@ phys_addr_t
rte_mem_virt2phy(const void *virtaddr)
{
/* XXX not implemented. This function is only used by
- * rte_mempool_virt2phy() when hugepages are disabled. */
+ * rte_mempool_virt2iova() when hugepages are disabled. */
(void)virtaddr;
- return RTE_BAD_PHYS_ADDR;
+ return RTE_BAD_IOVA;
+}
+rte_iova_t
+rte_mem_virt2iova(const void *virtaddr)
+{
+ return rte_mem_virt2phy(virtaddr);
}
int
@@ -73,7 +78,7 @@ rte_eal_hugepage_init(void)
/* for debug purposes, hugetlbfs can be disabled */
if (internal_config.no_hugetlbfs) {
addr = malloc(internal_config.memory);
- mcfg->memseg[0].phys_addr = (phys_addr_t)(uintptr_t)addr;
+ mcfg->memseg[0].iova = (rte_iova_t)(uintptr_t)addr;
mcfg->memseg[0].addr = addr;
mcfg->memseg[0].hugepage_sz = RTE_PGSIZE_4K;
mcfg->memseg[0].len = internal_config.memory;
@@ -88,7 +93,7 @@ rte_eal_hugepage_init(void)
hpi = &internal_config.hugepage_info[i];
for (j = 0; j < hpi->num_pages[0]; j++) {
struct rte_memseg *seg;
- uint64_t physaddr;
+ rte_iova_t physaddr;
int error;
size_t sysctl_size = sizeof(physaddr);
char physaddr_str[64];
@@ -114,7 +119,7 @@ rte_eal_hugepage_init(void)
seg = &mcfg->memseg[seg_idx++];
seg->addr = addr;
- seg->phys_addr = physaddr;
+ seg->iova = physaddr;
seg->hugepage_sz = hpi->hugepage_sz;
seg->len = hpi->hugepage_sz;
seg->nchannel = mcfg->nchannel;
@@ -192,3 +197,9 @@ error:
close(fd_hugepage);
return -1;
}
+
+int
+rte_eal_using_phys_addrs(void)
+{
+ return 0;
+}
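On this FreeBSD path rte_mem_virt2iova() simply forwards to rte_mem_virt2phy(), which remains unimplemented and returns RTE_BAD_IOVA, while rte_eal_using_phys_addrs() now reports 0. A tiny sketch, with an assumed rte_malloc allocation:

	#include <rte_malloc.h>
	#include <rte_memory.h>

	static rte_iova_t
	buf_iova(void)
	{
		void *buf = rte_malloc(NULL, 1024, 0);   /* illustrative buffer */

		/* equivalent to rte_mem_virt2phy(buf) on FreeBSD */
		return rte_mem_virt2iova(buf);
	}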
diff --git a/lib/librte_eal/bsdapp/eal/eal_pci.c b/lib/librte_eal/bsdapp/eal/eal_pci.c
deleted file mode 100644
index 04eacdcc..00000000
--- a/lib/librte_eal/bsdapp/eal/eal_pci.c
+++ /dev/null
@@ -1,670 +0,0 @@
-/*-
- * BSD LICENSE
- *
- * Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- *
- * * Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in
- * the documentation and/or other materials provided with the
- * distribution.
- * * Neither the name of Intel Corporation nor the names of its
- * contributors may be used to endorse or promote products derived
- * from this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- */
-
-#include <ctype.h>
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-#include <stdarg.h>
-#include <unistd.h>
-#include <inttypes.h>
-#include <sys/types.h>
-#include <sys/stat.h>
-#include <fcntl.h>
-#include <errno.h>
-#include <dirent.h>
-#include <limits.h>
-#include <sys/queue.h>
-#include <sys/mman.h>
-#include <sys/ioctl.h>
-#include <sys/pciio.h>
-#include <dev/pci/pcireg.h>
-
-#if defined(RTE_ARCH_X86)
-#include <machine/cpufunc.h>
-#endif
-
-#include <rte_interrupts.h>
-#include <rte_log.h>
-#include <rte_pci.h>
-#include <rte_common.h>
-#include <rte_launch.h>
-#include <rte_memory.h>
-#include <rte_memzone.h>
-#include <rte_eal.h>
-#include <rte_eal_memconfig.h>
-#include <rte_per_lcore.h>
-#include <rte_lcore.h>
-#include <rte_malloc.h>
-#include <rte_string_fns.h>
-#include <rte_debug.h>
-#include <rte_devargs.h>
-
-#include "eal_filesystem.h"
-#include "eal_private.h"
-
-/**
- * @file
- * PCI probing under linux
- *
- * This code is used to simulate a PCI probe by parsing information in
- * sysfs. Moreover, when a registered driver matches a device, the
- * kernel driver currently using it is unloaded and replaced by
- * igb_uio module, which is a very minimal userland driver for Intel
- * network card, only providing access to PCI BAR to applications, and
- * enabling bus master.
- */
-
-extern struct rte_pci_bus rte_pci_bus;
-
-/* Map pci device */
-int
-rte_pci_map_device(struct rte_pci_device *dev)
-{
- int ret = -1;
-
- /* try mapping the NIC resources */
- switch (dev->kdrv) {
- case RTE_KDRV_NIC_UIO:
- /* map resources for devices that use uio */
- ret = pci_uio_map_resource(dev);
- break;
- default:
- RTE_LOG(DEBUG, EAL,
- " Not managed by a supported kernel driver, skipped\n");
- ret = 1;
- break;
- }
-
- return ret;
-}
-
-/* Unmap pci device */
-void
-rte_pci_unmap_device(struct rte_pci_device *dev)
-{
- /* try unmapping the NIC resources */
- switch (dev->kdrv) {
- case RTE_KDRV_NIC_UIO:
- /* unmap resources for devices that use uio */
- pci_uio_unmap_resource(dev);
- break;
- default:
- RTE_LOG(DEBUG, EAL,
- " Not managed by a supported kernel driver, skipped\n");
- break;
- }
-}
-
-void
-pci_uio_free_resource(struct rte_pci_device *dev,
- struct mapped_pci_resource *uio_res)
-{
- rte_free(uio_res);
-
- if (dev->intr_handle.fd) {
- close(dev->intr_handle.fd);
- dev->intr_handle.fd = -1;
- dev->intr_handle.type = RTE_INTR_HANDLE_UNKNOWN;
- }
-}
-
-int
-pci_uio_alloc_resource(struct rte_pci_device *dev,
- struct mapped_pci_resource **uio_res)
-{
- char devname[PATH_MAX]; /* contains the /dev/uioX */
- struct rte_pci_addr *loc;
-
- loc = &dev->addr;
-
- snprintf(devname, sizeof(devname), "/dev/uio@pci:%u:%u:%u",
- dev->addr.bus, dev->addr.devid, dev->addr.function);
-
- if (access(devname, O_RDWR) < 0) {
- RTE_LOG(WARNING, EAL, " "PCI_PRI_FMT" not managed by UIO driver, "
- "skipping\n", loc->domain, loc->bus, loc->devid, loc->function);
- return 1;
- }
-
- /* save fd if in primary process */
- dev->intr_handle.fd = open(devname, O_RDWR);
- if (dev->intr_handle.fd < 0) {
- RTE_LOG(ERR, EAL, "Cannot open %s: %s\n",
- devname, strerror(errno));
- goto error;
- }
- dev->intr_handle.type = RTE_INTR_HANDLE_UIO;
-
- /* allocate the mapping details for secondary processes*/
- *uio_res = rte_zmalloc("UIO_RES", sizeof(**uio_res), 0);
- if (*uio_res == NULL) {
- RTE_LOG(ERR, EAL,
- "%s(): cannot store uio mmap details\n", __func__);
- goto error;
- }
-
- snprintf((*uio_res)->path, sizeof((*uio_res)->path), "%s", devname);
- memcpy(&(*uio_res)->pci_addr, &dev->addr, sizeof((*uio_res)->pci_addr));
-
- return 0;
-
-error:
- pci_uio_free_resource(dev, *uio_res);
- return -1;
-}
-
-int
-pci_uio_map_resource_by_index(struct rte_pci_device *dev, int res_idx,
- struct mapped_pci_resource *uio_res, int map_idx)
-{
- int fd;
- char *devname;
- void *mapaddr;
- uint64_t offset;
- uint64_t pagesz;
- struct pci_map *maps;
-
- maps = uio_res->maps;
- devname = uio_res->path;
- pagesz = sysconf(_SC_PAGESIZE);
-
- /* allocate memory to keep path */
- maps[map_idx].path = rte_malloc(NULL, strlen(devname) + 1, 0);
- if (maps[map_idx].path == NULL) {
- RTE_LOG(ERR, EAL, "Cannot allocate memory for path: %s\n",
- strerror(errno));
- return -1;
- }
-
- /*
- * open resource file, to mmap it
- */
- fd = open(devname, O_RDWR);
- if (fd < 0) {
- RTE_LOG(ERR, EAL, "Cannot open %s: %s\n",
- devname, strerror(errno));
- goto error;
- }
-
- /* if matching map is found, then use it */
- offset = res_idx * pagesz;
- mapaddr = pci_map_resource(NULL, fd, (off_t)offset,
- (size_t)dev->mem_resource[res_idx].len, 0);
- close(fd);
- if (mapaddr == MAP_FAILED)
- goto error;
-
- maps[map_idx].phaddr = dev->mem_resource[res_idx].phys_addr;
- maps[map_idx].size = dev->mem_resource[res_idx].len;
- maps[map_idx].addr = mapaddr;
- maps[map_idx].offset = offset;
- strcpy(maps[map_idx].path, devname);
- dev->mem_resource[res_idx].addr = mapaddr;
-
- return 0;
-
-error:
- rte_free(maps[map_idx].path);
- return -1;
-}
-
-static int
-pci_scan_one(int dev_pci_fd, struct pci_conf *conf)
-{
- struct rte_pci_device *dev;
- struct pci_bar_io bar;
- unsigned i, max;
-
- dev = malloc(sizeof(*dev));
- if (dev == NULL) {
- return -1;
- }
-
- memset(dev, 0, sizeof(*dev));
- dev->addr.domain = conf->pc_sel.pc_domain;
- dev->addr.bus = conf->pc_sel.pc_bus;
- dev->addr.devid = conf->pc_sel.pc_dev;
- dev->addr.function = conf->pc_sel.pc_func;
-
- /* get vendor id */
- dev->id.vendor_id = conf->pc_vendor;
-
- /* get device id */
- dev->id.device_id = conf->pc_device;
-
- /* get subsystem_vendor id */
- dev->id.subsystem_vendor_id = conf->pc_subvendor;
-
- /* get subsystem_device id */
- dev->id.subsystem_device_id = conf->pc_subdevice;
-
- /* get class id */
- dev->id.class_id = (conf->pc_class << 16) |
- (conf->pc_subclass << 8) |
- (conf->pc_progif);
-
- /* TODO: get max_vfs */
- dev->max_vfs = 0;
-
- /* FreeBSD has no NUMA support (yet) */
- dev->device.numa_node = 0;
-
- pci_name_set(dev);
-
- /* FreeBSD has only one pass through driver */
- dev->kdrv = RTE_KDRV_NIC_UIO;
-
- /* parse resources */
- switch (conf->pc_hdr & PCIM_HDRTYPE) {
- case PCIM_HDRTYPE_NORMAL:
- max = PCIR_MAX_BAR_0;
- break;
- case PCIM_HDRTYPE_BRIDGE:
- max = PCIR_MAX_BAR_1;
- break;
- case PCIM_HDRTYPE_CARDBUS:
- max = PCIR_MAX_BAR_2;
- break;
- default:
- goto skipdev;
- }
-
- for (i = 0; i <= max; i++) {
- bar.pbi_sel = conf->pc_sel;
- bar.pbi_reg = PCIR_BAR(i);
- if (ioctl(dev_pci_fd, PCIOCGETBAR, &bar) < 0)
- continue;
-
- dev->mem_resource[i].len = bar.pbi_length;
- if (PCI_BAR_IO(bar.pbi_base)) {
- dev->mem_resource[i].addr = (void *)(bar.pbi_base & ~((uint64_t)0xf));
- continue;
- }
- dev->mem_resource[i].phys_addr = bar.pbi_base & ~((uint64_t)0xf);
- }
-
- /* device is valid, add in list (sorted) */
- if (TAILQ_EMPTY(&rte_pci_bus.device_list)) {
- rte_pci_add_device(dev);
- }
- else {
- struct rte_pci_device *dev2 = NULL;
- int ret;
-
- TAILQ_FOREACH(dev2, &rte_pci_bus.device_list, next) {
- ret = rte_eal_compare_pci_addr(&dev->addr, &dev2->addr);
- if (ret > 0)
- continue;
- else if (ret < 0) {
- rte_pci_insert_device(dev2, dev);
- } else { /* already registered */
- dev2->kdrv = dev->kdrv;
- dev2->max_vfs = dev->max_vfs;
- pci_name_set(dev2);
- memmove(dev2->mem_resource,
- dev->mem_resource,
- sizeof(dev->mem_resource));
- free(dev);
- }
- return 0;
- }
- rte_pci_add_device(dev);
- }
-
- return 0;
-
-skipdev:
- free(dev);
- return 0;
-}
-
-/*
- * Scan the content of the PCI bus, and add the devices in the devices
- * list. Call pci_scan_one() for each pci entry found.
- */
-int
-rte_pci_scan(void)
-{
- int fd;
- unsigned dev_count = 0;
- struct pci_conf matches[16];
- struct pci_conf_io conf_io = {
- .pat_buf_len = 0,
- .num_patterns = 0,
- .patterns = NULL,
- .match_buf_len = sizeof(matches),
- .matches = &matches[0],
- };
-
- /* for debug purposes, PCI can be disabled */
- if (internal_config.no_pci)
- return 0;
-
- fd = open("/dev/pci", O_RDONLY);
- if (fd < 0) {
- RTE_LOG(ERR, EAL, "%s(): error opening /dev/pci\n", __func__);
- goto error;
- }
-
- do {
- unsigned i;
- if (ioctl(fd, PCIOCGETCONF, &conf_io) < 0) {
- RTE_LOG(ERR, EAL, "%s(): error with ioctl on /dev/pci: %s\n",
- __func__, strerror(errno));
- goto error;
- }
-
- for (i = 0; i < conf_io.num_matches; i++)
- if (pci_scan_one(fd, &matches[i]) < 0)
- goto error;
-
- dev_count += conf_io.num_matches;
- } while(conf_io.status == PCI_GETCONF_MORE_DEVS);
-
- close(fd);
-
- RTE_LOG(DEBUG, EAL, "PCI scan found %u devices\n", dev_count);
- return 0;
-
-error:
- if (fd >= 0)
- close(fd);
- return -1;
-}
-
-int
-pci_update_device(const struct rte_pci_addr *addr)
-{
- int fd;
- struct pci_conf matches[2];
- struct pci_match_conf match = {
- .pc_sel = {
- .pc_domain = addr->domain,
- .pc_bus = addr->bus,
- .pc_dev = addr->devid,
- .pc_func = addr->function,
- },
- };
- struct pci_conf_io conf_io = {
- .pat_buf_len = 0,
- .num_patterns = 1,
- .patterns = &match,
- .match_buf_len = sizeof(matches),
- .matches = &matches[0],
- };
-
- fd = open("/dev/pci", O_RDONLY);
- if (fd < 0) {
- RTE_LOG(ERR, EAL, "%s(): error opening /dev/pci\n", __func__);
- goto error;
- }
-
- if (ioctl(fd, PCIOCGETCONF, &conf_io) < 0) {
- RTE_LOG(ERR, EAL, "%s(): error with ioctl on /dev/pci: %s\n",
- __func__, strerror(errno));
- goto error;
- }
-
- if (conf_io.num_matches != 1)
- goto error;
-
- if (pci_scan_one(fd, &matches[0]) < 0)
- goto error;
-
- close(fd);
-
- return 0;
-
-error:
- if (fd >= 0)
- close(fd);
- return -1;
-}
-
-/* Read PCI config space. */
-int rte_pci_read_config(const struct rte_pci_device *dev,
- void *buf, size_t len, off_t offset)
-{
- int fd = -1;
- int size;
- struct pci_io pi = {
- .pi_sel = {
- .pc_domain = dev->addr.domain,
- .pc_bus = dev->addr.bus,
- .pc_dev = dev->addr.devid,
- .pc_func = dev->addr.function,
- },
- .pi_reg = offset,
- };
-
- fd = open("/dev/pci", O_RDWR);
- if (fd < 0) {
- RTE_LOG(ERR, EAL, "%s(): error opening /dev/pci\n", __func__);
- goto error;
- }
-
- while (len > 0) {
- size = (len >= 4) ? 4 : ((len >= 2) ? 2 : 1);
- pi.pi_width = size;
-
- if (ioctl(fd, PCIOCREAD, &pi) < 0)
- goto error;
- memcpy(buf, &pi.pi_data, size);
-
- buf = (char *)buf + size;
- pi.pi_reg += size;
- len -= size;
- }
- close(fd);
-
- return 0;
-
- error:
- if (fd >= 0)
- close(fd);
- return -1;
-}
-
-/* Write PCI config space. */
-int rte_pci_write_config(const struct rte_pci_device *dev,
- const void *buf, size_t len, off_t offset)
-{
- int fd = -1;
-
- struct pci_io pi = {
- .pi_sel = {
- .pc_domain = dev->addr.domain,
- .pc_bus = dev->addr.bus,
- .pc_dev = dev->addr.devid,
- .pc_func = dev->addr.function,
- },
- .pi_reg = offset,
- .pi_data = *(const uint32_t *)buf,
- .pi_width = len,
- };
-
- if (len == 3 || len > sizeof(pi.pi_data)) {
- RTE_LOG(ERR, EAL, "%s(): invalid pci read length\n", __func__);
- goto error;
- }
-
- memcpy(&pi.pi_data, buf, len);
-
- fd = open("/dev/pci", O_RDWR);
- if (fd < 0) {
- RTE_LOG(ERR, EAL, "%s(): error opening /dev/pci\n", __func__);
- goto error;
- }
-
- if (ioctl(fd, PCIOCWRITE, &pi) < 0)
- goto error;
-
- close(fd);
- return 0;
-
- error:
- if (fd >= 0)
- close(fd);
- return -1;
-}
-
-int
-rte_pci_ioport_map(struct rte_pci_device *dev, int bar,
- struct rte_pci_ioport *p)
-{
- int ret;
-
- switch (dev->kdrv) {
-#if defined(RTE_ARCH_X86)
- case RTE_KDRV_NIC_UIO:
- if ((uintptr_t) dev->mem_resource[bar].addr <= UINT16_MAX) {
- p->base = (uintptr_t)dev->mem_resource[bar].addr;
- ret = 0;
- } else
- ret = -1;
- break;
-#endif
- default:
- ret = -1;
- break;
- }
-
- if (!ret)
- p->dev = dev;
-
- return ret;
-}
-
-static void
-pci_uio_ioport_read(struct rte_pci_ioport *p,
- void *data, size_t len, off_t offset)
-{
-#if defined(RTE_ARCH_X86)
- uint8_t *d;
- int size;
- unsigned short reg = p->base + offset;
-
- for (d = data; len > 0; d += size, reg += size, len -= size) {
- if (len >= 4) {
- size = 4;
- *(uint32_t *)d = inl(reg);
- } else if (len >= 2) {
- size = 2;
- *(uint16_t *)d = inw(reg);
- } else {
- size = 1;
- *d = inb(reg);
- }
- }
-#else
- RTE_SET_USED(p);
- RTE_SET_USED(data);
- RTE_SET_USED(len);
- RTE_SET_USED(offset);
-#endif
-}
-
-void
-rte_pci_ioport_read(struct rte_pci_ioport *p,
- void *data, size_t len, off_t offset)
-{
- switch (p->dev->kdrv) {
- case RTE_KDRV_NIC_UIO:
- pci_uio_ioport_read(p, data, len, offset);
- break;
- default:
- break;
- }
-}
-
-static void
-pci_uio_ioport_write(struct rte_pci_ioport *p,
- const void *data, size_t len, off_t offset)
-{
-#if defined(RTE_ARCH_X86)
- const uint8_t *s;
- int size;
- unsigned short reg = p->base + offset;
-
- for (s = data; len > 0; s += size, reg += size, len -= size) {
- if (len >= 4) {
- size = 4;
- outl(reg, *(const uint32_t *)s);
- } else if (len >= 2) {
- size = 2;
- outw(reg, *(const uint16_t *)s);
- } else {
- size = 1;
- outb(reg, *s);
- }
- }
-#else
- RTE_SET_USED(p);
- RTE_SET_USED(data);
- RTE_SET_USED(len);
- RTE_SET_USED(offset);
-#endif
-}
-
-void
-rte_pci_ioport_write(struct rte_pci_ioport *p,
- const void *data, size_t len, off_t offset)
-{
- switch (p->dev->kdrv) {
- case RTE_KDRV_NIC_UIO:
- pci_uio_ioport_write(p, data, len, offset);
- break;
- default:
- break;
- }
-}
-
-int
-rte_pci_ioport_unmap(struct rte_pci_ioport *p)
-{
- int ret;
-
- switch (p->dev->kdrv) {
-#if defined(RTE_ARCH_X86)
- case RTE_KDRV_NIC_UIO:
- ret = 0;
- break;
-#endif
- default:
- ret = -1;
- break;
- }
-
- return ret;
-}
diff --git a/lib/librte_eal/bsdapp/eal/eal_thread.c b/lib/librte_eal/bsdapp/eal/eal_thread.c
index 783d68c5..2a2136a2 100644
--- a/lib/librte_eal/bsdapp/eal/eal_thread.c
+++ b/lib/librte_eal/bsdapp/eal/eal_thread.c
@@ -46,7 +46,6 @@
#include <rte_launch.h>
#include <rte_log.h>
#include <rte_memory.h>
-#include <rte_memzone.h>
#include <rte_per_lcore.h>
#include <rte_eal.h>
#include <rte_lcore.h>
diff --git a/lib/librte_eal/bsdapp/eal/eal_timer.c b/lib/librte_eal/bsdapp/eal/eal_timer.c
index f12d9bd2..14421943 100644
--- a/lib/librte_eal/bsdapp/eal/eal_timer.c
+++ b/lib/librte_eal/bsdapp/eal/eal_timer.c
@@ -42,7 +42,6 @@
#include <rte_log.h>
#include <rte_cycles.h>
#include <rte_memory.h>
-#include <rte_memzone.h>
#include <rte_eal.h>
#include <rte_debug.h>
diff --git a/lib/librte_eal/bsdapp/eal/include/exec-env/rte_dom0_common.h b/lib/librte_eal/bsdapp/eal/include/exec-env/rte_dom0_common.h
deleted file mode 100644
index 99a33432..00000000
--- a/lib/librte_eal/bsdapp/eal/include/exec-env/rte_dom0_common.h
+++ /dev/null
@@ -1,107 +0,0 @@
-/*-
- * This file is provided under a dual BSD/LGPLv2 license. When using or
- * redistributing this file, you may do so under either license.
- *
- * GNU LESSER GENERAL PUBLIC LICENSE
- *
- * Copyright(c) 2007-2014 Intel Corporation. All rights reserved.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of version 2.1 of the GNU Lesser General Public License
- * as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
- *
- * Contact Information:
- * Intel Corporation
- *
- *
- * BSD LICENSE
- *
- * Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- *
- * * Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in
- * the documentation and/or other materials provided with the
- * distribution.
- * * Neither the name of Intel Corporation nor the names of its
- * contributors may be used to endorse or promote products derived
- * from this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- *
- */
-
-#ifndef _RTE_DOM0_COMMON_H_
-#define _RTE_DOM0_COMMON_H_
-
-#ifdef __KERNEL__
-#include <linux/if.h>
-#endif
-
-#define DOM0_NAME_MAX 256
-#define DOM0_MM_DEV "/dev/dom0_mm"
-
-#define DOM0_CONTIG_NUM_ORDER 9 /**< 2M order */
-#define DOM0_NUM_MEMSEG 512 /**< Maximum nb. of memory segment. */
-#define DOM0_MEMBLOCK_SIZE 0x200000 /**< Maximum nb. of memory block(2M). */
-#define DOM0_CONFIG_MEMSIZE 4096 /**< Maximum config memory size(4G). */
-#define DOM0_NUM_MEMBLOCK (DOM0_CONFIG_MEMSIZE / 2) /**< Maximum nb. of 2M memory block. */
-
-#define RTE_DOM0_IOCTL_PREPARE_MEMSEG _IOWR(0, 1 , struct memory_info)
-#define RTE_DOM0_IOCTL_ATTACH_TO_MEMSEG _IOWR(0, 2 , char *)
-#define RTE_DOM0_IOCTL_GET_NUM_MEMSEG _IOWR(0, 3, int)
-#define RTE_DOM0_IOCTL_GET_MEMSEG_INFO _IOWR(0, 4, void *)
-
-/**
- * A structure used to store memory information.
- */
-struct memory_info {
- char name[DOM0_NAME_MAX];
- uint64_t size;
-};
-
-/**
- * A structure used to store memory segment information.
- */
-struct memseg_info {
- uint32_t idx;
- uint64_t pfn;
- uint64_t size;
- uint64_t mfn[DOM0_NUM_MEMBLOCK];
-};
-
-/**
- * A structure used to store memory block information.
- */
-struct memblock_info {
- uint8_t exchange_flag;
- uint64_t vir_addr;
- uint64_t pfn;
- uint64_t mfn;
-};
-#endif /* _RTE_DOM0_COMMON_H_ */
diff --git a/lib/librte_eal/bsdapp/eal/include/exec-env/rte_interrupts.h b/lib/librte_eal/bsdapp/eal/include/exec-env/rte_interrupts.h
deleted file mode 100644
index c1995ee1..00000000
--- a/lib/librte_eal/bsdapp/eal/include/exec-env/rte_interrupts.h
+++ /dev/null
@@ -1,137 +0,0 @@
-/*-
- * BSD LICENSE
- *
- * Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- *
- * * Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in
- * the documentation and/or other materials provided with the
- * distribution.
- * * Neither the name of Intel Corporation nor the names of its
- * contributors may be used to endorse or promote products derived
- * from this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- */
-
-#ifndef _RTE_INTERRUPTS_H_
-#error "don't include this file directly, please include generic <rte_interrupts.h>"
-#endif
-
-#ifndef _RTE_BSDAPP_INTERRUPTS_H_
-#define _RTE_BSDAPP_INTERRUPTS_H_
-
-#define RTE_INTR_VEC_ZERO_OFFSET 0
-#define RTE_INTR_VEC_RXTX_OFFSET 1
-
-#define RTE_MAX_RXTX_INTR_VEC_ID 32
-
-enum rte_intr_handle_type {
- RTE_INTR_HANDLE_UNKNOWN = 0,
- RTE_INTR_HANDLE_UIO, /**< uio device handle */
- RTE_INTR_HANDLE_ALARM, /**< alarm handle */
- RTE_INTR_HANDLE_MAX
-};
-
-/** Handle for interrupts. */
-struct rte_intr_handle {
- int fd; /**< file descriptor */
- int uio_cfg_fd; /**< UIO config file descriptor */
- enum rte_intr_handle_type type; /**< handle type */
- int max_intr; /**< max interrupt requested */
- uint32_t nb_efd; /**< number of available efds */
- int *intr_vec; /**< intr vector number array */
-};
-
-/**
- * @param intr_handle
- * Pointer to the interrupt handle.
- * @param epfd
- * Epoll instance fd which the intr vector associated to.
- * @param op
- * The operation be performed for the vector.
- * Operation type of {ADD, DEL}.
- * @param vec
- * RX intr vector number added to the epoll instance wait list.
- * @param data
- * User raw data.
- * @return
- * - On success, zero.
- * - On failure, a negative value.
- */
-int
-rte_intr_rx_ctl(struct rte_intr_handle *intr_handle,
- int epfd, int op, unsigned int vec, void *data);
-
-/**
- * It enables the fastpath event fds if it's necessary.
- * It creates event fds when multi-vectors allowed,
- * otherwise it multiplexes the single event fds.
- *
- * @param intr_handle
- * Pointer to the interrupt handle.
- * @param nb_efd
- * Number of interrupt vector trying to enable.
- * The value 0 is not allowed.
- * @return
- * - On success, zero.
- * - On failure, a negative value.
- */
-int
-rte_intr_efd_enable(struct rte_intr_handle *intr_handle, uint32_t nb_efd);
-
-/**
- * It disable the fastpath event fds.
- * It deletes registered eventfds and closes the open fds.
- *
- * @param intr_handle
- * Pointer to the interrupt handle.
- */
-void
-rte_intr_efd_disable(struct rte_intr_handle *intr_handle);
-
-/**
- * The fastpath interrupt is enabled or not.
- *
- * @param intr_handle
- * Pointer to the interrupt handle.
- */
-int rte_intr_dp_is_en(struct rte_intr_handle *intr_handle);
-
-/**
- * The interrupt handle instance allows other cause or not.
- * Other cause stands for none fastpath interrupt.
- *
- * @param intr_handle
- * Pointer to the interrupt handle.
- */
-int rte_intr_allow_others(struct rte_intr_handle *intr_handle);
-
-/**
- * The multiple interrupt vector capability of interrupt handle instance.
- * It returns zero if no multiple interrupt vector support.
- *
- * @param intr_handle
- * Pointer to the interrupt handle.
- */
-int
-rte_intr_cap_multiple(struct rte_intr_handle *intr_handle);
-
-#endif /* _RTE_BSDAPP_INTERRUPTS_H_ */
diff --git a/lib/librte_eal/bsdapp/eal/rte_eal_version.map b/lib/librte_eal/bsdapp/eal/rte_eal_version.map
deleted file mode 100644
index aac6fd77..00000000
--- a/lib/librte_eal/bsdapp/eal/rte_eal_version.map
+++ /dev/null
@@ -1,239 +0,0 @@
-DPDK_2.0 {
- global:
-
- __rte_panic;
- devargs_list;
- eal_parse_sysfs_value;
- eal_timer_source;
- lcore_config;
- per_lcore__lcore_id;
- per_lcore__rte_errno;
- rte_calloc;
- rte_calloc_socket;
- rte_cpu_check_supported;
- rte_cpu_get_flag_enabled;
- rte_cycles_vmware_tsc_map;
- rte_delay_us;
- rte_dump_physmem_layout;
- rte_dump_registers;
- rte_dump_stack;
- rte_dump_tailq;
- rte_eal_alarm_cancel;
- rte_eal_alarm_set;
- rte_eal_devargs_add;
- rte_eal_devargs_dump;
- rte_eal_devargs_type_count;
- rte_eal_get_configuration;
- rte_eal_get_lcore_state;
- rte_eal_get_physmem_layout;
- rte_eal_get_physmem_size;
- rte_eal_has_hugepages;
- rte_eal_hpet_init;
- rte_eal_init;
- rte_eal_iopl_init;
- rte_eal_lcore_role;
- rte_eal_mp_remote_launch;
- rte_eal_mp_wait_lcore;
- rte_eal_parse_devargs_str;
- rte_eal_process_type;
- rte_eal_remote_launch;
- rte_eal_tailq_lookup;
- rte_eal_tailq_register;
- rte_eal_wait_lcore;
- rte_exit;
- rte_free;
- rte_get_hpet_cycles;
- rte_get_hpet_hz;
- rte_get_log_level;
- rte_get_log_type;
- rte_get_tsc_hz;
- rte_hexdump;
- rte_intr_callback_register;
- rte_intr_callback_unregister;
- rte_intr_disable;
- rte_intr_enable;
- rte_log;
- rte_log_cur_msg_loglevel;
- rte_log_cur_msg_logtype;
- rte_logs;
- rte_malloc;
- rte_malloc_dump_stats;
- rte_malloc_get_socket_stats;
- rte_malloc_set_limit;
- rte_malloc_socket;
- rte_malloc_validate;
- rte_malloc_virt2phy;
- rte_mem_lock_page;
- rte_mem_phy2mch;
- rte_mem_virt2phy;
- rte_memdump;
- rte_memory_get_nchannel;
- rte_memory_get_nrank;
- rte_memzone_dump;
- rte_memzone_lookup;
- rte_memzone_reserve;
- rte_memzone_reserve_aligned;
- rte_memzone_reserve_bounded;
- rte_memzone_walk;
- rte_openlog_stream;
- rte_realloc;
- rte_set_application_usage_hook;
- rte_set_log_level;
- rte_set_log_type;
- rte_socket_id;
- rte_strerror;
- rte_strsplit;
- rte_sys_gettid;
- rte_thread_get_affinity;
- rte_thread_set_affinity;
- rte_vlog;
- rte_xen_dom0_memory_attach;
- rte_xen_dom0_memory_init;
- rte_zmalloc;
- rte_zmalloc_socket;
-
- local: *;
-};
-
-DPDK_2.1 {
- global:
-
- rte_intr_allow_others;
- rte_intr_dp_is_en;
- rte_intr_efd_disable;
- rte_intr_efd_enable;
- rte_intr_rx_ctl;
- rte_memzone_free;
-
-} DPDK_2.0;
-
-DPDK_2.2 {
- global:
-
- rte_intr_cap_multiple;
- rte_keepalive_create;
- rte_keepalive_dispatch_pings;
- rte_keepalive_mark_alive;
- rte_keepalive_register_core;
- rte_xen_dom0_supported;
-
-} DPDK_2.1;
-
-DPDK_16.04 {
- global:
-
- rte_cpu_get_flag_name;
- rte_eal_primary_proc_alive;
-
-} DPDK_2.2;
-
-DPDK_16.07 {
- global:
-
- pci_get_sysfs_path;
- rte_keepalive_mark_sleep;
- rte_keepalive_register_relay_callback;
- rte_rtm_supported;
- rte_thread_setname;
-
-} DPDK_16.04;
-
-DPDK_16.11 {
- global:
-
- rte_delay_us_block;
- rte_delay_us_callback_register;
- rte_eal_dev_attach;
- rte_eal_dev_detach;
-
-} DPDK_16.07;
-
-DPDK_17.02 {
- global:
-
- rte_bus_dump;
- rte_bus_probe;
- rte_bus_register;
- rte_bus_scan;
- rte_bus_unregister;
-
-} DPDK_16.11;
-
-DPDK_17.05 {
- global:
-
- rte_cpu_is_supported;
- rte_log_dump;
- rte_log_register;
- rte_log_get_global_level;
- rte_log_set_global_level;
- rte_log_set_level;
- rte_log_set_level_regexp;
- rte_pci_detach;
- rte_pci_dump;
- rte_pci_ioport_map;
- rte_pci_ioport_read;
- rte_pci_ioport_unmap;
- rte_pci_ioport_write;
- rte_pci_map_device;
- rte_pci_probe;
- rte_pci_probe_one;
- rte_pci_read_config;
- rte_pci_register;
- rte_pci_scan;
- rte_pci_unmap_device;
- rte_pci_unregister;
- rte_pci_write_config;
- rte_vdev_init;
- rte_vdev_register;
- rte_vdev_uninit;
- rte_vdev_unregister;
- vfio_get_container_fd;
- vfio_get_group_fd;
- vfio_get_group_no;
-
-} DPDK_17.02;
-
-DPDK_17.08 {
- global:
-
- rte_bus_find;
- rte_bus_find_by_device;
- rte_bus_find_by_name;
- rte_log_get_level;
-
-} DPDK_17.05;
-
-EXPERIMENTAL {
- global:
-
- rte_eal_devargs_insert;
- rte_eal_devargs_parse;
- rte_eal_devargs_remove;
- rte_eal_hotplug_add;
- rte_eal_hotplug_remove;
- rte_service_disable_on_lcore;
- rte_service_dump;
- rte_service_enable_on_lcore;
- rte_service_get_by_id;
- rte_service_get_by_name;
- rte_service_get_count;
- rte_service_get_enabled_on_lcore;
- rte_service_is_running;
- rte_service_lcore_add;
- rte_service_lcore_count;
- rte_service_lcore_del;
- rte_service_lcore_list;
- rte_service_lcore_reset_all;
- rte_service_lcore_start;
- rte_service_lcore_stop;
- rte_service_probe_capability;
- rte_service_register;
- rte_service_reset;
- rte_service_set_stats_enable;
- rte_service_start;
- rte_service_start_with_defaults;
- rte_service_stop;
- rte_service_unregister;
-
-} DPDK_17.08;
diff --git a/lib/librte_eal/common/Makefile b/lib/librte_eal/common/Makefile
index e8fd67a2..9effd0d4 100644
--- a/lib/librte_eal/common/Makefile
+++ b/lib/librte_eal/common/Makefile
@@ -32,16 +32,18 @@
include $(RTE_SDK)/mk/rte.vars.mk
INC := rte_branch_prediction.h rte_common.h
-INC += rte_debug.h rte_eal.h rte_errno.h rte_launch.h rte_lcore.h
-INC += rte_log.h rte_memory.h rte_memzone.h rte_pci.h
+INC += rte_debug.h rte_eal.h rte_eal_interrupts.h
+INC += rte_errno.h rte_launch.h rte_lcore.h
+INC += rte_log.h rte_memory.h rte_memzone.h
INC += rte_per_lcore.h rte_random.h
INC += rte_tailq.h rte_interrupts.h rte_alarm.h
INC += rte_string_fns.h rte_version.h
INC += rte_eal_memconfig.h rte_malloc_heap.h
-INC += rte_hexdump.h rte_devargs.h rte_bus.h rte_dev.h rte_vdev.h
+INC += rte_hexdump.h rte_devargs.h rte_bus.h rte_dev.h
INC += rte_pci_dev_feature_defs.h rte_pci_dev_features.h
INC += rte_malloc.h rte_keepalive.h rte_time.h
INC += rte_service.h rte_service_component.h
+INC += rte_bitmap.h rte_vfio.h
GENERIC_INC := rte_atomic.h rte_byteorder.h rte_cycles.h rte_prefetch.h
GENERIC_INC += rte_spinlock.h rte_memcpy.h rte_cpuflags.h rte_rwlock.h
@@ -49,7 +51,7 @@ GENERIC_INC += rte_vect.h rte_pause.h rte_io.h
# defined in mk/arch/$(RTE_ARCH)/rte.vars.mk
ARCH_DIR ?= $(RTE_ARCH)
-ARCH_INC := $(notdir $(wildcard $(RTE_SDK)/lib/librte_eal/common/include/arch/$(ARCH_DIR)/*.h))
+ARCH_INC := $(sort $(notdir $(wildcard $(RTE_SDK)/lib/librte_eal/common/include/arch/$(ARCH_DIR)/*.h)))
SYMLINK-$(CONFIG_RTE_LIBRTE_EAL)-include := $(addprefix include/,$(INC))
SYMLINK-$(CONFIG_RTE_LIBRTE_EAL)-include += \
diff --git a/lib/librte_eal/common/arch/arm/rte_cpuflags.c b/lib/librte_eal/common/arch/arm/rte_cpuflags.c
index 5636e9c1..88f1cbe3 100644
--- a/lib/librte_eal/common/arch/arm/rte_cpuflags.c
+++ b/lib/librte_eal/common/arch/arm/rte_cpuflags.c
@@ -137,7 +137,7 @@ rte_cpu_get_features(hwcap_registers_t out)
_Elfx_auxv_t auxv;
auxv_fd = open("/proc/self/auxv", O_RDONLY);
- assert(auxv_fd);
+ assert(auxv_fd != -1);
while (read(auxv_fd, &auxv, sizeof(auxv)) == sizeof(auxv)) {
if (auxv.a_type == AT_HWCAP) {
out[REG_HWCAP] = auxv.a_un.a_val;
diff --git a/lib/librte_eal/common/arch/arm/rte_cycles.c b/lib/librte_eal/common/arch/arm/rte_cycles.c
new file mode 100644
index 00000000..3e31e5be
--- /dev/null
+++ b/lib/librte_eal/common/arch/arm/rte_cycles.c
@@ -0,0 +1,45 @@
+/*
+ * BSD LICENSE
+ *
+ * Copyright (C) Cavium, Inc. 2015.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Cavium, Inc nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "eal_private.h"
+
+uint64_t
+get_tsc_freq_arch(void)
+{
+#if defined RTE_ARCH_ARM64 && !defined RTE_ARM_EAL_RDTSC_USE_PMU
+ uint64_t freq;
+ asm volatile("mrs %0, cntfrq_el0" : "=r" (freq));
+ return freq;
+#else
+ return 0;
+#endif
+}
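
The new ARM implementation reads the generic timer's frequency register instead of calibrating at run time. As a standalone illustration only (a sketch assuming an AArch64 target and GCC-style inline asm, not part of the patch), the same registers can be read directly from user space: cntfrq_el0 gives the counter frequency and cntvct_el0 the current counter value.

#include <stdint.h>
#include <stdio.h>

int main(void)
{
#if defined(__aarch64__)
	uint64_t freq, ticks;

	/* Counter frequency in Hz and current virtual counter value. */
	asm volatile("mrs %0, cntfrq_el0" : "=r" (freq));
	asm volatile("mrs %0, cntvct_el0" : "=r" (ticks));
	printf("timer: %lu Hz, now at %lu ticks\n",
	       (unsigned long)freq, (unsigned long)ticks);
#endif
	return 0;
}
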
diff --git a/lib/librte_eal/common/arch/ppc_64/rte_cpuflags.c b/lib/librte_eal/common/arch/ppc_64/rte_cpuflags.c
index fcf96e04..970a61c5 100644
--- a/lib/librte_eal/common/arch/ppc_64/rte_cpuflags.c
+++ b/lib/librte_eal/common/arch/ppc_64/rte_cpuflags.c
@@ -108,7 +108,7 @@ rte_cpu_get_features(hwcap_registers_t out)
Elf64_auxv_t auxv;
auxv_fd = open("/proc/self/auxv", O_RDONLY);
- assert(auxv_fd);
+ assert(auxv_fd != -1);
while (read(auxv_fd, &auxv,
sizeof(Elf64_auxv_t)) == sizeof(Elf64_auxv_t)) {
if (auxv.a_type == AT_HWCAP)
diff --git a/lib/librte_eal/common/arch/ppc_64/rte_cycles.c b/lib/librte_eal/common/arch/ppc_64/rte_cycles.c
new file mode 100644
index 00000000..69a9f747
--- /dev/null
+++ b/lib/librte_eal/common/arch/ppc_64/rte_cycles.c
@@ -0,0 +1,52 @@
+/*
+ * BSD LICENSE
+ *
+ * Copyright (C) IBM Corporation 2014.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of IBM Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <rte_lcore.h>
+#include <rte_log.h>
+#include "eal_filesystem.h"
+#include "eal_private.h"
+
+static const char sys_cpu_dir[] = "/sys/devices/system/cpu";
+
+uint64_t
+get_tsc_freq_arch(void)
+{
+ unsigned long cpu_hz;
+ char path[PATH_MAX];
+
+ snprintf(path, sizeof(path), "%s/cpu%d/cpufreq/cpuinfo_cur_freq",
+ sys_cpu_dir, rte_get_master_lcore());
+ if (eal_parse_sysfs_value(path, &cpu_hz) < 0)
+ RTE_LOG(WARNING, EAL, "Unable to parse %s\n", path);
+
+ return cpu_hz*1000;
+}
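
The ppc_64 variant derives the frequency from sysfs via eal_parse_sysfs_value(), an internal EAL helper. A minimal stand-in (hypothetical, for illustration only) that reads a single unsigned value from such a file could look like this:

#include <stdio.h>

/* Hypothetical re-implementation of the sysfs read used above:
 * returns 0 and stores the value on success, -1 on any failure. */
static int parse_sysfs_value(const char *path, unsigned long *val)
{
	FILE *f = fopen(path, "r");

	if (f == NULL)
		return -1;
	if (fscanf(f, "%lu", val) != 1) {
		fclose(f);
		return -1;
	}
	fclose(f);
	return 0;
}
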
diff --git a/lib/librte_eal/common/arch/x86/rte_cpuflags.c b/lib/librte_eal/common/arch/x86/rte_cpuflags.c
index 01382571..7d4a0fef 100644
--- a/lib/librte_eal/common/arch/x86/rte_cpuflags.c
+++ b/lib/librte_eal/common/arch/x86/rte_cpuflags.c
@@ -36,6 +36,7 @@
#include <stdio.h>
#include <errno.h>
#include <stdint.h>
+#include <cpuid.h>
enum cpu_register_t {
RTE_REG_EAX = 0,
@@ -156,38 +157,12 @@ const struct feature_entry rte_cpu_feature_table[] = {
FEAT_DEF(INVTSC, 0x80000007, 0, RTE_REG_EDX, 8)
};
-/*
- * Execute CPUID instruction and get contents of a specific register
- *
- * This function, when compiled with GCC, will generate architecture-neutral
- * code, as per GCC manual.
- */
-static void
-rte_cpu_get_features(uint32_t leaf, uint32_t subleaf, cpuid_registers_t out)
-{
-#if defined(__i386__) && defined(__PIC__)
- /* %ebx is a forbidden register if we compile with -fPIC or -fPIE */
- asm volatile("movl %%ebx,%0 ; cpuid ; xchgl %%ebx,%0"
- : "=r" (out[RTE_REG_EBX]),
- "=a" (out[RTE_REG_EAX]),
- "=c" (out[RTE_REG_ECX]),
- "=d" (out[RTE_REG_EDX])
- : "a" (leaf), "c" (subleaf));
-#else
- asm volatile("cpuid"
- : "=a" (out[RTE_REG_EAX]),
- "=b" (out[RTE_REG_EBX]),
- "=c" (out[RTE_REG_ECX]),
- "=d" (out[RTE_REG_EDX])
- : "a" (leaf), "c" (subleaf));
-#endif
-}
-
int
rte_cpu_get_flag_enabled(enum rte_cpu_flag_t feature)
{
const struct feature_entry *feat;
cpuid_registers_t regs;
+ unsigned int maxleaf;
if (feature >= RTE_CPUFLAG_NUMFLAGS)
/* Flag does not match anything in the feature tables */
@@ -199,13 +174,14 @@ rte_cpu_get_flag_enabled(enum rte_cpu_flag_t feature)
/* This entry in the table wasn't filled out! */
return -EFAULT;
- rte_cpu_get_features(feat->leaf & 0xffff0000, 0, regs);
- if (((regs[RTE_REG_EAX] ^ feat->leaf) & 0xffff0000) ||
- regs[RTE_REG_EAX] < feat->leaf)
+ maxleaf = __get_cpuid_max(feat->leaf & 0x80000000, NULL);
+
+ if (maxleaf < feat->leaf)
return 0;
- /* get the cpuid leaf containing the desired feature */
- rte_cpu_get_features(feat->leaf, feat->subleaf, regs);
+ __cpuid_count(feat->leaf, feat->subleaf,
+ regs[RTE_REG_EAX], regs[RTE_REG_EBX],
+ regs[RTE_REG_ECX], regs[RTE_REG_EDX]);
/* check if the feature is enabled */
return (regs[feat->reg] >> feat->bit) & 1;
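
The rewrite drops the hand-rolled inline assembly in favour of the compiler's <cpuid.h> builtins: __get_cpuid_max() bounds-checks the requested leaf and __cpuid_count() fills the four registers for a leaf/subleaf pair. For comparison, a self-contained sketch of the same pattern checking the AVX2 bit (CPUID.(EAX=7,ECX=0):EBX bit 5):

#include <cpuid.h>
#include <stdio.h>

int main(void)
{
	unsigned int a, b, c, d;

	/* Highest supported basic leaf; bail out if leaf 7 is absent. */
	if (__get_cpuid_max(0, NULL) < 7)
		return 0;

	__cpuid_count(7, 0, a, b, c, d);
	printf("AVX2: %s\n", (b & (1u << 5)) ? "yes" : "no");
	return 0;
}
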
diff --git a/lib/librte_eal/common/arch/x86/rte_cycles.c b/lib/librte_eal/common/arch/x86/rte_cycles.c
new file mode 100644
index 00000000..417850ee
--- /dev/null
+++ b/lib/librte_eal/common/arch/x86/rte_cycles.c
@@ -0,0 +1,152 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright(c) 2017 Intel Corporation. All rights reserved.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <fcntl.h>
+#include <unistd.h>
+#include <cpuid.h>
+
+#include <rte_common.h>
+
+#include "eal_private.h"
+
+static unsigned int
+rte_cpu_get_model(uint32_t fam_mod_step)
+{
+ uint32_t family, model, ext_model;
+
+ family = (fam_mod_step >> 8) & 0xf;
+ model = (fam_mod_step >> 4) & 0xf;
+
+ if (family == 6 || family == 15) {
+ ext_model = (fam_mod_step >> 16) & 0xf;
+ model += (ext_model << 4);
+ }
+
+ return model;
+}
+
+static int32_t
+rdmsr(int msr, uint64_t *val)
+{
+#ifdef RTE_EXEC_ENV_LINUXAPP
+ int fd;
+ int ret;
+
+ fd = open("/dev/cpu/0/msr", O_RDONLY);
+ if (fd < 0)
+ return fd;
+
+ ret = pread(fd, val, sizeof(uint64_t), msr);
+
+ close(fd);
+
+ return ret;
+#else
+ RTE_SET_USED(msr);
+ RTE_SET_USED(val);
+
+ return -1;
+#endif
+}
+
+static uint32_t
+check_model_wsm_nhm(uint8_t model)
+{
+ switch (model) {
+ /* Westmere */
+ case 0x25:
+ case 0x2C:
+ case 0x2F:
+ /* Nehalem */
+ case 0x1E:
+ case 0x1F:
+ case 0x1A:
+ case 0x2E:
+ return 1;
+ }
+
+ return 0;
+}
+
+static uint32_t
+check_model_gdm_dnv(uint8_t model)
+{
+ switch (model) {
+ /* Goldmont */
+ case 0x5C:
+ /* Denverton */
+ case 0x5F:
+ return 1;
+ }
+
+ return 0;
+}
+
+uint64_t
+get_tsc_freq_arch(void)
+{
+ uint64_t tsc_hz = 0;
+ uint32_t a, b, c, d, maxleaf;
+ uint8_t mult, model;
+ int32_t ret;
+
+ /*
+ * Time Stamp Counter and Nominal Core Crystal Clock
+ * Information Leaf
+ */
+ maxleaf = __get_cpuid_max(0, NULL);
+
+ if (maxleaf >= 0x15) {
+ __cpuid(0x15, a, b, c, d);
+
+ /* EBX : TSC/Crystal ratio, ECX : Crystal Hz */
+ if (b && c)
+ return c * (b / a);
+ }
+
+ __cpuid(0x1, a, b, c, d);
+ model = rte_cpu_get_model(a);
+
+ if (check_model_wsm_nhm(model))
+ mult = 133;
+ else if ((c & bit_AVX) || check_model_gdm_dnv(model))
+ mult = 100;
+ else
+ return 0;
+
+ ret = rdmsr(0xCE, &tsc_hz);
+ if (ret < 0)
+ return 0;
+
+ return ((tsc_hz >> 8) & 0xff) * mult * 1E6;
+}
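
On recent CPUs the function above prefers CPUID leaf 0x15, where ECX reports the crystal frequency and EBX/EAX the TSC-to-crystal ratio; it only falls back to the MSR 0xCE ratio (multiplied by a model-dependent 100 or 133 MHz base clock) when that leaf is not populated. A minimal user-space sketch of the leaf-0x15 path, illustrative only:

#include <cpuid.h>
#include <inttypes.h>
#include <stdint.h>
#include <stdio.h>

int main(void)
{
	unsigned int a, b, c, d;

	if (__get_cpuid_max(0, NULL) < 0x15)
		return 0;

	/* EAX:EBX = crystal-to-TSC ratio denominator:numerator, ECX = crystal Hz. */
	__cpuid(0x15, a, b, c, d);
	if (a != 0 && b != 0 && c != 0)
		printf("TSC: %" PRIu64 " Hz\n", (uint64_t)c * b / a);
	return 0;
}
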
diff --git a/lib/librte_eal/common/arch/x86/rte_memcpy.c b/lib/librte_eal/common/arch/x86/rte_memcpy.c
new file mode 100644
index 00000000..174bef15
--- /dev/null
+++ b/lib/librte_eal/common/arch/x86/rte_memcpy.c
@@ -0,0 +1,58 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright(c) 2010-2017 Intel Corporation. All rights reserved.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <rte_memcpy.h>
+#include <rte_cpuflags.h>
+#include <rte_log.h>
+
+void *(*rte_memcpy_ptr)(void *dst, const void *src, size_t n) = NULL;
+
+RTE_INIT(rte_memcpy_init)
+{
+#ifdef CC_SUPPORT_AVX512F
+ if (rte_cpu_get_flag_enabled(RTE_CPUFLAG_AVX512F)) {
+ rte_memcpy_ptr = rte_memcpy_avx512f;
+ RTE_LOG(DEBUG, EAL, "AVX512 memcpy is using!\n");
+ return;
+ }
+#endif
+#ifdef CC_SUPPORT_AVX2
+ if (rte_cpu_get_flag_enabled(RTE_CPUFLAG_AVX2)) {
+ rte_memcpy_ptr = rte_memcpy_avx2;
+ RTE_LOG(DEBUG, EAL, "AVX2 memcpy is using!\n");
+ return;
+ }
+#endif
+ rte_memcpy_ptr = rte_memcpy_sse;
+ RTE_LOG(DEBUG, EAL, "Default SSE/AVX memcpy is using!\n");
+}
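
This new file introduces run-time ISA dispatch: a single function pointer is resolved once by a constructor (RTE_INIT) before main() runs, so callers never pay for a CPU-flag check per copy. A generic sketch of the pattern, with a placeholder backend standing in for the real AVX-512/AVX2/SSE implementations:

#include <stddef.h>
#include <string.h>

/* Placeholder backend; the real code selects among ISA-specific copies. */
static void *copy_fallback(void *dst, const void *src, size_t n)
{
	return memcpy(dst, src, n);
}

/* Dispatch pointer used by callers. */
static void *(*copy_ptr)(void *dst, const void *src, size_t n);

/* Resolved exactly once, before main(), mirroring the RTE_INIT usage above. */
__attribute__((constructor))
static void copy_init(void)
{
	/* A real resolver would probe CPU flags here and pick the widest ISA. */
	copy_ptr = copy_fallback;
}
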
diff --git a/lib/librte_eal/common/arch/x86/rte_spinlock.c b/lib/librte_eal/common/arch/x86/rte_spinlock.c
index c383e9f0..1244a90b 100644
--- a/lib/librte_eal/common/arch/x86/rte_spinlock.c
+++ b/lib/librte_eal/common/arch/x86/rte_spinlock.c
@@ -38,8 +38,7 @@
uint8_t rte_rtm_supported; /* cache the flag to avoid the overhead
of the rte_cpu_get_flag_enabled function */
-static void __attribute__((constructor))
-rte_rtm_init(void)
+RTE_INIT(rte_rtm_init)
{
rte_rtm_supported = rte_cpu_get_flag_enabled(RTE_CPUFLAG_RTM);
}
diff --git a/lib/librte_eal/common/eal_common_bus.c b/lib/librte_eal/common/eal_common_bus.c
index 08bec2d9..3e022d51 100644
--- a/lib/librte_eal/common/eal_common_bus.c
+++ b/lib/librte_eal/common/eal_common_bus.c
@@ -35,6 +35,7 @@
#include <sys/queue.h>
#include <rte_bus.h>
+#include <rte_debug.h>
#include "eal_private.h"
@@ -73,11 +74,9 @@ rte_bus_scan(void)
TAILQ_FOREACH(bus, &rte_bus_list, next) {
ret = bus->scan();
- if (ret) {
+ if (ret)
RTE_LOG(ERR, EAL, "Scan for (%s) bus failed.\n",
bus->name);
- return ret;
- }
}
return 0;
@@ -97,20 +96,16 @@ rte_bus_probe(void)
}
ret = bus->probe();
- if (ret) {
+ if (ret)
RTE_LOG(ERR, EAL, "Bus (%s) probe failed.\n",
bus->name);
- return ret;
- }
}
if (vbus) {
ret = vbus->probe();
- if (ret) {
+ if (ret)
RTE_LOG(ERR, EAL, "Bus (%s) probe failed.\n",
vbus->name);
- return ret;
- }
}
return 0;
@@ -152,15 +147,16 @@ struct rte_bus *
rte_bus_find(const struct rte_bus *start, rte_bus_cmp_t cmp,
const void *data)
{
- struct rte_bus *bus = NULL;
+ struct rte_bus *bus;
- TAILQ_FOREACH(bus, &rte_bus_list, next) {
- if (start && bus == start) {
- start = NULL; /* starting point found */
- continue;
- }
+ if (start != NULL)
+ bus = TAILQ_NEXT(start, next);
+ else
+ bus = TAILQ_FIRST(&rte_bus_list);
+ while (bus != NULL) {
if (cmp(bus, data) == 0)
break;
+ bus = TAILQ_NEXT(bus, next);
}
return bus;
}
@@ -222,3 +218,26 @@ rte_bus_find_by_device_name(const char *str)
c[0] = '\0';
return rte_bus_find(NULL, bus_can_parse, name);
}
+
+
+/*
+ * Get iommu class of devices on the bus.
+ */
+enum rte_iova_mode
+rte_bus_get_iommu_class(void)
+{
+ int mode = RTE_IOVA_DC;
+ struct rte_bus *bus;
+
+ TAILQ_FOREACH(bus, &rte_bus_list, next) {
+
+ if (bus->get_iommu_class)
+ mode |= bus->get_iommu_class();
+ }
+
+ if (mode != RTE_IOVA_VA) {
+ /* Use default IOVA mode */
+ mode = RTE_IOVA_PA;
+ }
+ return mode;
+}
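
rte_bus_find() keeps its contract — the cmp callback returns 0 on a match, and when start is given iteration resumes after it — but now walks from TAILQ_NEXT(start) instead of scanning until the starting point is found. A small usage sketch (the callback and helper below are illustrative, not part of the patch):

#include <string.h>

#include <rte_bus.h>

/* Match callback: rte_bus_find() stops at the first bus for which this is 0. */
static int bus_name_match(const struct rte_bus *bus, const void *name)
{
	return strcmp(bus->name, name);
}

/* Look up the "pci" bus, or NULL if it is not registered. */
static struct rte_bus *find_pci_bus(void)
{
	return rte_bus_find(NULL, bus_name_match, "pci");
}
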
diff --git a/lib/librte_eal/common/eal_common_dev.c b/lib/librte_eal/common/eal_common_dev.c
index e2512755..dda8f583 100644
--- a/lib/librte_eal/common/eal_common_dev.c
+++ b/lib/librte_eal/common/eal_common_dev.c
@@ -67,7 +67,6 @@ static int cmp_dev_name(const struct rte_device *dev, const void *_name)
int rte_eal_dev_attach(const char *name, const char *devargs)
{
struct rte_bus *bus;
- int ret;
if (name == NULL || devargs == NULL) {
RTE_LOG(ERR, EAL, "Invalid device or arguments provided\n");
@@ -80,22 +79,13 @@ int rte_eal_dev_attach(const char *name, const char *devargs)
name);
return -EINVAL;
}
- if (strcmp(bus->name, "pci") == 0)
- return rte_eal_hotplug_add("pci", name, devargs);
- if (strcmp(bus->name, "vdev") != 0) {
- RTE_LOG(ERR, EAL, "Device attach is only supported for PCI and vdev devices.\n");
- return -ENOTSUP;
- }
+ if (strcmp(bus->name, "pci") == 0 || strcmp(bus->name, "vdev") == 0)
+ return rte_eal_hotplug_add(bus->name, name, devargs);
- /*
- * If we haven't found a bus device the user meant to "hotplug" a
- * virtual device instead.
- */
- ret = rte_vdev_init(name, devargs);
- if (ret)
- RTE_LOG(ERR, EAL, "Driver cannot attach the device (%s)\n",
- name);
- return ret;
+ RTE_LOG(ERR, EAL,
+ "Device attach is only supported for PCI and vdev devices.\n");
+
+ return -ENOTSUP;
}
int rte_eal_dev_detach(struct rte_device *dev)
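
After this change rte_eal_dev_attach() is a thin shim: both supported buses are routed through the experimental rte_eal_hotplug_add() path instead of calling rte_vdev_init() directly. A hedged caller-side sketch (the bus name, device name and empty argument string are example values only):

#include <rte_dev.h>

/* Attach a virtual null PMD through the generic hotplug entry point;
 * "vdev"/"net_null0" are illustrative, not prescribed by the patch. */
static int attach_example_vdev(void)
{
	return rte_eal_hotplug_add("vdev", "net_null0", "");
}
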
diff --git a/lib/librte_eal/common/eal_common_errno.c b/lib/librte_eal/common/eal_common_errno.c
index de48d8e4..dc5b7c04 100644
--- a/lib/librte_eal/common/eal_common_errno.c
+++ b/lib/librte_eal/common/eal_common_errno.c
@@ -46,18 +46,20 @@ RTE_DEFINE_PER_LCORE(int, _rte_errno);
const char *
rte_strerror(int errnum)
{
+ /* BSD puts a colon in the "unknown error" messages, Linux doesn't */
+#ifdef RTE_EXEC_ENV_BSDAPP
+ static const char *sep = ":";
+#else
+ static const char *sep = "";
+#endif
#define RETVAL_SZ 256
static RTE_DEFINE_PER_LCORE(char[RETVAL_SZ], retval);
+ char *ret = RTE_PER_LCORE(retval);
/* since some implementations of strerror_r throw an error
* themselves if errnum is too big, we handle that case here */
- if (errnum > RTE_MAX_ERRNO)
- snprintf(RTE_PER_LCORE(retval), RETVAL_SZ,
-#ifdef RTE_EXEC_ENV_BSDAPP
- "Unknown error: %d", errnum);
-#else
- "Unknown error %d", errnum);
-#endif
+ if (errnum >= RTE_MAX_ERRNO)
+ snprintf(ret, RETVAL_SZ, "Unknown error%s %d", sep, errnum);
else
switch (errnum){
case E_RTE_SECONDARY:
@@ -65,8 +67,10 @@ rte_strerror(int errnum)
case E_RTE_NO_CONFIG:
return "Missing rte_config structure";
default:
- strerror_r(errnum, RTE_PER_LCORE(retval), RETVAL_SZ);
+ if (strerror_r(errnum, ret, RETVAL_SZ) != 0)
+ snprintf(ret, RETVAL_SZ, "Unknown error%s %d",
+ sep, errnum);
}
- return RTE_PER_LCORE(retval);
+ return ret;
}
diff --git a/lib/librte_eal/common/eal_common_launch.c b/lib/librte_eal/common/eal_common_launch.c
index 137c191d..2d5cae9f 100644
--- a/lib/librte_eal/common/eal_common_launch.c
+++ b/lib/librte_eal/common/eal_common_launch.c
@@ -38,7 +38,6 @@
#include <rte_launch.h>
#include <rte_memory.h>
-#include <rte_memzone.h>
#include <rte_eal.h>
#include <rte_atomic.h>
#include <rte_pause.h>
diff --git a/lib/librte_eal/common/eal_common_log.c b/lib/librte_eal/common/eal_common_log.c
index 0e3b9320..be404136 100644
--- a/lib/librte_eal/common/eal_common_log.c
+++ b/lib/librte_eal/common/eal_common_log.c
@@ -89,14 +89,6 @@ rte_log_set_global_level(uint32_t level)
rte_logs.level = (uint32_t)level;
}
-/* Set global log level */
-/* replaced by rte_log_set_global_level */
-__rte_deprecated void
-rte_set_log_level(uint32_t level)
-{
- rte_log_set_global_level(level);
-}
-
/* Get global log level */
uint32_t
rte_log_get_global_level(void)
@@ -104,14 +96,6 @@ rte_log_get_global_level(void)
return rte_logs.level;
}
-/* Get global log level */
-/* replaced by rte_log_get_global_level */
-uint32_t
-rte_get_log_level(void)
-{
- return rte_log_get_global_level();
-}
-
int
rte_log_get_level(uint32_t type)
{
@@ -121,30 +105,6 @@ rte_log_get_level(uint32_t type)
return rte_logs.dynamic_types[type].loglevel;
}
-/* Set global log type */
-__rte_deprecated void
-rte_set_log_type(uint32_t type, int enable)
-{
- if (type < RTE_LOGTYPE_FIRST_EXT_ID) {
- if (enable)
- rte_logs.type |= 1 << type;
- else
- rte_logs.type &= ~(1 << type);
- }
-
- if (enable)
- rte_log_set_level(type, 0);
- else
- rte_log_set_level(type, RTE_LOG_DEBUG);
-}
-
-/* Get global log type */
-__rte_deprecated uint32_t
-rte_get_log_type(void)
-{
- return rte_logs.type;
-}
-
int
rte_log_set_level(uint32_t type, uint32_t level)
{
@@ -289,7 +249,8 @@ static const struct logtype logtype_strings[] = {
{RTE_LOGTYPE_USER8, "user8"}
};
-RTE_INIT(rte_log_init);
+/* Logging should be the first initializer (before drivers and bus) */
+RTE_INIT_PRIO(rte_log_init, 101);
static void
rte_log_init(void)
{
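
RTE_INIT_PRIO() wraps GCC's constructor-priority attribute so that the logging subsystem is registered before the bus and driver constructors that may want to log. The ordering rule is the usual one for that attribute, sketched here outside of DPDK (priorities up to 100 are reserved for the implementation, hence 101):

#include <stdio.h>

/* Lower numbers run earlier; this runs before default-priority constructors. */
__attribute__((constructor(101)))
static void early_init(void)
{
	puts("early_init: runs first");
}

__attribute__((constructor))
static void late_init(void)
{
	puts("late_init: runs after early_init");
}
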
diff --git a/lib/librte_eal/common/eal_common_memory.c b/lib/librte_eal/common/eal_common_memory.c
index 996877ef..fc6c44da 100644
--- a/lib/librte_eal/common/eal_common_memory.c
+++ b/lib/librte_eal/common/eal_common_memory.c
@@ -41,7 +41,6 @@
#include <sys/queue.h>
#include <rte_memory.h>
-#include <rte_memzone.h>
#include <rte_eal.h>
#include <rte_eal_memconfig.h>
#include <rte_log.h>
@@ -96,11 +95,11 @@ rte_dump_physmem_layout(FILE *f)
if (mcfg->memseg[i].addr == NULL)
break;
- fprintf(f, "Segment %u: phys:0x%"PRIx64", len:%zu, "
+ fprintf(f, "Segment %u: IOVA:0x%"PRIx64", len:%zu, "
"virt:%p, socket_id:%"PRId32", "
"hugepage_sz:%"PRIu64", nchannel:%"PRIx32", "
"nrank:%"PRIx32"\n", i,
- mcfg->memseg[i].phys_addr,
+ mcfg->memseg[i].iova,
mcfg->memseg[i].len,
mcfg->memseg[i].addr,
mcfg->memseg[i].socket_id,
diff --git a/lib/librte_eal/common/eal_common_memzone.c b/lib/librte_eal/common/eal_common_memzone.c
index 3026e36b..ea072a25 100644
--- a/lib/librte_eal/common/eal_common_memzone.c
+++ b/lib/librte_eal/common/eal_common_memzone.c
@@ -251,7 +251,7 @@ memzone_reserve_aligned_thread_unsafe(const char *name, size_t len,
mcfg->memzone_cnt++;
snprintf(mz->name, sizeof(mz->name), "%s", name);
- mz->phys_addr = rte_malloc_virt2phy(mz_addr);
+ mz->iova = rte_malloc_virt2iova(mz_addr);
mz->addr = mz_addr;
mz->len = (requested_len == 0 ? elem->size : requested_len);
mz->hugepage_sz = elem->ms->hugepage_sz;
@@ -391,10 +391,10 @@ rte_memzone_dump(FILE *f)
for (i=0; i<RTE_MAX_MEMZONE; i++) {
if (mcfg->memzone[i].addr == NULL)
break;
- fprintf(f, "Zone %u: name:<%s>, phys:0x%"PRIx64", len:0x%zx"
+ fprintf(f, "Zone %u: name:<%s>, IO:0x%"PRIx64", len:0x%zx"
", virt:%p, socket_id:%"PRId32", flags:%"PRIx32"\n", i,
mcfg->memzone[i].name,
- mcfg->memzone[i].phys_addr,
+ mcfg->memzone[i].iova,
mcfg->memzone[i].len,
mcfg->memzone[i].addr,
mcfg->memzone[i].socket_id,
diff --git a/lib/librte_eal/common/eal_common_options.c b/lib/librte_eal/common/eal_common_options.c
index 1da185e5..996a0342 100644
--- a/lib/librte_eal/common/eal_common_options.c
+++ b/lib/librte_eal/common/eal_common_options.c
@@ -85,6 +85,7 @@ eal_long_options[] = {
{OPT_LCORES, 1, NULL, OPT_LCORES_NUM },
{OPT_LOG_LEVEL, 1, NULL, OPT_LOG_LEVEL_NUM },
{OPT_MASTER_LCORE, 1, NULL, OPT_MASTER_LCORE_NUM },
+ {OPT_MBUF_POOL_OPS_NAME, 1, NULL, OPT_MBUF_POOL_OPS_NAME_NUM},
{OPT_NO_HPET, 0, NULL, OPT_NO_HPET_NUM },
{OPT_NO_HUGE, 0, NULL, OPT_NO_HUGE_NUM },
{OPT_NO_PCI, 0, NULL, OPT_NO_PCI_NUM },
@@ -97,7 +98,6 @@ eal_long_options[] = {
{OPT_VDEV, 1, NULL, OPT_VDEV_NUM },
{OPT_VFIO_INTR, 1, NULL, OPT_VFIO_INTR_NUM },
{OPT_VMWARE_TSC_MAP, 0, NULL, OPT_VMWARE_TSC_MAP_NUM },
- {OPT_XEN_DOM0, 0, NULL, OPT_XEN_DOM0_NUM },
{0, 0, NULL, 0 }
};
@@ -208,8 +208,6 @@ eal_reset_internal_config(struct internal_config *internal_cfg)
internal_cfg->syslog_facility = LOG_DAEMON;
- internal_cfg->xen_dom0_support = 0;
-
/* if set to NONE, interrupt mode is determined automatically */
internal_cfg->vfio_intr_mode = RTE_INTR_MODE_NONE;
@@ -220,6 +218,7 @@ eal_reset_internal_config(struct internal_config *internal_cfg)
#endif
internal_cfg->vmware_tsc_map = 0;
internal_cfg->create_uio_dev = 0;
+ internal_cfg->mbuf_pool_ops_name = RTE_MBUF_DEFAULT_MEMPOOL_OPS;
}
static int
@@ -279,12 +278,13 @@ int
eal_plugins_init(void)
{
struct shared_driver *solib = NULL;
+ struct stat sb;
- if (*default_solib_dir != '\0')
+ if (*default_solib_dir != '\0' && stat(default_solib_dir, &sb) == 0 &&
+ S_ISDIR(sb.st_mode))
eal_plugin_add(default_solib_dir);
TAILQ_FOREACH(solib, &solib_list, next) {
- struct stat sb;
if (stat(solib->name, &sb) == 0 && S_ISDIR(sb.st_mode)) {
if (eal_plugindir_init(solib->name) == -1) {
@@ -1279,6 +1279,7 @@ eal_common_usage(void)
" '@' can be omitted if cpus and lcores have the same value\n"
" -s SERVICE COREMASK Hexadecimal bitmask of cores to be used as service cores\n"
" --"OPT_MASTER_LCORE" ID Core ID that is used as master\n"
+ " --"OPT_MBUF_POOL_OPS_NAME" Pool ops name for mbuf to use\n"
" -n CHANNELS Number of memory channels\n"
" -m MB Memory to allocate (see also --"OPT_SOCKET_MEM")\n"
" -r RANKS Force number of memory ranks (don't detect)\n"
diff --git a/lib/librte_eal/common/eal_common_pci.c b/lib/librte_eal/common/eal_common_pci.c
deleted file mode 100644
index 52fd38cd..00000000
--- a/lib/librte_eal/common/eal_common_pci.c
+++ /dev/null
@@ -1,580 +0,0 @@
-/*-
- * BSD LICENSE
- *
- * Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
- * Copyright 2013-2014 6WIND S.A.
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- *
- * * Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in
- * the documentation and/or other materials provided with the
- * distribution.
- * * Neither the name of Intel Corporation nor the names of its
- * contributors may be used to endorse or promote products derived
- * from this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- */
-
-#include <string.h>
-#include <inttypes.h>
-#include <stdint.h>
-#include <stdlib.h>
-#include <stdio.h>
-#include <sys/queue.h>
-#include <sys/mman.h>
-
-#include <rte_errno.h>
-#include <rte_interrupts.h>
-#include <rte_log.h>
-#include <rte_bus.h>
-#include <rte_pci.h>
-#include <rte_per_lcore.h>
-#include <rte_memory.h>
-#include <rte_memzone.h>
-#include <rte_eal.h>
-#include <rte_string_fns.h>
-#include <rte_common.h>
-#include <rte_devargs.h>
-
-#include "eal_private.h"
-
-extern struct rte_pci_bus rte_pci_bus;
-
-#define SYSFS_PCI_DEVICES "/sys/bus/pci/devices"
-
-const char *pci_get_sysfs_path(void)
-{
- const char *path = NULL;
-
- path = getenv("SYSFS_PCI_DEVICES");
- if (path == NULL)
- return SYSFS_PCI_DEVICES;
-
- return path;
-}
-
-static struct rte_devargs *pci_devargs_lookup(struct rte_pci_device *dev)
-{
- struct rte_devargs *devargs;
- struct rte_pci_addr addr;
- struct rte_bus *pbus;
-
- pbus = rte_bus_find_by_name("pci");
- TAILQ_FOREACH(devargs, &devargs_list, next) {
- if (devargs->bus != pbus)
- continue;
- devargs->bus->parse(devargs->name, &addr);
- if (!rte_eal_compare_pci_addr(&dev->addr, &addr))
- return devargs;
- }
- return NULL;
-}
-
-void
-pci_name_set(struct rte_pci_device *dev)
-{
- struct rte_devargs *devargs;
-
- /* Each device has its internal, canonical name set. */
- rte_pci_device_name(&dev->addr,
- dev->name, sizeof(dev->name));
- devargs = pci_devargs_lookup(dev);
- dev->device.devargs = devargs;
- /* In blacklist mode, if the device is not blacklisted, no
- * rte_devargs exists for it.
- */
- if (devargs != NULL)
- /* If an rte_devargs exists, the generic rte_device uses the
- * given name as its namea
- */
- dev->device.name = dev->device.devargs->name;
- else
- /* Otherwise, it uses the internal, canonical form. */
- dev->device.name = dev->name;
-}
-
-/* map a particular resource from a file */
-void *
-pci_map_resource(void *requested_addr, int fd, off_t offset, size_t size,
- int additional_flags)
-{
- void *mapaddr;
-
- /* Map the PCI memory resource of device */
- mapaddr = mmap(requested_addr, size, PROT_READ | PROT_WRITE,
- MAP_SHARED | additional_flags, fd, offset);
- if (mapaddr == MAP_FAILED) {
- RTE_LOG(ERR, EAL, "%s(): cannot mmap(%d, %p, 0x%lx, 0x%lx): %s (%p)\n",
- __func__, fd, requested_addr,
- (unsigned long)size, (unsigned long)offset,
- strerror(errno), mapaddr);
- } else
- RTE_LOG(DEBUG, EAL, " PCI memory mapped at %p\n", mapaddr);
-
- return mapaddr;
-}
-
-/* unmap a particular resource */
-void
-pci_unmap_resource(void *requested_addr, size_t size)
-{
- if (requested_addr == NULL)
- return;
-
- /* Unmap the PCI memory resource of device */
- if (munmap(requested_addr, size)) {
- RTE_LOG(ERR, EAL, "%s(): cannot munmap(%p, 0x%lx): %s\n",
- __func__, requested_addr, (unsigned long)size,
- strerror(errno));
- } else
- RTE_LOG(DEBUG, EAL, " PCI memory unmapped at %p\n",
- requested_addr);
-}
-
-/*
- * Match the PCI Driver and Device using the ID Table
- *
- * @param pci_drv
- * PCI driver from which ID table would be extracted
- * @param pci_dev
- * PCI device to match against the driver
- * @return
- * 1 for successful match
- * 0 for unsuccessful match
- */
-static int
-rte_pci_match(const struct rte_pci_driver *pci_drv,
- const struct rte_pci_device *pci_dev)
-{
- const struct rte_pci_id *id_table;
-
- for (id_table = pci_drv->id_table; id_table->vendor_id != 0;
- id_table++) {
- /* check if device's identifiers match the driver's ones */
- if (id_table->vendor_id != pci_dev->id.vendor_id &&
- id_table->vendor_id != PCI_ANY_ID)
- continue;
- if (id_table->device_id != pci_dev->id.device_id &&
- id_table->device_id != PCI_ANY_ID)
- continue;
- if (id_table->subsystem_vendor_id !=
- pci_dev->id.subsystem_vendor_id &&
- id_table->subsystem_vendor_id != PCI_ANY_ID)
- continue;
- if (id_table->subsystem_device_id !=
- pci_dev->id.subsystem_device_id &&
- id_table->subsystem_device_id != PCI_ANY_ID)
- continue;
- if (id_table->class_id != pci_dev->id.class_id &&
- id_table->class_id != RTE_CLASS_ANY_ID)
- continue;
-
- return 1;
- }
-
- return 0;
-}
-
-/*
- * If vendor/device ID match, call the probe() function of the
- * driver.
- */
-static int
-rte_pci_probe_one_driver(struct rte_pci_driver *dr,
- struct rte_pci_device *dev)
-{
- int ret;
- struct rte_pci_addr *loc;
-
- if ((dr == NULL) || (dev == NULL))
- return -EINVAL;
-
- loc = &dev->addr;
-
- /* The device is not blacklisted; Check if driver supports it */
- if (!rte_pci_match(dr, dev))
- /* Match of device and driver failed */
- return 1;
-
- RTE_LOG(INFO, EAL, "PCI device "PCI_PRI_FMT" on NUMA socket %i\n",
- loc->domain, loc->bus, loc->devid, loc->function,
- dev->device.numa_node);
-
- /* no initialization when blacklisted, return without error */
- if (dev->device.devargs != NULL &&
- dev->device.devargs->policy ==
- RTE_DEV_BLACKLISTED) {
- RTE_LOG(INFO, EAL, " Device is blacklisted, not"
- " initializing\n");
- return 1;
- }
-
- if (dev->device.numa_node < 0) {
- RTE_LOG(WARNING, EAL, " Invalid NUMA socket, default to 0\n");
- dev->device.numa_node = 0;
- }
-
- RTE_LOG(INFO, EAL, " probe driver: %x:%x %s\n", dev->id.vendor_id,
- dev->id.device_id, dr->driver.name);
-
- if (dr->drv_flags & RTE_PCI_DRV_NEED_MAPPING) {
- /* map resources for devices that use igb_uio */
- ret = rte_pci_map_device(dev);
- if (ret != 0)
- return ret;
- }
-
- /* reference driver structure */
- dev->driver = dr;
- dev->device.driver = &dr->driver;
-
- /* call the driver probe() function */
- ret = dr->probe(dr, dev);
- if (ret) {
- dev->driver = NULL;
- dev->device.driver = NULL;
- if ((dr->drv_flags & RTE_PCI_DRV_NEED_MAPPING) &&
- /* Don't unmap if device is unsupported and
- * driver needs mapped resources.
- */
- !(ret > 0 &&
- (dr->drv_flags & RTE_PCI_DRV_KEEP_MAPPED_RES)))
- rte_pci_unmap_device(dev);
- }
-
- return ret;
-}
-
-/*
- * If vendor/device ID match, call the remove() function of the
- * driver.
- */
-static int
-rte_pci_detach_dev(struct rte_pci_device *dev)
-{
- struct rte_pci_addr *loc;
- struct rte_pci_driver *dr;
-
- if (dev == NULL)
- return -EINVAL;
-
- dr = dev->driver;
- loc = &dev->addr;
-
- RTE_LOG(DEBUG, EAL, "PCI device "PCI_PRI_FMT" on NUMA socket %i\n",
- loc->domain, loc->bus, loc->devid,
- loc->function, dev->device.numa_node);
-
- RTE_LOG(DEBUG, EAL, " remove driver: %x:%x %s\n", dev->id.vendor_id,
- dev->id.device_id, dr->driver.name);
-
- if (dr->remove && (dr->remove(dev) < 0))
- return -1; /* negative value is an error */
-
- /* clear driver structure */
- dev->driver = NULL;
-
- if (dr->drv_flags & RTE_PCI_DRV_NEED_MAPPING)
- /* unmap resources for devices that use igb_uio */
- rte_pci_unmap_device(dev);
-
- return 0;
-}
-
-/*
- * If vendor/device ID match, call the probe() function of all
- * registered driver for the given device. Return -1 if initialization
- * failed, return 1 if no driver is found for this device.
- */
-static int
-pci_probe_all_drivers(struct rte_pci_device *dev)
-{
- struct rte_pci_driver *dr = NULL;
- int rc = 0;
-
- if (dev == NULL)
- return -1;
-
- /* Check if a driver is already loaded */
- if (dev->driver != NULL)
- return 0;
-
- FOREACH_DRIVER_ON_PCIBUS(dr) {
- rc = rte_pci_probe_one_driver(dr, dev);
- if (rc < 0)
- /* negative value is an error */
- return -1;
- if (rc > 0)
- /* positive value means driver doesn't support it */
- continue;
- return 0;
- }
- return 1;
-}
-
-/*
- * Find the pci device specified by pci address, then invoke probe function of
- * the driver of the device.
- */
-int
-rte_pci_probe_one(const struct rte_pci_addr *addr)
-{
- struct rte_pci_device *dev = NULL;
-
- int ret = 0;
-
- if (addr == NULL)
- return -1;
-
- /* update current pci device in global list, kernel bindings might have
- * changed since last time we looked at it.
- */
- if (pci_update_device(addr) < 0)
- goto err_return;
-
- FOREACH_DEVICE_ON_PCIBUS(dev) {
- if (rte_eal_compare_pci_addr(&dev->addr, addr))
- continue;
-
- ret = pci_probe_all_drivers(dev);
- if (ret)
- goto err_return;
- return 0;
- }
- return -1;
-
-err_return:
- RTE_LOG(WARNING, EAL,
- "Requested device " PCI_PRI_FMT " cannot be used\n",
- addr->domain, addr->bus, addr->devid, addr->function);
- return -1;
-}
-
-/*
- * Detach device specified by its pci address.
- */
-int
-rte_pci_detach(const struct rte_pci_addr *addr)
-{
- struct rte_pci_device *dev = NULL;
- int ret = 0;
-
- if (addr == NULL)
- return -1;
-
- FOREACH_DEVICE_ON_PCIBUS(dev) {
- if (rte_eal_compare_pci_addr(&dev->addr, addr))
- continue;
-
- ret = rte_pci_detach_dev(dev);
- if (ret < 0)
- /* negative value is an error */
- goto err_return;
- if (ret > 0)
- /* positive value means driver doesn't support it */
- continue;
-
- rte_pci_remove_device(dev);
- free(dev);
- return 0;
- }
- return -1;
-
-err_return:
- RTE_LOG(WARNING, EAL, "Requested device " PCI_PRI_FMT
- " cannot be used\n", dev->addr.domain, dev->addr.bus,
- dev->addr.devid, dev->addr.function);
- return -1;
-}
-
-/*
- * Scan the content of the PCI bus, and call the probe() function for
- * all registered drivers that have a matching entry in its id_table
- * for discovered devices.
- */
-int
-rte_pci_probe(void)
-{
- struct rte_pci_device *dev = NULL;
- size_t probed = 0, failed = 0;
- struct rte_devargs *devargs;
- int probe_all = 0;
- int ret = 0;
-
- if (rte_pci_bus.bus.conf.scan_mode != RTE_BUS_SCAN_WHITELIST)
- probe_all = 1;
-
- FOREACH_DEVICE_ON_PCIBUS(dev) {
- probed++;
-
- devargs = dev->device.devargs;
- /* probe all or only whitelisted devices */
- if (probe_all)
- ret = pci_probe_all_drivers(dev);
- else if (devargs != NULL &&
- devargs->policy == RTE_DEV_WHITELISTED)
- ret = pci_probe_all_drivers(dev);
- if (ret < 0) {
- RTE_LOG(ERR, EAL, "Requested device " PCI_PRI_FMT
- " cannot be used\n", dev->addr.domain, dev->addr.bus,
- dev->addr.devid, dev->addr.function);
- rte_errno = errno;
- failed++;
- ret = 0;
- }
- }
-
- return (probed && probed == failed) ? -1 : 0;
-}
-
-/* dump one device */
-static int
-pci_dump_one_device(FILE *f, struct rte_pci_device *dev)
-{
- int i;
-
- fprintf(f, PCI_PRI_FMT, dev->addr.domain, dev->addr.bus,
- dev->addr.devid, dev->addr.function);
- fprintf(f, " - vendor:%x device:%x\n", dev->id.vendor_id,
- dev->id.device_id);
-
- for (i = 0; i != sizeof(dev->mem_resource) /
- sizeof(dev->mem_resource[0]); i++) {
- fprintf(f, " %16.16"PRIx64" %16.16"PRIx64"\n",
- dev->mem_resource[i].phys_addr,
- dev->mem_resource[i].len);
- }
- return 0;
-}
-
-/* dump devices on the bus */
-void
-rte_pci_dump(FILE *f)
-{
- struct rte_pci_device *dev = NULL;
-
- FOREACH_DEVICE_ON_PCIBUS(dev) {
- pci_dump_one_device(f, dev);
- }
-}
-
-static int
-pci_parse(const char *name, void *addr)
-{
- struct rte_pci_addr *out = addr;
- struct rte_pci_addr pci_addr;
- bool parse;
-
- parse = (eal_parse_pci_BDF(name, &pci_addr) == 0 ||
- eal_parse_pci_DomBDF(name, &pci_addr) == 0);
- if (parse && addr != NULL)
- *out = pci_addr;
- return parse == false;
-}
-
-/* register a driver */
-void
-rte_pci_register(struct rte_pci_driver *driver)
-{
- TAILQ_INSERT_TAIL(&rte_pci_bus.driver_list, driver, next);
- driver->bus = &rte_pci_bus;
-}
-
-/* unregister a driver */
-void
-rte_pci_unregister(struct rte_pci_driver *driver)
-{
- TAILQ_REMOVE(&rte_pci_bus.driver_list, driver, next);
- driver->bus = NULL;
-}
-
-/* Add a device to PCI bus */
-void
-rte_pci_add_device(struct rte_pci_device *pci_dev)
-{
- TAILQ_INSERT_TAIL(&rte_pci_bus.device_list, pci_dev, next);
-}
-
-/* Insert a device into a predefined position in PCI bus */
-void
-rte_pci_insert_device(struct rte_pci_device *exist_pci_dev,
- struct rte_pci_device *new_pci_dev)
-{
- TAILQ_INSERT_BEFORE(exist_pci_dev, new_pci_dev, next);
-}
-
-/* Remove a device from PCI bus */
-void
-rte_pci_remove_device(struct rte_pci_device *pci_dev)
-{
- TAILQ_REMOVE(&rte_pci_bus.device_list, pci_dev, next);
-}
-
-static struct rte_device *
-pci_find_device(const struct rte_device *start, rte_dev_cmp_t cmp,
- const void *data)
-{
- struct rte_pci_device *dev;
-
- FOREACH_DEVICE_ON_PCIBUS(dev) {
- if (start && &dev->device == start) {
- start = NULL; /* starting point found */
- continue;
- }
- if (cmp(&dev->device, data) == 0)
- return &dev->device;
- }
-
- return NULL;
-}
-
-static int
-pci_plug(struct rte_device *dev)
-{
- return pci_probe_all_drivers(RTE_DEV_TO_PCI(dev));
-}
-
-static int
-pci_unplug(struct rte_device *dev)
-{
- struct rte_pci_device *pdev;
- int ret;
-
- pdev = RTE_DEV_TO_PCI(dev);
- ret = rte_pci_detach_dev(pdev);
- rte_pci_remove_device(pdev);
- free(pdev);
- return ret;
-}
-
-struct rte_pci_bus rte_pci_bus = {
- .bus = {
- .scan = rte_pci_scan,
- .probe = rte_pci_probe,
- .find_device = pci_find_device,
- .plug = pci_plug,
- .unplug = pci_unplug,
- .parse = pci_parse,
- },
- .device_list = TAILQ_HEAD_INITIALIZER(rte_pci_bus.device_list),
- .driver_list = TAILQ_HEAD_INITIALIZER(rte_pci_bus.driver_list),
-};
-
-RTE_REGISTER_BUS(pci, rte_pci_bus.bus);
diff --git a/lib/librte_eal/common/eal_common_pci_uio.c b/lib/librte_eal/common/eal_common_pci_uio.c
deleted file mode 100644
index 367a6816..00000000
--- a/lib/librte_eal/common/eal_common_pci_uio.c
+++ /dev/null
@@ -1,233 +0,0 @@
-/*-
- * BSD LICENSE
- *
- * Copyright(c) 2010-2015 Intel Corporation. All rights reserved.
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- *
- * * Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in
- * the documentation and/or other materials provided with the
- * distribution.
- * * Neither the name of Intel Corporation nor the names of its
- * contributors may be used to endorse or promote products derived
- * from this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- */
-
-#include <fcntl.h>
-#include <string.h>
-#include <unistd.h>
-#include <sys/types.h>
-#include <sys/stat.h>
-#include <sys/mman.h>
-
-#include <rte_eal.h>
-#include <rte_tailq.h>
-#include <rte_log.h>
-#include <rte_malloc.h>
-
-#include "eal_private.h"
-
-static struct rte_tailq_elem rte_uio_tailq = {
- .name = "UIO_RESOURCE_LIST",
-};
-EAL_REGISTER_TAILQ(rte_uio_tailq)
-
-static int
-pci_uio_map_secondary(struct rte_pci_device *dev)
-{
- int fd, i, j;
- struct mapped_pci_resource *uio_res;
- struct mapped_pci_res_list *uio_res_list =
- RTE_TAILQ_CAST(rte_uio_tailq.head, mapped_pci_res_list);
-
- TAILQ_FOREACH(uio_res, uio_res_list, next) {
-
- /* skip this element if it doesn't match our PCI address */
- if (rte_eal_compare_pci_addr(&uio_res->pci_addr, &dev->addr))
- continue;
-
- for (i = 0; i != uio_res->nb_maps; i++) {
- /*
- * open devname, to mmap it
- */
- fd = open(uio_res->maps[i].path, O_RDWR);
- if (fd < 0) {
- RTE_LOG(ERR, EAL, "Cannot open %s: %s\n",
- uio_res->maps[i].path, strerror(errno));
- return -1;
- }
-
- void *mapaddr = pci_map_resource(uio_res->maps[i].addr,
- fd, (off_t)uio_res->maps[i].offset,
- (size_t)uio_res->maps[i].size, 0);
- /* fd is not needed in slave process, close it */
- close(fd);
- if (mapaddr != uio_res->maps[i].addr) {
- RTE_LOG(ERR, EAL,
- "Cannot mmap device resource file %s to address: %p\n",
- uio_res->maps[i].path,
- uio_res->maps[i].addr);
- if (mapaddr != MAP_FAILED) {
- /* unmap addrs correctly mapped */
- for (j = 0; j < i; j++)
- pci_unmap_resource(
- uio_res->maps[j].addr,
- (size_t)uio_res->maps[j].size);
- /* unmap addr wrongly mapped */
- pci_unmap_resource(mapaddr,
- (size_t)uio_res->maps[i].size);
- }
- return -1;
- }
- }
- return 0;
- }
-
- RTE_LOG(ERR, EAL, "Cannot find resource for device\n");
- return 1;
-}
-
-/* map the PCI resource of a PCI device in virtual memory */
-int
-pci_uio_map_resource(struct rte_pci_device *dev)
-{
- int i, map_idx = 0, ret;
- uint64_t phaddr;
- struct mapped_pci_resource *uio_res = NULL;
- struct mapped_pci_res_list *uio_res_list =
- RTE_TAILQ_CAST(rte_uio_tailq.head, mapped_pci_res_list);
-
- dev->intr_handle.fd = -1;
- dev->intr_handle.uio_cfg_fd = -1;
- dev->intr_handle.type = RTE_INTR_HANDLE_UNKNOWN;
-
- /* secondary processes - use already recorded details */
- if (rte_eal_process_type() != RTE_PROC_PRIMARY)
- return pci_uio_map_secondary(dev);
-
- /* allocate uio resource */
- ret = pci_uio_alloc_resource(dev, &uio_res);
- if (ret)
- return ret;
-
- /* Map all BARs */
- for (i = 0; i != PCI_MAX_RESOURCE; i++) {
- /* skip empty BAR */
- phaddr = dev->mem_resource[i].phys_addr;
- if (phaddr == 0)
- continue;
-
- ret = pci_uio_map_resource_by_index(dev, i,
- uio_res, map_idx);
- if (ret)
- goto error;
-
- map_idx++;
- }
-
- uio_res->nb_maps = map_idx;
-
- TAILQ_INSERT_TAIL(uio_res_list, uio_res, next);
-
- return 0;
-error:
- for (i = 0; i < map_idx; i++) {
- pci_unmap_resource(uio_res->maps[i].addr,
- (size_t)uio_res->maps[i].size);
- rte_free(uio_res->maps[i].path);
- }
- pci_uio_free_resource(dev, uio_res);
- return -1;
-}
-
-static void
-pci_uio_unmap(struct mapped_pci_resource *uio_res)
-{
- int i;
-
- if (uio_res == NULL)
- return;
-
- for (i = 0; i != uio_res->nb_maps; i++) {
- pci_unmap_resource(uio_res->maps[i].addr,
- (size_t)uio_res->maps[i].size);
- if (rte_eal_process_type() == RTE_PROC_PRIMARY)
- rte_free(uio_res->maps[i].path);
- }
-}
-
-static struct mapped_pci_resource *
-pci_uio_find_resource(struct rte_pci_device *dev)
-{
- struct mapped_pci_resource *uio_res;
- struct mapped_pci_res_list *uio_res_list =
- RTE_TAILQ_CAST(rte_uio_tailq.head, mapped_pci_res_list);
-
- if (dev == NULL)
- return NULL;
-
- TAILQ_FOREACH(uio_res, uio_res_list, next) {
-
- /* skip this element if it doesn't match our PCI address */
- if (!rte_eal_compare_pci_addr(&uio_res->pci_addr, &dev->addr))
- return uio_res;
- }
- return NULL;
-}
-
-/* unmap the PCI resource of a PCI device in virtual memory */
-void
-pci_uio_unmap_resource(struct rte_pci_device *dev)
-{
- struct mapped_pci_resource *uio_res;
- struct mapped_pci_res_list *uio_res_list =
- RTE_TAILQ_CAST(rte_uio_tailq.head, mapped_pci_res_list);
-
- if (dev == NULL)
- return;
-
- /* find an entry for the device */
- uio_res = pci_uio_find_resource(dev);
- if (uio_res == NULL)
- return;
-
- /* secondary processes - just free maps */
- if (rte_eal_process_type() != RTE_PROC_PRIMARY)
- return pci_uio_unmap(uio_res);
-
- TAILQ_REMOVE(uio_res_list, uio_res, next);
-
- /* unmap all resources */
- pci_uio_unmap(uio_res);
-
- /* free uio resource */
- rte_free(uio_res);
-
- /* close fd if in primary process */
- close(dev->intr_handle.fd);
- if (dev->intr_handle.uio_cfg_fd >= 0) {
- close(dev->intr_handle.uio_cfg_fd);
- dev->intr_handle.uio_cfg_fd = -1;
- }
-
- dev->intr_handle.fd = -1;
- dev->intr_handle.type = RTE_INTR_HANDLE_UNKNOWN;
-}
diff --git a/lib/librte_eal/common/eal_common_tailqs.c b/lib/librte_eal/common/eal_common_tailqs.c
index 55955f9e..6ae09fdb 100644
--- a/lib/librte_eal/common/eal_common_tailqs.c
+++ b/lib/librte_eal/common/eal_common_tailqs.c
@@ -40,7 +40,6 @@
#include <inttypes.h>
#include <rte_memory.h>
-#include <rte_memzone.h>
#include <rte_launch.h>
#include <rte_eal.h>
#include <rte_eal_memconfig.h>
diff --git a/lib/librte_eal/common/eal_common_thread.c b/lib/librte_eal/common/eal_common_thread.c
index 2405e93f..55e96963 100644
--- a/lib/librte_eal/common/eal_common_thread.c
+++ b/lib/librte_eal/common/eal_common_thread.c
@@ -53,6 +53,20 @@ unsigned rte_socket_id(void)
return RTE_PER_LCORE(_socket_id);
}
+int
+rte_lcore_has_role(unsigned int lcore_id, enum rte_lcore_role_t role)
+{
+ struct rte_config *cfg = rte_eal_get_configuration();
+
+ if (lcore_id >= RTE_MAX_LCORE)
+ return -EINVAL;
+
+ if (cfg->lcore_role[lcore_id] == role)
+ return 0;
+
+ return -EINVAL;
+}
+
int eal_cpuset_socket_id(rte_cpuset_t *cpusetp)
{
unsigned cpu = 0;
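
The new rte_lcore_has_role() returns 0 when the lcore has the queried role and -EINVAL otherwise, so callers test for equality with 0. A small usage sketch, assuming the ROLE_SERVICE enumerator from the EAL headers:

#include <rte_eal.h>
#include <rte_lcore.h>

/* Nonzero when the given lcore was set aside as a service core. */
static int lcore_is_service_core(unsigned int lcore_id)
{
	return rte_lcore_has_role(lcore_id, ROLE_SERVICE) == 0;
}
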
diff --git a/lib/librte_eal/common/eal_common_timer.c b/lib/librte_eal/common/eal_common_timer.c
index ed0b16d0..568ae2fd 100644
--- a/lib/librte_eal/common/eal_common_timer.c
+++ b/lib/librte_eal/common/eal_common_timer.c
@@ -80,8 +80,11 @@ estimate_tsc_freq(void)
void
set_tsc_freq(void)
{
- uint64_t freq = get_tsc_freq();
+ uint64_t freq;
+ freq = get_tsc_freq_arch();
+ if (!freq)
+ freq = get_tsc_freq();
if (!freq)
freq = estimate_tsc_freq();
@@ -94,8 +97,7 @@ void rte_delay_us_callback_register(void (*userfunc)(unsigned int))
rte_delay_us = userfunc;
}
-static void __attribute__((constructor))
-rte_timer_init(void)
+RTE_INIT(rte_timer_init)
{
/* set rte_delay_us_block as a delay function */
rte_delay_us_callback_register(rte_delay_us_block);
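
set_tsc_freq() now tries three sources in order: the architecture-specific reading added in this patch, then the OS-provided value, then a run-time estimate. The same fallback chain written out as a generic sketch with hypothetical probes (each returning 0 when it cannot determine the rate):

#include <stdint.h>

static uint64_t freq_from_cpu(void) { return 0; }
static uint64_t freq_from_os(void)  { return 0; }
static uint64_t freq_estimate(void) { return 1000000000; /* last resort */ }

static uint64_t resolve_tsc_hz(void)
{
	uint64_t hz = freq_from_cpu();

	if (hz == 0)
		hz = freq_from_os();
	if (hz == 0)
		hz = freq_estimate();
	return hz;
}
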
diff --git a/lib/librte_eal/common/eal_common_vdev.c b/lib/librte_eal/common/eal_common_vdev.c
deleted file mode 100644
index f7e547a6..00000000
--- a/lib/librte_eal/common/eal_common_vdev.c
+++ /dev/null
@@ -1,342 +0,0 @@
-/*-
- * BSD LICENSE
- *
- * Copyright(c) 2016 RehiveTech. All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- *
- * * Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in
- * the documentation and/or other materials provided with the
- * distribution.
- * * Neither the name of RehiveTech nor the names of its
- * contributors may be used to endorse or promote products derived
- * from this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- */
-
-#include <string.h>
-#include <inttypes.h>
-#include <stdio.h>
-#include <stdlib.h>
-#include <stdint.h>
-#include <stdbool.h>
-#include <sys/queue.h>
-
-#include <rte_eal.h>
-#include <rte_dev.h>
-#include <rte_bus.h>
-#include <rte_vdev.h>
-#include <rte_common.h>
-#include <rte_devargs.h>
-#include <rte_memory.h>
-#include <rte_errno.h>
-
-/* Forward declare to access virtual bus name */
-static struct rte_bus rte_vdev_bus;
-
-/** Double linked list of virtual device drivers. */
-TAILQ_HEAD(vdev_device_list, rte_vdev_device);
-
-static struct vdev_device_list vdev_device_list =
- TAILQ_HEAD_INITIALIZER(vdev_device_list);
-struct vdev_driver_list vdev_driver_list =
- TAILQ_HEAD_INITIALIZER(vdev_driver_list);
-
-/* register a driver */
-void
-rte_vdev_register(struct rte_vdev_driver *driver)
-{
- TAILQ_INSERT_TAIL(&vdev_driver_list, driver, next);
-}
-
-/* unregister a driver */
-void
-rte_vdev_unregister(struct rte_vdev_driver *driver)
-{
- TAILQ_REMOVE(&vdev_driver_list, driver, next);
-}
-
-static int
-vdev_parse(const char *name, void *addr)
-{
- struct rte_vdev_driver **out = addr;
- struct rte_vdev_driver *driver = NULL;
-
- TAILQ_FOREACH(driver, &vdev_driver_list, next) {
- if (strncmp(driver->driver.name, name,
- strlen(driver->driver.name)) == 0)
- break;
- if (driver->driver.alias &&
- strncmp(driver->driver.alias, name,
- strlen(driver->driver.alias)) == 0)
- break;
- }
- if (driver != NULL &&
- addr != NULL)
- *out = driver;
- return driver == NULL;
-}
-
-static int
-vdev_probe_all_drivers(struct rte_vdev_device *dev)
-{
- const char *name;
- struct rte_vdev_driver *driver;
- int ret;
-
- name = rte_vdev_device_name(dev);
-
- RTE_LOG(DEBUG, EAL, "Search driver %s to probe device %s\n", name,
- rte_vdev_device_name(dev));
-
- if (vdev_parse(name, &driver))
- return -1;
- dev->device.driver = &driver->driver;
- ret = driver->probe(dev);
- if (ret)
- dev->device.driver = NULL;
- return ret;
-}
-
-static struct rte_vdev_device *
-find_vdev(const char *name)
-{
- struct rte_vdev_device *dev;
-
- if (!name)
- return NULL;
-
- TAILQ_FOREACH(dev, &vdev_device_list, next) {
- const char *devname = rte_vdev_device_name(dev);
- if (!strncmp(devname, name, strlen(name)))
- return dev;
- }
-
- return NULL;
-}
-
-static struct rte_devargs *
-alloc_devargs(const char *name, const char *args)
-{
- struct rte_devargs *devargs;
- int ret;
-
- devargs = calloc(1, sizeof(*devargs));
- if (!devargs)
- return NULL;
-
- devargs->bus = &rte_vdev_bus;
- if (args)
- devargs->args = strdup(args);
- else
- devargs->args = strdup("");
-
- ret = snprintf(devargs->name, sizeof(devargs->name), "%s", name);
- if (ret < 0 || ret >= (int)sizeof(devargs->name)) {
- free(devargs->args);
- free(devargs);
- return NULL;
- }
-
- return devargs;
-}
-
-int
-rte_vdev_init(const char *name, const char *args)
-{
- struct rte_vdev_device *dev;
- struct rte_devargs *devargs;
- int ret;
-
- if (name == NULL)
- return -EINVAL;
-
- dev = find_vdev(name);
- if (dev)
- return -EEXIST;
-
- devargs = alloc_devargs(name, args);
- if (!devargs)
- return -ENOMEM;
-
- dev = calloc(1, sizeof(*dev));
- if (!dev) {
- ret = -ENOMEM;
- goto fail;
- }
-
- dev->device.devargs = devargs;
- dev->device.numa_node = SOCKET_ID_ANY;
- dev->device.name = devargs->name;
-
- ret = vdev_probe_all_drivers(dev);
- if (ret) {
- if (ret > 0)
- RTE_LOG(ERR, EAL, "no driver found for %s\n", name);
- goto fail;
- }
-
- TAILQ_INSERT_TAIL(&devargs_list, devargs, next);
-
- TAILQ_INSERT_TAIL(&vdev_device_list, dev, next);
- return 0;
-
-fail:
- free(devargs->args);
- free(devargs);
- free(dev);
- return ret;
-}
-
-static int
-vdev_remove_driver(struct rte_vdev_device *dev)
-{
- const char *name = rte_vdev_device_name(dev);
- const struct rte_vdev_driver *driver;
-
- if (!dev->device.driver) {
- RTE_LOG(DEBUG, EAL, "no driver attach to device %s\n", name);
- return 1;
- }
-
- driver = container_of(dev->device.driver, const struct rte_vdev_driver,
- driver);
- return driver->remove(dev);
-}
-
-int
-rte_vdev_uninit(const char *name)
-{
- struct rte_vdev_device *dev;
- struct rte_devargs *devargs;
- int ret;
-
- if (name == NULL)
- return -EINVAL;
-
- dev = find_vdev(name);
- if (!dev)
- return -ENOENT;
-
- devargs = dev->device.devargs;
-
- ret = vdev_remove_driver(dev);
- if (ret)
- return ret;
-
- TAILQ_REMOVE(&vdev_device_list, dev, next);
-
- TAILQ_REMOVE(&devargs_list, devargs, next);
-
- free(devargs->args);
- free(devargs);
- free(dev);
- return 0;
-}
-
-static int
-vdev_scan(void)
-{
- struct rte_vdev_device *dev;
- struct rte_devargs *devargs;
-
- /* for virtual devices we scan the devargs_list populated via cmdline */
- TAILQ_FOREACH(devargs, &devargs_list, next) {
-
- if (devargs->bus != &rte_vdev_bus)
- continue;
-
- dev = find_vdev(devargs->name);
- if (dev)
- continue;
-
- dev = calloc(1, sizeof(*dev));
- if (!dev)
- return -1;
-
- dev->device.devargs = devargs;
- dev->device.numa_node = SOCKET_ID_ANY;
- dev->device.name = devargs->name;
-
- TAILQ_INSERT_TAIL(&vdev_device_list, dev, next);
- }
-
- return 0;
-}
-
-static int
-vdev_probe(void)
-{
- struct rte_vdev_device *dev;
-
- /* call the init function for each virtual device */
- TAILQ_FOREACH(dev, &vdev_device_list, next) {
-
- if (dev->device.driver)
- continue;
-
- if (vdev_probe_all_drivers(dev)) {
- RTE_LOG(ERR, EAL, "failed to initialize %s device\n",
- rte_vdev_device_name(dev));
- return -1;
- }
- }
-
- return 0;
-}
-
-static struct rte_device *
-vdev_find_device(const struct rte_device *start, rte_dev_cmp_t cmp,
- const void *data)
-{
- struct rte_vdev_device *dev;
-
- TAILQ_FOREACH(dev, &vdev_device_list, next) {
- if (start && &dev->device == start) {
- start = NULL;
- continue;
- }
- if (cmp(&dev->device, data) == 0)
- return &dev->device;
- }
- return NULL;
-}
-
-static int
-vdev_plug(struct rte_device *dev)
-{
- return vdev_probe_all_drivers(RTE_DEV_TO_VDEV(dev));
-}
-
-static int
-vdev_unplug(struct rte_device *dev)
-{
- return rte_vdev_uninit(dev->name);
-}
-
-static struct rte_bus rte_vdev_bus = {
- .scan = vdev_scan,
- .probe = vdev_probe,
- .find_device = vdev_find_device,
- .plug = vdev_plug,
- .unplug = vdev_unplug,
- .parse = vdev_parse,
-};
-
-RTE_REGISTER_BUS(vdev, rte_vdev_bus);
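The vdev bus code removed above drives virtual-device hot-plug through rte_vdev_init() and rte_vdev_uninit(). A minimal usage sketch, assuming the prototypes stay reachable through the vdev header (rte_vdev.h) and that the net_null PMD is built in:

    #include <rte_vdev.h>

    /* Create a virtual device, equivalent to --vdev=net_null0,size=64. */
    static int
    plug_and_unplug_null_port(void)
    {
            int ret = rte_vdev_init("net_null0", "size=64");

            if (ret < 0)
                    return ret;             /* -EEXIST, -ENOMEM or probe failure */

            /* ... use the device ... */

            return rte_vdev_uninit("net_null0");    /* detach driver, free devargs */
    }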
diff --git a/lib/librte_eal/common/eal_internal_cfg.h b/lib/librte_eal/common/eal_internal_cfg.h
index 7b7e8c88..fa6ccbec 100644
--- a/lib/librte_eal/common/eal_internal_cfg.h
+++ b/lib/librte_eal/common/eal_internal_cfg.h
@@ -65,7 +65,6 @@ struct internal_config {
volatile unsigned force_nrank; /**< force number of ranks */
volatile unsigned no_hugetlbfs; /**< true to disable hugetlbfs */
unsigned hugepage_unlink; /**< true to unlink backing files */
- volatile unsigned xen_dom0_support; /**< support app running on Xen Dom0*/
volatile unsigned no_pci; /**< true to disable PCI */
volatile unsigned no_hpet; /**< true to disable HPET */
volatile unsigned vmware_tsc_map; /**< true to use VMware TSC mapping
@@ -82,7 +81,7 @@ struct internal_config {
volatile enum rte_intr_mode vfio_intr_mode;
const char *hugefile_prefix; /**< the base filename of hugetlbfs files */
const char *hugepage_dir; /**< specific hugetlbfs directory to use */
-
+ const char *mbuf_pool_ops_name; /**< mbuf pool ops name */
unsigned num_hugepage_sizes; /**< how many sizes on this system */
struct hugepage_info hugepage_info[MAX_HUGEPAGE_SIZES];
};
diff --git a/lib/librte_eal/common/eal_options.h b/lib/librte_eal/common/eal_options.h
index 439a2610..30e6bb41 100644
--- a/lib/librte_eal/common/eal_options.h
+++ b/lib/librte_eal/common/eal_options.h
@@ -61,6 +61,8 @@ enum {
OPT_LOG_LEVEL_NUM,
#define OPT_MASTER_LCORE "master-lcore"
OPT_MASTER_LCORE_NUM,
+#define OPT_MBUF_POOL_OPS_NAME "mbuf-pool-ops-name"
+ OPT_MBUF_POOL_OPS_NAME_NUM,
#define OPT_PROC_TYPE "proc-type"
OPT_PROC_TYPE_NUM,
#define OPT_NO_HPET "no-hpet"
@@ -81,8 +83,6 @@ enum {
OPT_VFIO_INTR_NUM,
#define OPT_VMWARE_TSC_MAP "vmware-tsc-map"
OPT_VMWARE_TSC_MAP_NUM,
-#define OPT_XEN_DOM0 "xen-dom0"
- OPT_XEN_DOM0_NUM,
OPT_LONG_MAX_NUM
};
diff --git a/lib/librte_eal/common/eal_private.h b/lib/librte_eal/common/eal_private.h
index 597d82e4..462226f1 100644
--- a/lib/librte_eal/common/eal_private.h
+++ b/lib/librte_eal/common/eal_private.h
@@ -35,8 +35,8 @@
#define _EAL_PRIVATE_H_
#include <stdbool.h>
+#include <stdint.h>
#include <stdio.h>
-#include <rte_pci.h>
/**
* Initialize the memzone subsystem (private to eal).
@@ -109,137 +109,6 @@ int rte_eal_timer_init(void);
*/
int rte_eal_log_init(const char *id, int facility);
-struct rte_pci_driver;
-struct rte_pci_device;
-
-/**
- * Find the name of a PCI device.
- */
-void pci_name_set(struct rte_pci_device *dev);
-
-/**
- * Add a PCI device to the PCI Bus (append to PCI Device list). This function
- * also updates the bus references of the PCI Device (and the generic device
- * object embedded within.
- *
- * @param pci_dev
- * PCI device to add
- * @return void
- */
-void rte_pci_add_device(struct rte_pci_device *pci_dev);
-
-/**
- * Insert a PCI device in the PCI Bus at a particular location in the device
- * list. It also updates the PCI Bus reference of the new devices to be
- * inserted.
- *
- * @param exist_pci_dev
- * Existing PCI device in PCI Bus
- * @param new_pci_dev
- * PCI device to be added before exist_pci_dev
- * @return void
- */
-void rte_pci_insert_device(struct rte_pci_device *exist_pci_dev,
- struct rte_pci_device *new_pci_dev);
-
-/**
- * Remove a PCI device from the PCI Bus. This sets to NULL the bus references
- * in the PCI device object as well as the generic device object.
- *
- * @param pci_device
- * PCI device to be removed from PCI Bus
- * @return void
- */
-void rte_pci_remove_device(struct rte_pci_device *pci_device);
-
-/**
- * Update a pci device object by asking the kernel for the latest information.
- *
- * This function is private to EAL.
- *
- * @param addr
- * The PCI Bus-Device-Function address to look for
- * @return
- * - 0 on success.
- * - negative on error.
- */
-int pci_update_device(const struct rte_pci_addr *addr);
-
-/**
- * Unbind kernel driver for this device
- *
- * This function is private to EAL.
- *
- * @return
- * 0 on success, negative on error
- */
-int pci_unbind_kernel_driver(struct rte_pci_device *dev);
-
-/**
- * Map the PCI resource of a PCI device in virtual memory
- *
- * This function is private to EAL.
- *
- * @return
- * 0 on success, negative on error
- */
-int pci_uio_map_resource(struct rte_pci_device *dev);
-
-/**
- * Unmap the PCI resource of a PCI device
- *
- * This function is private to EAL.
- */
-void pci_uio_unmap_resource(struct rte_pci_device *dev);
-
-/**
- * Allocate uio resource for PCI device
- *
- * This function is private to EAL.
- *
- * @param dev
- * PCI device to allocate uio resource
- * @param uio_res
- * Pointer to uio resource.
- * If the function returns 0, the pointer will be filled.
- * @return
- * 0 on success, negative on error
- */
-int pci_uio_alloc_resource(struct rte_pci_device *dev,
- struct mapped_pci_resource **uio_res);
-
-/**
- * Free uio resource for PCI device
- *
- * This function is private to EAL.
- *
- * @param dev
- * PCI device to free uio resource
- * @param uio_res
- * Pointer to uio resource.
- */
-void pci_uio_free_resource(struct rte_pci_device *dev,
- struct mapped_pci_resource *uio_res);
-
-/**
- * Map device memory to uio resource
- *
- * This function is private to EAL.
- *
- * @param dev
- * PCI device that has memory information.
- * @param res_idx
- * Memory resource index of the PCI device.
- * @param uio_res
- * uio resource that will keep mapping information.
- * @param map_idx
- * Mapping information index of the uio resource.
- * @return
- * 0 on success, negative on error
- */
-int pci_uio_map_resource_by_index(struct rte_pci_device *dev, int res_idx,
- struct mapped_pci_resource *uio_res, int map_idx);
-
/**
* Init tail queues for non-EAL library structures. This is to allow
* the rings, mempools, etc. lists to be shared among multiple processes
@@ -315,6 +184,17 @@ void set_tsc_freq(void);
uint64_t get_tsc_freq(void);
/**
+ * Get the TSC frequency if the architecture supports it.
+ *
+ * This function is private to the EAL.
+ *
+ * @return
+ * The number of TSC cycles in one second.
+ * Returns zero if the architecture support is not available.
+ */
+uint64_t get_tsc_freq_arch(void);
+
+/**
* Prepare physical memory mapping
* i.e. hugepages on Linux and
* contigmem on BSD.
@@ -333,17 +213,6 @@ int rte_eal_hugepage_init(void);
int rte_eal_hugepage_attach(void);
/**
- * Returns true if the system is able to obtain
- * physical addresses. Return false if using DMA
- * addresses through an IOMMU.
- *
- * Drivers based on uio will not load unless physical
- * addresses are obtainable. It is only possible to get
- * physical addresses when running as a privileged user.
- */
-bool rte_eal_using_phys_addrs(void);
-
-/**
* Find a bus capable of identifying a device.
*
* @param str
diff --git a/lib/librte_eal/common/include/arch/arm/rte_vect.h b/lib/librte_eal/common/include/arch/arm/rte_vect.h
index 782350d1..aa887a97 100644
--- a/lib/librte_eal/common/include/arch/arm/rte_vect.h
+++ b/lib/librte_eal/common/include/arch/arm/rte_vect.h
@@ -136,7 +136,7 @@ vgetq_lane_p64(poly64x2_t x, const int lane)
#endif
/*
- * If (0 <= index <= 15), then call the ASIMD ext intruction on the
+ * If (0 <= index <= 15), then call the ASIMD ext instruction on the
* 128 bit regs v0 and v1 with the appropriate index.
*
* Else returns a zero vector.
diff --git a/lib/librte_eal/common/include/arch/x86/rte_atomic_32.h b/lib/librte_eal/common/include/arch/x86/rte_atomic_32.h
index 2e04c759..fb3abf18 100644
--- a/lib/librte_eal/common/include/arch/x86/rte_atomic_32.h
+++ b/lib/librte_eal/common/include/arch/x86/rte_atomic_32.h
@@ -81,7 +81,7 @@ rte_atomic64_cmpset(volatile uint64_t *dst, uint64_t exp, uint64_t src)
: "memory" ); /* no-clobber list */
#else
asm volatile (
- "mov %%ebx, %%edi\n"
+ "xchgl %%ebx, %%edi;\n"
MPLOCKED
"cmpxchg8b (%[dst]);"
"setz %[res];"
diff --git a/lib/librte_sched/rte_bitmap.h b/lib/librte_eal/common/include/rte_bitmap.h
index 010d752c..010d752c 100644
--- a/lib/librte_sched/rte_bitmap.h
+++ b/lib/librte_eal/common/include/rte_bitmap.h
diff --git a/lib/librte_eal/common/include/rte_bus.h b/lib/librte_eal/common/include/rte_bus.h
index c79368d3..6fb08341 100644
--- a/lib/librte_eal/common/include/rte_bus.h
+++ b/lib/librte_eal/common/include/rte_bus.h
@@ -55,6 +55,21 @@ extern "C" {
/** Double linked list of buses */
TAILQ_HEAD(rte_bus_list, rte_bus);
+
+/**
+ * IOVA mapping mode.
+ *
+ * The IOVA mapping mode is the IOMMU programming mode of a device.
+ * Depending on rte_iova_mode, the device (for example an IOMMU-backed
+ * DMA device) will generate physical or virtual addresses for DMA.
+ *
+ */
+enum rte_iova_mode {
+ RTE_IOVA_DC = 0, /* Don't care mode */
+ RTE_IOVA_PA = (1 << 0), /* DMA using physical address */
+ RTE_IOVA_VA = (1 << 1) /* DMA using virtual address */
+};
+
/**
* Bus specific scan for devices attached on the bus.
* For each bus object, the scan would be responsible for finding devices and
@@ -168,6 +183,20 @@ struct rte_bus_conf {
enum rte_bus_scan_mode scan_mode; /**< Scan policy. */
};
+
+/**
+ * Get the common iommu class of all the devices on the bus. The bus may
+ * check that those devices are attached to an iommu driver.
+ * If no devices are attached to the bus, the bus may return the don't care
+ * (_DC) value.
+ * Otherwise, the bus will return the appropriate _pa or _va iova mode.
+ *
+ * @return
+ * enum rte_iova_mode value.
+ */
+typedef enum rte_iova_mode (*rte_bus_get_iommu_class_t)(void);
+
+
/**
* A structure describing a generic bus.
*/
@@ -181,6 +210,7 @@ struct rte_bus {
rte_bus_unplug_t unplug; /**< Remove single device from driver */
rte_bus_parse_t parse; /**< Parse a device name */
struct rte_bus_conf conf; /**< Bus configuration */
+ rte_bus_get_iommu_class_t get_iommu_class; /**< Get iommu class */
};
/**
@@ -280,12 +310,22 @@ struct rte_bus *rte_bus_find_by_device(const struct rte_device *dev);
*/
struct rte_bus *rte_bus_find_by_name(const char *busname);
+
+/**
+ * Get the common iommu class of devices bound to buses available in the
+ * system. The default mode is PA.
+ *
+ * @return
+ * enum rte_iova_mode value.
+ */
+enum rte_iova_mode rte_bus_get_iommu_class(void);
+
/**
* Helper for Bus registration.
* The constructor has higher priority than PMD constructors.
*/
#define RTE_REGISTER_BUS(nm, bus) \
-RTE_INIT_PRIO(businitfn_ ##nm, 101); \
+RTE_INIT_PRIO(businitfn_ ##nm, 110); \
static void businitfn_ ##nm(void) \
{\
(bus).name = RTE_STR(nm);\
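The new get_iommu_class hook lets each bus report whether its devices want physical or virtual IOVAs, and rte_bus_get_iommu_class() combines the per-bus answers (defaulting to PA). A sketch of a bus registration using it, with all names illustrative:

    #include <rte_bus.h>

    static int dummy_scan(void) { return 0; }
    static int dummy_probe(void) { return 0; }

    /* Report that every device on this bus can be programmed with virtual addresses. */
    static enum rte_iova_mode
    dummy_get_iommu_class(void)
    {
            return RTE_IOVA_VA;
    }

    static struct rte_bus dummy_bus = {
            .scan = dummy_scan,
            .probe = dummy_probe,
            .get_iommu_class = dummy_get_iommu_class,
    };

    RTE_REGISTER_BUS(dummy, dummy_bus);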
diff --git a/lib/librte_eal/common/include/rte_common.h b/lib/librte_eal/common/include/rte_common.h
index 1afc66e3..de853e16 100644
--- a/lib/librte_eal/common/include/rte_common.h
+++ b/lib/librte_eal/common/include/rte_common.h
@@ -109,6 +109,29 @@ typedef uint16_t unaligned_uint16_t;
#define RTE_SET_USED(x) (void)(x)
/**
+ * Run function before main() with low priority.
+ *
+ * The constructor will be run after prioritized constructors.
+ *
+ * @param func
+ * Constructor function.
+ */
+#define RTE_INIT(func) \
+static void __attribute__((constructor, used)) func(void)
+
+/**
+ * Run function before main() with high priority.
+ *
+ * @param func
+ * Constructor function.
+ * @param prio
+ * Priority number must be above 100.
+ * Lowest number is the first to run.
+ */
+#define RTE_INIT_PRIO(func, prio) \
+static void __attribute__((constructor(prio), used)) func(void)
+
+/**
* Force a function to be inlined
*/
#define __rte_always_inline inline __attribute__((always_inline))
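RTE_INIT and RTE_INIT_PRIO move here so any component can register itself before main() via GCC constructor attributes. A small sketch with illustrative function names:

    #include <stdio.h>
    #include <rte_common.h>

    /* Plain constructor: runs after all prioritized constructors. */
    RTE_INIT(my_driver_register)
    {
            printf("driver registered\n");
    }

    /* Prioritized constructor: priority must be above 100, lowest runs first. */
    RTE_INIT_PRIO(my_early_setup, 120)
    {
            printf("early setup\n");
    }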
diff --git a/lib/librte_eal/common/include/rte_debug.h b/lib/librte_eal/common/include/rte_debug.h
index cab6fb4c..79b67b3e 100644
--- a/lib/librte_eal/common/include/rte_debug.h
+++ b/lib/librte_eal/common/include/rte_debug.h
@@ -79,7 +79,7 @@ void rte_dump_registers(void);
#define rte_panic(...) rte_panic_(__func__, __VA_ARGS__, "dummy")
#define rte_panic_(func, format, ...) __rte_panic(func, format "%.0s", __VA_ARGS__)
-#if RTE_LOG_LEVEL >= RTE_LOG_DEBUG
+#ifdef RTE_ENABLE_ASSERT
#define RTE_ASSERT(exp) RTE_VERIFY(exp)
#else
#define RTE_ASSERT(exp) do {} while (0)
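RTE_ASSERT() is now enabled by the dedicated RTE_ENABLE_ASSERT build flag rather than by the log level. A sketch of the intended use, with an illustrative function:

    #include <rte_debug.h>

    static unsigned int burst_size;

    static void
    set_burst_size(unsigned int n)
    {
            /* Checked (and panics on failure) only when built with
             * RTE_ENABLE_ASSERT; otherwise compiles to a no-op. */
            RTE_ASSERT(n != 0 && (n & (n - 1)) == 0);       /* expect a power of two */
            burst_size = n;
    }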
diff --git a/lib/librte_eal/common/include/rte_dev.h b/lib/librte_eal/common/include/rte_dev.h
index 5386d3a2..9342e0cb 100644
--- a/lib/librte_eal/common/include/rte_dev.h
+++ b/lib/librte_eal/common/include/rte_dev.h
@@ -49,7 +49,6 @@ extern "C" {
#include <stdio.h>
#include <sys/queue.h>
-#include <rte_config.h>
#include <rte_log.h>
__attribute__((format(printf, 2, 0)))
@@ -152,7 +151,11 @@ struct rte_driver {
const char *alias; /**< Driver alias. */
};
-#define RTE_DEV_NAME_MAX_LEN (32)
+/*
+ * Internal identifier length
+ * Sufficiently large to allow for UUID or PCI address
+ */
+#define RTE_DEV_NAME_MAX_LEN 64
/**
* A structure describing a generic device.
@@ -166,28 +169,6 @@ struct rte_device {
};
/**
- * Initialize a driver specified by name.
- *
- * @param name
- * The pointer to a driver name to be initialized.
- * @param args
- * The pointer to arguments used by driver initialization.
- * @return
- * 0 on success, negative on error
- */
-int rte_vdev_init(const char *name, const char *args);
-
-/**
- * Uninitalize a driver specified by name.
- *
- * @param name
- * The pointer to a driver name to be initialized.
- * @return
- * 0 on success, negative on error
- */
-int rte_vdev_uninit(const char *name);
-
-/**
* Attach a device to a registered driver.
*
* @param name
@@ -312,4 +293,4 @@ __attribute__((used)) = str
}
#endif
-#endif /* _RTE_VDEV_H_ */
+#endif /* _RTE_DEV_H_ */
diff --git a/lib/librte_eal/common/include/rte_eal.h b/lib/librte_eal/common/include/rte_eal.h
index 0e7363d7..09b66819 100644
--- a/lib/librte_eal/common/include/rte_eal.h
+++ b/lib/librte_eal/common/include/rte_eal.h
@@ -44,7 +44,9 @@
#include <sched.h>
#include <rte_per_lcore.h>
-#include <rte_config.h>
+#include <rte_bus.h>
+
+#include <rte_pci_dev_feature_defs.h>
#ifdef __cplusplus
extern "C" {
@@ -87,6 +89,9 @@ struct rte_config {
/** Primary or secondary configuration */
enum rte_proc_type_t process_type;
+ /** PA or VA mapping mode */
+ enum rte_iova_mode iova_mode;
+
/**
* Pointer to memory configuration, which may be shared across multiple
* DPDK instances
@@ -264,6 +269,32 @@ rte_set_application_usage_hook(rte_usage_hook_t usage_func);
int rte_eal_has_hugepages(void);
/**
+ * Whether EAL is using PCI bus.
+ * Disabled by --no-pci option.
+ *
+ * @return
+ * Nonzero if the PCI bus is enabled.
+ */
+int rte_eal_has_pci(void);
+
+/**
+ * Whether the EAL was asked to create UIO device.
+ *
+ * @return
+ * Nonzero if true.
+ */
+int rte_eal_create_uio_dev(void);
+
+/**
+ * The user-configured vfio interrupt mode.
+ *
+ * @return
+ * Interrupt mode configured with the command line,
+ * RTE_INTR_MODE_NONE by default.
+ */
+enum rte_intr_mode rte_eal_vfio_intr_mode(void);
+
+/**
* A wrap API for syscall gettid.
*
* @return
@@ -287,11 +318,22 @@ static inline int rte_gettid(void)
return RTE_PER_LCORE(_thread_id);
}
-#define RTE_INIT(func) \
-static void __attribute__((constructor, used)) func(void)
+/**
+ * Get the iova mode
+ *
+ * @return
+ * enum rte_iova_mode value.
+ */
+enum rte_iova_mode rte_eal_iova_mode(void);
-#define RTE_INIT_PRIO(func, prio) \
-static void __attribute__((constructor(prio), used)) func(void)
+/**
+ * Get default pool ops name for mbuf
+ *
+ * @return
+ * returns default pool ops name.
+ */
+const char *
+rte_eal_mbuf_default_mempool_ops(void);
#ifdef __cplusplus
}
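rte_eal.h gains read-only accessors for the IOVA mode, PCI bus presence and the default mbuf pool ops. A sketch of querying them after rte_eal_init(), with the reporting function being illustrative:

    #include <rte_eal.h>
    #include <rte_log.h>

    static void
    report_eal_config(void)
    {
            if (rte_eal_iova_mode() == RTE_IOVA_VA)
                    RTE_LOG(INFO, EAL, "DMA uses IO virtual addresses\n");
            else
                    RTE_LOG(INFO, EAL, "DMA uses physical addresses\n");

            RTE_LOG(INFO, EAL, "PCI bus enabled: %d\n", rte_eal_has_pci());
            RTE_LOG(INFO, EAL, "default mbuf pool ops: %s\n",
                    rte_eal_mbuf_default_mempool_ops());
    }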
diff --git a/lib/librte_eal/linuxapp/eal/include/exec-env/rte_interrupts.h b/lib/librte_eal/common/include/rte_eal_interrupts.h
index 6daffebf..031f78cc 100644
--- a/lib/librte_eal/linuxapp/eal/include/exec-env/rte_interrupts.h
+++ b/lib/librte_eal/common/include/rte_eal_interrupts.h
@@ -35,15 +35,26 @@
#error "don't include this file directly, please include generic <rte_interrupts.h>"
#endif
-#ifndef _RTE_LINUXAPP_INTERRUPTS_H_
-#define _RTE_LINUXAPP_INTERRUPTS_H_
+/**
+ * @file rte_eal_interrupts.h
+ * @internal
+ *
+ * Contains function prototypes exposed by the EAL for interrupt handling by
+ * drivers and other DPDK internal consumers.
+ */
+
+#ifndef _RTE_EAL_INTERRUPTS_H_
+#define _RTE_EAL_INTERRUPTS_H_
#define RTE_MAX_RXTX_INTR_VEC_ID 32
#define RTE_INTR_VEC_ZERO_OFFSET 0
#define RTE_INTR_VEC_RXTX_OFFSET 1
+/**
+ * The interrupt source type, e.g. UIO, VFIO, ALARM etc.
+ */
enum rte_intr_handle_type {
- RTE_INTR_HANDLE_UNKNOWN = 0,
+ RTE_INTR_HANDLE_UNKNOWN = 0, /**< generic unknown handle */
RTE_INTR_HANDLE_UIO, /**< uio device handle */
RTE_INTR_HANDLE_UIO_INTX, /**< uio generic handle */
RTE_INTR_HANDLE_VFIO_LEGACY, /**< vfio device handle (legacy) */
@@ -52,7 +63,7 @@ enum rte_intr_handle_type {
RTE_INTR_HANDLE_ALARM, /**< alarm handle */
RTE_INTR_HANDLE_EXT, /**< external handler */
RTE_INTR_HANDLE_VDEV, /**< virtual device */
- RTE_INTR_HANDLE_MAX
+ RTE_INTR_HANDLE_MAX /**< count of elements */
};
#define RTE_INTR_EVENT_ADD 1UL
@@ -86,13 +97,13 @@ struct rte_intr_handle {
RTE_STD_C11
union {
int vfio_dev_fd; /**< VFIO device file descriptor */
- int uio_cfg_fd; /**< UIO config file descriptor
- for uio_pci_generic */
+ int uio_cfg_fd; /**< UIO cfg file desc for uio_pci_generic */
};
int fd; /**< interrupt event file descriptor */
enum rte_intr_handle_type type; /**< handle type */
uint32_t max_intr; /**< max interrupt requested */
uint32_t nb_efd; /**< number of available efd(event fd) */
+ uint8_t efd_counter_size; /**< size of efd counter, used for vdev */
int efds[RTE_MAX_RXTX_INTR_VEC_ID]; /**< intr vectors/efds mapping */
struct rte_epoll_event elist[RTE_MAX_RXTX_INTR_VEC_ID];
/**< intr vector epoll event */
@@ -236,4 +247,4 @@ rte_intr_allow_others(struct rte_intr_handle *intr_handle);
int
rte_intr_cap_multiple(struct rte_intr_handle *intr_handle);
-#endif /* _RTE_LINUXAPP_INTERRUPTS_H_ */
+#endif /* _RTE_EAL_INTERRUPTS_H_ */
diff --git a/lib/librte_eal/common/include/rte_interrupts.h b/lib/librte_eal/common/include/rte_interrupts.h
index 5d06ed79..43177c7a 100644
--- a/lib/librte_eal/common/include/rte_interrupts.h
+++ b/lib/librte_eal/common/include/rte_interrupts.h
@@ -53,7 +53,7 @@ struct rte_intr_handle;
/** Function to be registered for the specific interrupt */
typedef void (*rte_intr_callback_fn)(void *cb_arg);
-#include <exec-env/rte_interrupts.h>
+#include "rte_eal_interrupts.h"
/**
* It registers the callback for the specific interrupt. Multiple
diff --git a/lib/librte_eal/common/include/rte_lcore.h b/lib/librte_eal/common/include/rte_lcore.h
index 50e0d0fe..c89e6bab 100644
--- a/lib/librte_eal/common/include/rte_lcore.h
+++ b/lib/librte_eal/common/include/rte_lcore.h
@@ -262,6 +262,20 @@ void rte_thread_get_affinity(rte_cpuset_t *cpusetp);
*/
int rte_thread_setname(pthread_t id, const char *name);
+/**
+ * Test if the core supplied has a specific role
+ *
+ * @param lcore_id
+ * The identifier of the lcore, which MUST be between 0 and
+ * RTE_MAX_LCORE-1.
+ * @param role
+ * The role to be checked against.
+ * @return
+ * On success, return 0; otherwise return a negative value.
+ */
+int
+rte_lcore_has_role(unsigned int lcore_id, enum rte_lcore_role_t role);
+
#ifdef __cplusplus
}
#endif
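Per the comment above, rte_lcore_has_role() returns 0 when the lcore holds the queried role. A sketch that counts the lcores reserved for services (the helper is illustrative):

    #include <rte_lcore.h>

    static unsigned int
    count_service_lcores(void)
    {
            unsigned int lcore, n = 0;

            for (lcore = 0; lcore < RTE_MAX_LCORE; lcore++)
                    if (rte_lcore_has_role(lcore, ROLE_SERVICE) == 0)
                            n++;
            return n;
    }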
diff --git a/lib/librte_eal/common/include/rte_log.h b/lib/librte_eal/common/include/rte_log.h
index ec8dba79..16564d41 100644
--- a/lib/librte_eal/common/include/rte_log.h
+++ b/lib/librte_eal/common/include/rte_log.h
@@ -87,6 +87,7 @@ extern struct rte_logs rte_logs;
#define RTE_LOGTYPE_CRYPTODEV 17 /**< Log related to cryptodev. */
#define RTE_LOGTYPE_EFD 18 /**< Log related to EFD. */
#define RTE_LOGTYPE_EVENTDEV 19 /**< Log related to eventdev. */
+#define RTE_LOGTYPE_GSO 20 /**< Log related to GSO. */
/* these log types can be used in an application */
#define RTE_LOGTYPE_USER1 24 /**< User-defined log type 1. */
@@ -138,12 +139,6 @@ int rte_openlog_stream(FILE *f);
void rte_log_set_global_level(uint32_t level);
/**
- * Deprecated, replaced by rte_log_set_global_level().
- */
-__rte_deprecated
-void rte_set_log_level(uint32_t level);
-
-/**
* Get the global log level.
*
* @return
@@ -152,29 +147,6 @@ void rte_set_log_level(uint32_t level);
uint32_t rte_log_get_global_level(void);
/**
- * Deprecated, replaced by rte_log_get_global_level().
- */
-__rte_deprecated
-uint32_t rte_get_log_level(void);
-
-/**
- * Enable or disable the log type.
- *
- * @param type
- * Log type, for example, RTE_LOGTYPE_EAL.
- * @param enable
- * True for enable; false for disable.
- */
-__rte_deprecated
-void rte_set_log_type(uint32_t type, int enable);
-
-/**
- * Get the global log type.
- */
-__rte_deprecated
-uint32_t rte_get_log_type(void);
-
-/**
* Get the log level for a given type.
*
* @param logtype
diff --git a/lib/librte_eal/common/include/rte_malloc.h b/lib/librte_eal/common/include/rte_malloc.h
index 3d37f79b..5d4c11a7 100644
--- a/lib/librte_eal/common/include/rte_malloc.h
+++ b/lib/librte_eal/common/include/rte_malloc.h
@@ -323,17 +323,24 @@ int
rte_malloc_set_limit(const char *type, size_t max);
/**
- * Return the physical address of a virtual address obtained through
+ * Return the IO address of a virtual address obtained through
* rte_malloc
*
* @param addr
* Address obtained from a previous rte_malloc call
* @return
- * RTE_BAD_PHYS_ADDR on error
- * otherwise return physical address of the buffer
+ * RTE_BAD_IOVA on error
+ * otherwise return an address suitable for IO
*/
-phys_addr_t
-rte_malloc_virt2phy(const void *addr);
+rte_iova_t
+rte_malloc_virt2iova(const void *addr);
+
+__rte_deprecated
+static inline phys_addr_t
+rte_malloc_virt2phy(const void *addr)
+{
+ return rte_malloc_virt2iova(addr);
+}
#ifdef __cplusplus
}
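rte_malloc_virt2phy() survives only as a deprecated inline wrapper around rte_malloc_virt2iova(). A sketch of allocating a DMA buffer and recording its IO address (the helper is illustrative):

    #include <stddef.h>
    #include <rte_malloc.h>
    #include <rte_memory.h>

    static void *
    alloc_dma_buffer(size_t len, rte_iova_t *iova)
    {
            void *buf = rte_zmalloc("dma_buf", len, RTE_CACHE_LINE_SIZE);

            if (buf == NULL)
                    return NULL;

            *iova = rte_malloc_virt2iova(buf);      /* replaces rte_malloc_virt2phy() */
            if (*iova == RTE_BAD_IOVA) {
                    rte_free(buf);
                    return NULL;
            }
            return buf;
    }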
diff --git a/lib/librte_eal/common/include/rte_memory.h b/lib/librte_eal/common/include/rte_memory.h
index 4aa5d1f7..14aacea5 100644
--- a/lib/librte_eal/common/include/rte_memory.h
+++ b/lib/librte_eal/common/include/rte_memory.h
@@ -44,12 +44,6 @@
#include <stddef.h>
#include <stdio.h>
-#include <rte_config.h>
-
-#ifdef RTE_EXEC_ENV_LINUXAPP
-#include <exec-env/rte_dom0_common.h>
-#endif
-
#ifdef __cplusplus
extern "C" {
#endif
@@ -98,14 +92,27 @@ enum rte_page_sizes {
*/
#define __rte_cache_min_aligned __rte_aligned(RTE_CACHE_LINE_MIN_SIZE)
-typedef uint64_t phys_addr_t; /**< Physical address definition. */
+typedef uint64_t phys_addr_t; /**< Physical address. */
#define RTE_BAD_PHYS_ADDR ((phys_addr_t)-1)
+/**
+ * IO virtual address type.
+ * When the physical addressing mode (IOVA as PA) is in use,
+ * the translation from an IO virtual address (IOVA) to a physical address
+ * is a direct mapping, i.e. the same value.
+ * Otherwise, in virtual mode (IOVA as VA), an IOMMU may do the translation.
+ */
+typedef uint64_t rte_iova_t;
+#define RTE_BAD_IOVA ((rte_iova_t)-1)
/**
* Physical memory segment descriptor.
*/
struct rte_memseg {
- phys_addr_t phys_addr; /**< Start physical address. */
+ RTE_STD_C11
+ union {
+ phys_addr_t phys_addr; /**< deprecated - Start physical address. */
+ rte_iova_t iova; /**< Start IO address. */
+ };
RTE_STD_C11
union {
void *addr; /**< Start virtual address. */
@@ -116,10 +123,6 @@ struct rte_memseg {
int32_t socket_id; /**< NUMA socket ID. */
uint32_t nchannel; /**< Number of channels. */
uint32_t nrank; /**< Number of ranks. */
-#ifdef RTE_LIBRTE_XEN_DOM0
- /**< store segment MFNs */
- uint64_t mfn[DOM0_NUM_MEMBLOCK];
-#endif
} __rte_packed;
/**
@@ -140,11 +143,21 @@ int rte_mem_lock_page(const void *virt);
* @param virt
* The virtual address.
* @return
- * The physical address or RTE_BAD_PHYS_ADDR on error.
+ * The physical address or RTE_BAD_IOVA on error.
*/
phys_addr_t rte_mem_virt2phy(const void *virt);
/**
+ * Get IO virtual address of any mapped virtual address in the current process.
+ *
+ * @param virt
+ * The virtual address.
+ * @return
+ * The IO address or RTE_BAD_IOVA on error.
+ */
+rte_iova_t rte_mem_virt2iova(const void *virt);
+
+/**
* Get the layout of the available physical memory.
*
* It can be useful for an application to have the full physical
@@ -195,68 +208,16 @@ unsigned rte_memory_get_nchannel(void);
*/
unsigned rte_memory_get_nrank(void);
-#ifdef RTE_LIBRTE_XEN_DOM0
-
-/**< Internal use only - should DOM0 memory mapping be used */
-int rte_xen_dom0_supported(void);
-
-/**< Internal use only - phys to virt mapping for xen */
-phys_addr_t rte_xen_mem_phy2mch(int32_t, const phys_addr_t);
-
/**
- * Return the physical address of elt, which is an element of the pool mp.
- *
- * @param memseg_id
- * Identifier of the memory segment owning the physical address. If
- * set to -1, find it automatically.
- * @param phy_addr
- * physical address of elt.
- *
- * @return
- * The physical address or RTE_BAD_PHYS_ADDR on error.
- */
-static inline phys_addr_t
-rte_mem_phy2mch(int32_t memseg_id, const phys_addr_t phy_addr)
-{
- if (rte_xen_dom0_supported())
- return rte_xen_mem_phy2mch(memseg_id, phy_addr);
- else
- return phy_addr;
-}
-
-/**
- * Memory init for supporting application running on Xen domain0.
- *
- * @param void
+ * Drivers based on uio will not load unless physical
+ * addresses are obtainable. It is only possible to get
+ * physical addresses when running as a privileged user.
*
* @return
- * 0: successfully
- * negative: error
+ * 1 if the system is able to obtain physical addresses.
+ * 0 if using DMA addresses through an IOMMU.
*/
-int rte_xen_dom0_memory_init(void);
-
-/**
- * Attach to memory setments of primary process on Xen domain0.
- *
- * @param void
- *
- * @return
- * 0: successfully
- * negative: error
- */
-int rte_xen_dom0_memory_attach(void);
-#else
-static inline int rte_xen_dom0_supported(void)
-{
- return 0;
-}
-
-static inline phys_addr_t
-rte_mem_phy2mch(int32_t memseg_id __rte_unused, const phys_addr_t phy_addr)
-{
- return phy_addr;
-}
-#endif
+int rte_eal_using_phys_addrs(void);
#ifdef __cplusplus
}
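With the Xen Dom0 helpers gone, rte_mem_virt2iova() is the generic way to turn a mapped virtual address into an IO address, and rte_eal_using_phys_addrs() tells whether those are real physical addresses. A small illustrative sketch:

    #include <stdio.h>
    #include <inttypes.h>
    #include <rte_memory.h>

    static void
    dump_iova(const void *virt)
    {
            rte_iova_t iova = rte_mem_virt2iova(virt);

            if (iova == RTE_BAD_IOVA)
                    printf("%p: no IO address available\n", virt);
            else
                    printf("%p -> IOVA 0x%" PRIx64 " (%s)\n", virt, iova,
                           rte_eal_using_phys_addrs() ? "physical" : "iommu-mapped");
    }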
diff --git a/lib/librte_eal/common/include/rte_memzone.h b/lib/librte_eal/common/include/rte_memzone.h
index 1d0827f4..6f0ba182 100644
--- a/lib/librte_eal/common/include/rte_memzone.h
+++ b/lib/librte_eal/common/include/rte_memzone.h
@@ -78,7 +78,11 @@ struct rte_memzone {
#define RTE_MEMZONE_NAMESIZE 32 /**< Maximum length of memory zone name.*/
char name[RTE_MEMZONE_NAMESIZE]; /**< Name of the memory zone. */
- phys_addr_t phys_addr; /**< Start physical address. */
+ RTE_STD_C11
+ union {
+ phys_addr_t phys_addr; /**< deprecated - Start physical address. */
+ rte_iova_t iova; /**< Start IO address. */
+ };
RTE_STD_C11
union {
void *addr; /**< Start virtual address. */
diff --git a/lib/librte_eal/common/include/rte_pci.h b/lib/librte_eal/common/include/rte_pci.h
deleted file mode 100644
index 8b123391..00000000
--- a/lib/librte_eal/common/include/rte_pci.h
+++ /dev/null
@@ -1,598 +0,0 @@
-/*-
- * BSD LICENSE
- *
- * Copyright(c) 2010-2015 Intel Corporation. All rights reserved.
- * Copyright 2013-2014 6WIND S.A.
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- *
- * * Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in
- * the documentation and/or other materials provided with the
- * distribution.
- * * Neither the name of Intel Corporation nor the names of its
- * contributors may be used to endorse or promote products derived
- * from this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- */
-
-#ifndef _RTE_PCI_H_
-#define _RTE_PCI_H_
-
-/**
- * @file
- *
- * RTE PCI Interface
- */
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-#include <stdio.h>
-#include <stdlib.h>
-#include <limits.h>
-#include <errno.h>
-#include <sys/queue.h>
-#include <stdint.h>
-#include <inttypes.h>
-
-#include <rte_debug.h>
-#include <rte_interrupts.h>
-#include <rte_dev.h>
-#include <rte_bus.h>
-
-/** Pathname of PCI devices directory. */
-const char *pci_get_sysfs_path(void);
-
-/** Formatting string for PCI device identifier: Ex: 0000:00:01.0 */
-#define PCI_PRI_FMT "%.4" PRIx16 ":%.2" PRIx8 ":%.2" PRIx8 ".%" PRIx8
-#define PCI_PRI_STR_SIZE sizeof("XXXXXXXX:XX:XX.X")
-
-/** Short formatting string, without domain, for PCI device: Ex: 00:01.0 */
-#define PCI_SHORT_PRI_FMT "%.2" PRIx8 ":%.2" PRIx8 ".%" PRIx8
-
-/** Nb. of values in PCI device identifier format string. */
-#define PCI_FMT_NVAL 4
-
-/** Nb. of values in PCI resource format. */
-#define PCI_RESOURCE_FMT_NVAL 3
-
-/** Maximum number of PCI resources. */
-#define PCI_MAX_RESOURCE 6
-
-/* Forward declarations */
-struct rte_pci_device;
-struct rte_pci_driver;
-
-/** List of PCI devices */
-TAILQ_HEAD(rte_pci_device_list, rte_pci_device);
-/** List of PCI drivers */
-TAILQ_HEAD(rte_pci_driver_list, rte_pci_driver);
-
-/* PCI Bus iterators */
-#define FOREACH_DEVICE_ON_PCIBUS(p) \
- TAILQ_FOREACH(p, &(rte_pci_bus.device_list), next)
-
-#define FOREACH_DRIVER_ON_PCIBUS(p) \
- TAILQ_FOREACH(p, &(rte_pci_bus.driver_list), next)
-
-/**
- * A structure describing an ID for a PCI driver. Each driver provides a
- * table of these IDs for each device that it supports.
- */
-struct rte_pci_id {
- uint32_t class_id; /**< Class ID (class, subclass, pi) or RTE_CLASS_ANY_ID. */
- uint16_t vendor_id; /**< Vendor ID or PCI_ANY_ID. */
- uint16_t device_id; /**< Device ID or PCI_ANY_ID. */
- uint16_t subsystem_vendor_id; /**< Subsystem vendor ID or PCI_ANY_ID. */
- uint16_t subsystem_device_id; /**< Subsystem device ID or PCI_ANY_ID. */
-};
-
-/**
- * A structure describing the location of a PCI device.
- */
-struct rte_pci_addr {
- uint32_t domain; /**< Device domain */
- uint8_t bus; /**< Device bus */
- uint8_t devid; /**< Device ID */
- uint8_t function; /**< Device function. */
-};
-
-struct rte_devargs;
-
-/**
- * A structure describing a PCI device.
- */
-struct rte_pci_device {
- TAILQ_ENTRY(rte_pci_device) next; /**< Next probed PCI device. */
- struct rte_device device; /**< Inherit core device */
- struct rte_pci_addr addr; /**< PCI location. */
- struct rte_pci_id id; /**< PCI ID. */
- struct rte_mem_resource mem_resource[PCI_MAX_RESOURCE];
- /**< PCI Memory Resource */
- struct rte_intr_handle intr_handle; /**< Interrupt handle */
- struct rte_pci_driver *driver; /**< Associated driver */
- uint16_t max_vfs; /**< sriov enable if not zero */
- enum rte_kernel_driver kdrv; /**< Kernel driver passthrough */
- char name[PCI_PRI_STR_SIZE+1]; /**< PCI location (ASCII) */
-};
-
-/**
- * @internal
- * Helper macro for drivers that need to convert to struct rte_pci_device.
- */
-#define RTE_DEV_TO_PCI(ptr) container_of(ptr, struct rte_pci_device, device)
-
-/** Any PCI device identifier (vendor, device, ...) */
-#define PCI_ANY_ID (0xffff)
-#define RTE_CLASS_ANY_ID (0xffffff)
-
-#ifdef __cplusplus
-/** C++ macro used to help building up tables of device IDs */
-#define RTE_PCI_DEVICE(vend, dev) \
- RTE_CLASS_ANY_ID, \
- (vend), \
- (dev), \
- PCI_ANY_ID, \
- PCI_ANY_ID
-#else
-/** Macro used to help building up tables of device IDs */
-#define RTE_PCI_DEVICE(vend, dev) \
- .class_id = RTE_CLASS_ANY_ID, \
- .vendor_id = (vend), \
- .device_id = (dev), \
- .subsystem_vendor_id = PCI_ANY_ID, \
- .subsystem_device_id = PCI_ANY_ID
-#endif
-
-/**
- * Initialisation function for the driver called during PCI probing.
- */
-typedef int (pci_probe_t)(struct rte_pci_driver *, struct rte_pci_device *);
-
-/**
- * Uninitialisation function for the driver called during hotplugging.
- */
-typedef int (pci_remove_t)(struct rte_pci_device *);
-
-/**
- * A structure describing a PCI driver.
- */
-struct rte_pci_driver {
- TAILQ_ENTRY(rte_pci_driver) next; /**< Next in list. */
- struct rte_driver driver; /**< Inherit core driver. */
- struct rte_pci_bus *bus; /**< PCI bus reference. */
- pci_probe_t *probe; /**< Device Probe function. */
- pci_remove_t *remove; /**< Device Remove function. */
- const struct rte_pci_id *id_table; /**< ID table, NULL terminated. */
- uint32_t drv_flags; /**< Flags contolling handling of device. */
-};
-
-/**
- * Structure describing the PCI bus
- */
-struct rte_pci_bus {
- struct rte_bus bus; /**< Inherit the generic class */
- struct rte_pci_device_list device_list; /**< List of PCI devices */
- struct rte_pci_driver_list driver_list; /**< List of PCI drivers */
-};
-
-/** Device needs PCI BAR mapping (done with either IGB_UIO or VFIO) */
-#define RTE_PCI_DRV_NEED_MAPPING 0x0001
-/** Device driver supports link state interrupt */
-#define RTE_PCI_DRV_INTR_LSC 0x0008
-/** Device driver supports device removal interrupt */
-#define RTE_PCI_DRV_INTR_RMV 0x0010
-/** Device driver needs to keep mapped resources if unsupported dev detected */
-#define RTE_PCI_DRV_KEEP_MAPPED_RES 0x0020
-
-/**
- * A structure describing a PCI mapping.
- */
-struct pci_map {
- void *addr;
- char *path;
- uint64_t offset;
- uint64_t size;
- uint64_t phaddr;
-};
-
-/**
- * A structure describing a mapped PCI resource.
- * For multi-process we need to reproduce all PCI mappings in secondary
- * processes, so save them in a tailq.
- */
-struct mapped_pci_resource {
- TAILQ_ENTRY(mapped_pci_resource) next;
-
- struct rte_pci_addr pci_addr;
- char path[PATH_MAX];
- int nb_maps;
- struct pci_map maps[PCI_MAX_RESOURCE];
-};
-
-/** mapped pci device list */
-TAILQ_HEAD(mapped_pci_res_list, mapped_pci_resource);
-
-/**< Internal use only - Macro used by pci addr parsing functions **/
-#define GET_PCIADDR_FIELD(in, fd, lim, dlm) \
-do { \
- unsigned long val; \
- char *end; \
- errno = 0; \
- val = strtoul((in), &end, 16); \
- if (errno != 0 || end[0] != (dlm) || val > (lim)) \
- return -EINVAL; \
- (fd) = (typeof (fd))val; \
- (in) = end + 1; \
-} while(0)
-
-/**
- * Utility function to produce a PCI Bus-Device-Function value
- * given a string representation. Assumes that the BDF is provided without
- * a domain prefix (i.e. domain returned is always 0)
- *
- * @param input
- * The input string to be parsed. Should have the format XX:XX.X
- * @param dev_addr
- * The PCI Bus-Device-Function address to be returned. Domain will always be
- * returned as 0
- * @return
- * 0 on success, negative on error.
- */
-static inline int
-eal_parse_pci_BDF(const char *input, struct rte_pci_addr *dev_addr)
-{
- dev_addr->domain = 0;
- GET_PCIADDR_FIELD(input, dev_addr->bus, UINT8_MAX, ':');
- GET_PCIADDR_FIELD(input, dev_addr->devid, UINT8_MAX, '.');
- GET_PCIADDR_FIELD(input, dev_addr->function, UINT8_MAX, 0);
- return 0;
-}
-
-/**
- * Utility function to produce a PCI Bus-Device-Function value
- * given a string representation. Assumes that the BDF is provided including
- * a domain prefix.
- *
- * @param input
- * The input string to be parsed. Should have the format XXXX:XX:XX.X
- * @param dev_addr
- * The PCI Bus-Device-Function address to be returned
- * @return
- * 0 on success, negative on error.
- */
-static inline int
-eal_parse_pci_DomBDF(const char *input, struct rte_pci_addr *dev_addr)
-{
- GET_PCIADDR_FIELD(input, dev_addr->domain, UINT16_MAX, ':');
- GET_PCIADDR_FIELD(input, dev_addr->bus, UINT8_MAX, ':');
- GET_PCIADDR_FIELD(input, dev_addr->devid, UINT8_MAX, '.');
- GET_PCIADDR_FIELD(input, dev_addr->function, UINT8_MAX, 0);
- return 0;
-}
-#undef GET_PCIADDR_FIELD
-
-/**
- * Utility function to write a pci device name, this device name can later be
- * used to retrieve the corresponding rte_pci_addr using eal_parse_pci_*
- * BDF helpers.
- *
- * @param addr
- * The PCI Bus-Device-Function address
- * @param output
- * The output buffer string
- * @param size
- * The output buffer size
- */
-static inline void
-rte_pci_device_name(const struct rte_pci_addr *addr,
- char *output, size_t size)
-{
- RTE_VERIFY(size >= PCI_PRI_STR_SIZE);
- RTE_VERIFY(snprintf(output, size, PCI_PRI_FMT,
- addr->domain, addr->bus,
- addr->devid, addr->function) >= 0);
-}
-
-/* Compare two PCI device addresses. */
-/**
- * Utility function to compare two PCI device addresses.
- *
- * @param addr
- * The PCI Bus-Device-Function address to compare
- * @param addr2
- * The PCI Bus-Device-Function address to compare
- * @return
- * 0 on equal PCI address.
- * Positive on addr is greater than addr2.
- * Negative on addr is less than addr2, or error.
- */
-static inline int
-rte_eal_compare_pci_addr(const struct rte_pci_addr *addr,
- const struct rte_pci_addr *addr2)
-{
- uint64_t dev_addr, dev_addr2;
-
- if ((addr == NULL) || (addr2 == NULL))
- return -1;
-
- dev_addr = ((uint64_t)addr->domain << 24) |
- (addr->bus << 16) | (addr->devid << 8) | addr->function;
- dev_addr2 = ((uint64_t)addr2->domain << 24) |
- (addr2->bus << 16) | (addr2->devid << 8) | addr2->function;
-
- if (dev_addr > dev_addr2)
- return 1;
- else if (dev_addr < dev_addr2)
- return -1;
- else
- return 0;
-}
-
-/**
- * Scan the content of the PCI bus, and the devices in the devices
- * list
- *
- * @return
- * 0 on success, negative on error
- */
-int rte_pci_scan(void);
-
-/**
- * Probe the PCI bus
- *
- * @return
- * - 0 on success.
- * - !0 on error.
- */
-int
-rte_pci_probe(void);
-
-/**
- * Map the PCI device resources in user space virtual memory address
- *
- * Note that driver should not call this function when flag
- * RTE_PCI_DRV_NEED_MAPPING is set, as EAL will do that for
- * you when it's on.
- *
- * @param dev
- * A pointer to a rte_pci_device structure describing the device
- * to use
- *
- * @return
- * 0 on success, negative on error and positive if no driver
- * is found for the device.
- */
-int rte_pci_map_device(struct rte_pci_device *dev);
-
-/**
- * Unmap this device
- *
- * @param dev
- * A pointer to a rte_pci_device structure describing the device
- * to use
- */
-void rte_pci_unmap_device(struct rte_pci_device *dev);
-
-/**
- * @internal
- * Map a particular resource from a file.
- *
- * @param requested_addr
- * The starting address for the new mapping range.
- * @param fd
- * The file descriptor.
- * @param offset
- * The offset for the mapping range.
- * @param size
- * The size for the mapping range.
- * @param additional_flags
- * The additional flags for the mapping range.
- * @return
- * - On success, the function returns a pointer to the mapped area.
- * - On error, the value MAP_FAILED is returned.
- */
-void *pci_map_resource(void *requested_addr, int fd, off_t offset,
- size_t size, int additional_flags);
-
-/**
- * @internal
- * Unmap a particular resource.
- *
- * @param requested_addr
- * The address for the unmapping range.
- * @param size
- * The size for the unmapping range.
- */
-void pci_unmap_resource(void *requested_addr, size_t size);
-
-/**
- * Probe the single PCI device.
- *
- * Scan the content of the PCI bus, and find the pci device specified by pci
- * address, then call the probe() function for registered driver that has a
- * matching entry in its id_table for discovered device.
- *
- * @param addr
- * The PCI Bus-Device-Function address to probe.
- * @return
- * - 0 on success.
- * - Negative on error.
- */
-int rte_pci_probe_one(const struct rte_pci_addr *addr);
-
-/**
- * Close the single PCI device.
- *
- * Scan the content of the PCI bus, and find the pci device specified by pci
- * address, then call the remove() function for registered driver that has a
- * matching entry in its id_table for discovered device.
- *
- * @param addr
- * The PCI Bus-Device-Function address to close.
- * @return
- * - 0 on success.
- * - Negative on error.
- */
-int rte_pci_detach(const struct rte_pci_addr *addr);
-
-/**
- * Dump the content of the PCI bus.
- *
- * @param f
- * A pointer to a file for output
- */
-void rte_pci_dump(FILE *f);
-
-/**
- * Register a PCI driver.
- *
- * @param driver
- * A pointer to a rte_pci_driver structure describing the driver
- * to be registered.
- */
-void rte_pci_register(struct rte_pci_driver *driver);
-
-/** Helper for PCI device registration from driver (eth, crypto) instance */
-#define RTE_PMD_REGISTER_PCI(nm, pci_drv) \
-RTE_INIT(pciinitfn_ ##nm); \
-static void pciinitfn_ ##nm(void) \
-{\
- (pci_drv).driver.name = RTE_STR(nm);\
- rte_pci_register(&pci_drv); \
-} \
-RTE_PMD_EXPORT_NAME(nm, __COUNTER__)
-
-/**
- * Unregister a PCI driver.
- *
- * @param driver
- * A pointer to a rte_pci_driver structure describing the driver
- * to be unregistered.
- */
-void rte_pci_unregister(struct rte_pci_driver *driver);
-
-/**
- * Read PCI config space.
- *
- * @param device
- * A pointer to a rte_pci_device structure describing the device
- * to use
- * @param buf
- * A data buffer where the bytes should be read into
- * @param len
- * The length of the data buffer.
- * @param offset
- * The offset into PCI config space
- */
-int rte_pci_read_config(const struct rte_pci_device *device,
- void *buf, size_t len, off_t offset);
-
-/**
- * Write PCI config space.
- *
- * @param device
- * A pointer to a rte_pci_device structure describing the device
- * to use
- * @param buf
- * A data buffer containing the bytes should be written
- * @param len
- * The length of the data buffer.
- * @param offset
- * The offset into PCI config space
- */
-int rte_pci_write_config(const struct rte_pci_device *device,
- const void *buf, size_t len, off_t offset);
-
-/**
- * A structure used to access io resources for a pci device.
- * rte_pci_ioport is arch, os, driver specific, and should not be used outside
- * of pci ioport api.
- */
-struct rte_pci_ioport {
- struct rte_pci_device *dev;
- uint64_t base;
- uint64_t len; /* only filled for memory mapped ports */
-};
-
-/**
- * Initialize a rte_pci_ioport object for a pci device io resource.
- *
- * This object is then used to gain access to those io resources (see below).
- *
- * @param dev
- * A pointer to a rte_pci_device structure describing the device
- * to use.
- * @param bar
- * Index of the io pci resource we want to access.
- * @param p
- * The rte_pci_ioport object to be initialized.
- * @return
- * 0 on success, negative on error.
- */
-int rte_pci_ioport_map(struct rte_pci_device *dev, int bar,
- struct rte_pci_ioport *p);
-
-/**
- * Release any resources used in a rte_pci_ioport object.
- *
- * @param p
- * The rte_pci_ioport object to be uninitialized.
- * @return
- * 0 on success, negative on error.
- */
-int rte_pci_ioport_unmap(struct rte_pci_ioport *p);
-
-/**
- * Read from a io pci resource.
- *
- * @param p
- * The rte_pci_ioport object from which we want to read.
- * @param data
- * A data buffer where the bytes should be read into
- * @param len
- * The length of the data buffer.
- * @param offset
- * The offset into the pci io resource.
- */
-void rte_pci_ioport_read(struct rte_pci_ioport *p,
- void *data, size_t len, off_t offset);
-
-/**
- * Write to a io pci resource.
- *
- * @param p
- * The rte_pci_ioport object to which we want to write.
- * @param data
- * A data buffer where the bytes should be read into
- * @param len
- * The length of the data buffer.
- * @param offset
- * The offset into the pci io resource.
- */
-void rte_pci_ioport_write(struct rte_pci_ioport *p,
- const void *data, size_t len, off_t offset);
-
-#ifdef __cplusplus
-}
-#endif
-
-#endif /* _RTE_PCI_H_ */
diff --git a/lib/librte_eal/common/include/rte_service.h b/lib/librte_eal/common/include/rte_service.h
index 7c6f7383..92724406 100644
--- a/lib/librte_eal/common/include/rte_service.h
+++ b/lib/librte_eal/common/include/rte_service.h
@@ -61,9 +61,6 @@ extern "C" {
#include <rte_lcore.h>
-/* forward declaration only. Definition in rte_service_private.h */
-struct rte_service_spec;
-
#define RTE_SERVICE_NAME_MAX 32
/* Capabilities of a service.
@@ -89,40 +86,32 @@ struct rte_service_spec;
*/
uint32_t rte_service_get_count(void);
-
/**
* @warning
* @b EXPERIMENTAL: this API may change without prior notice
*
- * Return the specification of a service by integer id.
+ * Return the id of a service by name.
*
- * This function provides the specification of a service. This can be used by
- * the application to understand what the service represents. The service
- * must not be modified by the application directly, only passed to the various
- * rte_service_* functions.
- *
- * @param id The integer id of the service to retrieve
- * @retval non-zero A valid pointer to the service_spec
- * @retval NULL Invalid *id* provided.
- */
-struct rte_service_spec *rte_service_get_by_id(uint32_t id);
-
-/**
- * @warning
- * @b EXPERIMENTAL: this API may change without prior notice
+ * This function provides the id of the service using the service name as
+ * lookup key. The service id is to be passed to other functions in the
+ * rte_service_* API.
*
- * Return the specification of a service by name.
- *
- * This function provides the specification of a service using the service name
- * as lookup key. This can be used by the application to understand what the
- * service represents. The service must not be modified by the application
- * directly, only passed to the various rte_service_* functions.
+ * Example usage:
+ * @code
+ * uint32_t service_id;
+ * int32_t ret = rte_service_get_by_name("service_X", &service_id);
+ * if (ret) {
+ * // handle error
+ * }
+ * @endcode
*
* @param name The name of the service to retrieve
- * @retval non-zero A valid pointer to the service_spec
- * @retval NULL Invalid *name* provided.
+ * @param[out] service_id A pointer to a uint32_t, to be filled in with the id.
+ * @retval 0 Success. The service id is provided in *service_id*.
+ * @retval -EINVAL Null *service_id* pointer provided
+ * @retval -ENODEV No such service registered
*/
-struct rte_service_spec *rte_service_get_by_name(const char *name);
+int32_t rte_service_get_by_name(const char *name, uint32_t *service_id);
/**
* @warning
@@ -133,7 +122,7 @@ struct rte_service_spec *rte_service_get_by_name(const char *name);
* @return A pointer to the name of the service. The returned pointer remains
* in ownership of the service, and the application must not free it.
*/
-const char *rte_service_get_name(const struct rte_service_spec *service);
+const char *rte_service_get_name(uint32_t id);
/**
* @warning
@@ -146,17 +135,16 @@ const char *rte_service_get_name(const struct rte_service_spec *service);
* @retval 1 Capability supported by this service instance
* @retval 0 Capability not supported by this service instance
*/
-int32_t rte_service_probe_capability(const struct rte_service_spec *service,
- uint32_t capability);
+int32_t rte_service_probe_capability(uint32_t id, uint32_t capability);
/**
* @warning
* @b EXPERIMENTAL: this API may change without prior notice
*
- * Enable a core to run a service.
+ * Map or unmap a lcore to a service.
*
- * Each core can be added or removed from running specific services. This
- * functions adds *lcore* to the set of cores that will run *service*.
+ * Each core can be added or removed from running a specific service. This
+ * function enables or disables *lcore* to run *service_id*.
*
* If multiple cores are enabled on a service, an atomic is used to ensure that
* only one cores runs the service at a time. The exception to this is when
@@ -164,82 +152,120 @@ int32_t rte_service_probe_capability(const struct rte_service_spec *service,
* called RTE_SERVICE_CAP_MT_SAFE. With the multi-thread safe capability set,
* the service function can be run on multiple threads at the same time.
*
- * @retval 0 lcore added successfully
+ * @param service_id the service to apply the lcore to
+ * @param lcore The lcore that will be mapped to service
+ * @param enable Zero to unmap or disable the core, non-zero to enable
+ *
+ * @retval 0 lcore map updated successfully
* @retval -EINVAL An invalid service or lcore was provided.
*/
-int32_t rte_service_enable_on_lcore(struct rte_service_spec *service,
- uint32_t lcore);
+int32_t rte_service_map_lcore_set(uint32_t service_id, uint32_t lcore,
+ uint32_t enable);
/**
* @warning
* @b EXPERIMENTAL: this API may change without prior notice
*
- * Disable a core to run a service.
+ * Retrieve the mapping of an lcore to a service.
*
- * Each core can be added or removed from running specific services. This
- * functions removes *lcore* to the set of cores that will run *service*.
+ * @param service_id the service to apply the lcore to
+ * @param lcore The lcore that will be mapped to service
*
- * @retval 0 Lcore removed successfully
+ * @retval 1 lcore is mapped to service
+ * @retval 0 lcore is not mapped to service
* @retval -EINVAL An invalid service or lcore was provided.
*/
-int32_t rte_service_disable_on_lcore(struct rte_service_spec *service,
- uint32_t lcore);
+int32_t rte_service_map_lcore_get(uint32_t service_id, uint32_t lcore);
/**
* @warning
* @b EXPERIMENTAL: this API may change without prior notice
*
- * Return if an lcore is enabled for the service.
+ * Set the runstate of the service.
*
- * This function allows the application to query if *lcore* is currently set to
- * run *service*.
+ * Each service is either running or stopped. Setting a non-zero runstate
+ * enables the service to run, while setting runstate zero disables it.
*
- * @retval 1 Lcore enabled on this lcore
- * @retval 0 Lcore disabled on this lcore
- * @retval -EINVAL An invalid service or lcore was provided.
+ * @param id The id of the service
+ * @param runstate The run state to apply to the service
+ *
+ * @retval 0 The run state of the service was successfully changed
+ * @retval -EINVAL Invalid service id
*/
-int32_t rte_service_get_enabled_on_lcore(struct rte_service_spec *service,
- uint32_t lcore);
-
+int32_t rte_service_runstate_set(uint32_t id, uint32_t runstate);
/**
* @warning
* @b EXPERIMENTAL: this API may change without prior notice
*
- * Enable *service* to run.
- *
- * This function switches on a service during runtime.
- * @retval 0 The service was successfully started
+ * Get the runstate for the service with *id*. See *rte_service_runstate_set*
+ * for details of runstates. A service can call this function to ensure that
+ * the application has indicated that it will receive CPU cycles. Either a
+ * service-core is mapped (default case), or the application has explicitly
+ * disabled the check that a service-core is mapped to the service and takes
+ * responsibility to run the service manually using the available function
+ * *rte_service_run_iter_on_app_lcore* to do so.
+ *
+ * @retval 1 Service is running
+ * @retval 0 Service is stopped
+ * @retval -EINVAL Invalid service id
*/
-int32_t rte_service_start(struct rte_service_spec *service);
+int32_t rte_service_runstate_get(uint32_t id);
/**
* @warning
* @b EXPERIMENTAL: this API may change without prior notice
*
- * Disable *service*.
+ * Enable or disable the check for a service-core being mapped to the service.
+ * An application can disable the check when it takes the responsibility to run a
+ * service itself using *rte_service_run_iter_on_app_lcore*.
+ *
+ * @param id The id of the service to set the check on
+ * @param enable When zero, the check is disabled. Non-zero enables the check.
*
- * Switch off a service, so it is not run until it is *rte_service_start* is
- * called on it.
- * @retval 0 Service successfully switched off
+ * @retval 0 Success
+ * @retval -EINVAL Invalid service ID
*/
-int32_t rte_service_stop(struct rte_service_spec *service);
+int32_t rte_service_set_runstate_mapped_check(uint32_t id, int32_t enable);
/**
* @warning
* @b EXPERIMENTAL: this API may change without prior notice
*
- * Returns if *service* is currently running.
- *
- * This function returns true if the service has been started using
- * *rte_service_start*, AND a service core is mapped to the service. This
- * function can be used to ensure that the service will be run.
- *
- * @retval 1 Service is currently running, and has a service lcore mapped
- * @retval 0 Service is currently stopped, or no service lcore is mapped
- * @retval -EINVAL Invalid service pointer provided
+ * This function runs a service callback from a non-service lcore.
+ *
+ * This function is designed to enable gradual porting to service cores, and
+ * to enable unit tests to verify a service behaves as expected.
+ *
+ * When called, this function ensures that the service identified by *id* is
+ * safe to run on this lcore. Multi-thread safe services are invoked even if
+ * other cores are simultaneously running them as they are multi-thread safe.
+ *
+ * Multi-thread unsafe services are handled depending on the variable
+ * *serialize_multithread_unsafe*:
+ * - When set, the function will check if a service is already being invoked
+ * on another lcore, refusing to run it and returning -EBUSY.
+ * - When zero, the application takes responsibility to ensure that the service
+ * indicated by *id* is not going to be invoked by another lcore. This setting
+ * avoids atomic operations, so is likely to be more performant.
+ *
+ * @param id The ID of the service to run
+ * @param serialize_multithread_unsafe This parameter indicates to the service
+ * cores library if it is required to use atomics to serialize access
+ * to multi-thread unsafe services. As there is an overhead in using
+ * atomics, applications can choose to enable or disable this feature
+ *
+ * Note that any thread calling this function MUST be a DPDK EAL thread, as
+ * the *rte_lcore_id* function is used to access internal data structures.
+ *
+ * @retval 0 Service was run on the calling thread successfully
+ * @retval -EBUSY Another lcore is executing the service, and it is not a
+ * multi-thread safe service, so the service was not run on this lcore
+ * @retval -ENOEXEC Service is not in a run-able state
+ * @retval -EINVAL Invalid service id
*/
-int32_t rte_service_is_running(const struct rte_service_spec *service);
+int32_t rte_service_run_iter_on_app_lcore(uint32_t id,
+ uint32_t serialize_multithread_unsafe);
/**
* @warning
@@ -341,13 +367,12 @@ int32_t rte_service_lcore_reset_all(void);
* Enable or disable statistics collection for *service*.
*
* This function enables per core, per-service cycle count collection.
- * @param service The service to enable statistics gathering on.
+ * @param id The service to enable statistics gathering on.
* @param enable Zero to disable statistics, non-zero to enable.
* @retval 0 Success
* @retval -EINVAL Invalid service pointer passed
*/
-int32_t rte_service_set_stats_enable(struct rte_service_spec *service,
- int32_t enable);
+int32_t rte_service_set_stats_enable(uint32_t id, int32_t enable);
/**
* @warning
@@ -374,10 +399,26 @@ int32_t rte_service_lcore_list(uint32_t array[], uint32_t n);
* @warning
* @b EXPERIMENTAL: this API may change without prior notice
*
- * Dumps any information available about the service. If service is NULL,
- * dumps info for all services.
+ * Get the number of services running on the supplied lcore.
+ *
+ * @param lcore Id of the service core.
+ * @retval >=0 Number of services registered to this core.
+ * @retval -EINVAL Invalid lcore provided
+ * @retval -ENOTSUP The provided lcore is not a service core.
+ */
+int32_t rte_service_lcore_count_services(uint32_t lcore);
+
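As an illustration (outside the patch, assuming 17.11 headers), this call combines naturally with rte_service_lcore_list() to report how many services each service core runs:

#include <stdio.h>
#include <rte_lcore.h>
#include <rte_service.h>

/* Print the number of services mapped to each service core. */
static void
print_services_per_core(void)
{
	uint32_t ids[RTE_MAX_LCORE];
	int32_t n = rte_service_lcore_list(ids, RTE_MAX_LCORE);
	int32_t i;

	for (i = 0; i < n; i++)
		printf("lcore %u runs %d service(s)\n",
			ids[i], rte_service_lcore_count_services(ids[i]));
}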
+/**
+ * @warning
+ * @b EXPERIMENTAL: this API may change without prior notice
+ *
+ * Dumps any information available about the service. When id is UINT32_MAX,
+ * this function dumps info for all services.
+ *
+ * @retval 0 Statistics have been successfully dumped
+ * @retval -EINVAL Invalid service id provided
*/
-int32_t rte_service_dump(FILE *f, struct rte_service_spec *service);
+int32_t rte_service_dump(FILE *f, uint32_t id);
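A usage sketch (illustrative only, assuming 17.11 headers and that statistics were previously enabled via rte_service_set_stats_enable()): passing UINT32_MAX selects all services, as documented above.

#include <stdint.h>
#include <stdio.h>
#include <rte_service.h>

/* Dump statistics for every registered service to stdout. */
static void
dump_all_services(void)
{
	rte_service_dump(stdout, UINT32_MAX);
}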
#ifdef __cplusplus
}
diff --git a/lib/librte_eal/common/include/rte_service_component.h b/lib/librte_eal/common/include/rte_service_component.h
index 7a946a1e..ac965cb4 100644
--- a/lib/librte_eal/common/include/rte_service_component.h
+++ b/lib/librte_eal/common/include/rte_service_component.h
@@ -85,21 +85,30 @@ struct rte_service_spec {
*
* For example the eventdev SW PMD requires CPU cycles to perform its
* scheduling. This can be achieved by registering it as a service, and the
- * application can then assign CPU resources to it using
- * *rte_service_set_coremask*.
+ * application can then assign CPU resources to that service.
+ *
+ * Note that when a service component registers itself, it is not permitted to
+ * add or remove service-core threads, or modify lcore-to-service mappings. The
+ * only API that may be called by the service-component is
+ * *rte_service_component_runstate_set*, which indicates that the service
+ * component is ready to be executed.
*
* @param spec The specification of the service to register
+ * @param[out] service_id A pointer to a uint32_t, which will be filled in
+ *		during registration of the service. It is set to the integer
+ * service number given to the service. This parameter may be NULL.
* @retval 0 Successfully registered the service.
* -EINVAL Attempted to register an invalid service (eg, no callback
* set)
*/
-int32_t rte_service_register(const struct rte_service_spec *spec);
+int32_t rte_service_component_register(const struct rte_service_spec *spec,
+ uint32_t *service_id);
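To make the new registration flow concrete, a minimal sketch follows (illustrative only; assumes DPDK 17.11 headers, and the callback, service name and helper names are placeholders invented for this example):

#include <stdio.h>
#include <string.h>
#include <rte_service.h>
#include <rte_service_component.h>

/* Placeholder callback: performs one iteration of the component's work. */
static int32_t
my_service_cb(void *userdata)
{
	(void)userdata;
	return 0;
}

static int
register_my_service(uint32_t *out_id)
{
	struct rte_service_spec spec;

	memset(&spec, 0, sizeof(spec));
	snprintf(spec.name, sizeof(spec.name), "my_service");
	spec.callback = my_service_cb;
	spec.callback_userdata = NULL;
	/* set spec.capabilities = RTE_SERVICE_CAP_MT_SAFE if the callback
	 * may safely run on several lcores at once */

	return rte_service_component_register(&spec, out_id);
}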
/**
* @warning
* @b EXPERIMENTAL: this API may change without prior notice
*
- * Unregister a service.
+ * Unregister a service component.
*
* The service being removed must be stopped before calling this function.
*
@@ -107,7 +116,7 @@ int32_t rte_service_register(const struct rte_service_spec *spec);
* @retval -EBUSY The service is currently running, stop the service before
* calling unregister. No action has been taken.
*/
-int32_t rte_service_unregister(struct rte_service_spec *service);
+int32_t rte_service_component_unregister(uint32_t id);
/**
* @warning
@@ -131,6 +140,23 @@ int32_t rte_service_start_with_defaults(void);
* @warning
* @b EXPERIMENTAL: this API may change without prior notice
*
+ * Set the backend runstate of a component.
+ *
+ * This function allows services to be registered at startup, but not yet
+ * enabled to run by default. When the service has been configured (via the
+ * usual method, e.g. rte_eventdev_configure), the service can mark itself as
+ * ready to run. The differentiation between backend runstate and
+ * service_runstate is that the backend runstate is set by the service
+ * component while the service runstate is reserved for application usage.
+ *
+ * @retval 0 Success
+ */
+int32_t rte_service_component_runstate_set(uint32_t id, uint32_t runstate);
+
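A short sketch of how the two runstates interact (illustrative only; the helper names are placeholders): the component marks its backend ready once it is configured, while the application independently gates execution through the application runstate.

#include <rte_service.h>
#include <rte_service_component.h>

/* Called by the component once it is configured and able to do work. */
static void
component_ready(uint32_t service_id)
{
	rte_service_component_runstate_set(service_id, 1);
}

/* Called by the application when it wants the service to execute. */
static void
app_start_service(uint32_t service_id)
{
	rte_service_runstate_set(service_id, 1);
}

Both runstates must be set (and, unless the mapped-lcore check is disabled, at least one lcore mapped) before rte_service_runstate_get() reports the service as runnable.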
+/**
+ * @warning
+ * @b EXPERIMENTAL: this API may change without prior notice
+ *
* Initialize the service library.
*
* In order to use the service library, it must be initialized. EAL initializes
diff --git a/lib/librte_eal/common/include/rte_vdev.h b/lib/librte_eal/common/include/rte_vdev.h
deleted file mode 100644
index 29f5a523..00000000
--- a/lib/librte_eal/common/include/rte_vdev.h
+++ /dev/null
@@ -1,131 +0,0 @@
-/*-
- * BSD LICENSE
- *
- * Copyright(c) 2016 RehiveTech. All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- *
- * * Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in
- * the documentation and/or other materials provided with the
- * distribution.
- * * Neither the name of RehiveTech nor the names of its
- * contributors may be used to endorse or promote products derived
- * from this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- */
-
-#ifndef RTE_VDEV_H
-#define RTE_VDEV_H
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-#include <sys/queue.h>
-#include <rte_dev.h>
-#include <rte_devargs.h>
-
-struct rte_vdev_device {
- TAILQ_ENTRY(rte_vdev_device) next; /**< Next attached vdev */
- struct rte_device device; /**< Inherit core device */
-};
-
-/**
- * @internal
- * Helper macro for drivers that need to convert to struct rte_vdev_device.
- */
-#define RTE_DEV_TO_VDEV(ptr) \
- container_of(ptr, struct rte_vdev_device, device)
-
-static inline const char *
-rte_vdev_device_name(const struct rte_vdev_device *dev)
-{
- if (dev && dev->device.name)
- return dev->device.name;
- return NULL;
-}
-
-static inline const char *
-rte_vdev_device_args(const struct rte_vdev_device *dev)
-{
- if (dev && dev->device.devargs)
- return dev->device.devargs->args;
- return "";
-}
-
-/** Double linked list of virtual device drivers. */
-TAILQ_HEAD(vdev_driver_list, rte_vdev_driver);
-
-/**
- * Probe function called for each virtual device driver once.
- */
-typedef int (rte_vdev_probe_t)(struct rte_vdev_device *dev);
-
-/**
- * Remove function called for each virtual device driver once.
- */
-typedef int (rte_vdev_remove_t)(struct rte_vdev_device *dev);
-
-/**
- * A virtual device driver abstraction.
- */
-struct rte_vdev_driver {
- TAILQ_ENTRY(rte_vdev_driver) next; /**< Next in list. */
- struct rte_driver driver; /**< Inherited general driver. */
- rte_vdev_probe_t *probe; /**< Virtual device probe function. */
- rte_vdev_remove_t *remove; /**< Virtual device remove function. */
-};
-
-/**
- * Register a virtual device driver.
- *
- * @param driver
- * A pointer to a rte_vdev_driver structure describing the driver
- * to be registered.
- */
-void rte_vdev_register(struct rte_vdev_driver *driver);
-
-/**
- * Unregister a virtual device driver.
- *
- * @param driver
- * A pointer to a rte_vdev_driver structure describing the driver
- * to be unregistered.
- */
-void rte_vdev_unregister(struct rte_vdev_driver *driver);
-
-#define RTE_PMD_REGISTER_VDEV(nm, vdrv)\
-RTE_INIT(vdrvinitfn_ ##vdrv);\
-static const char *vdrvinit_ ## nm ## _alias;\
-static void vdrvinitfn_ ##vdrv(void)\
-{\
- (vdrv).driver.name = RTE_STR(nm);\
- (vdrv).driver.alias = vdrvinit_ ## nm ## _alias;\
- rte_vdev_register(&vdrv);\
-} \
-RTE_PMD_EXPORT_NAME(nm, __COUNTER__)
-
-#define RTE_PMD_REGISTER_ALIAS(nm, alias)\
-static const char *vdrvinit_ ## nm ## _alias = RTE_STR(alias)
-
-#ifdef __cplusplus
-}
-#endif
-
-#endif
diff --git a/lib/librte_eal/common/include/rte_version.h b/lib/librte_eal/common/include/rte_version.h
index a69a7075..d08cf48a 100644
--- a/lib/librte_eal/common/include/rte_version.h
+++ b/lib/librte_eal/common/include/rte_version.h
@@ -61,7 +61,7 @@ extern "C" {
/**
* Minor version/month number i.e. the mm in yy.mm.z
*/
-#define RTE_VER_MONTH 8
+#define RTE_VER_MONTH 11
/**
* Patch level number i.e. the z in yy.mm.z
@@ -71,14 +71,14 @@ extern "C" {
/**
* Extra string to be appended to version number
*/
-#define RTE_VER_SUFFIX ""
+#define RTE_VER_SUFFIX "-rc"
/**
* Patch release number
* 0-15 = release candidates
* 16 = release
*/
-#define RTE_VER_RELEASE 16
+#define RTE_VER_RELEASE 3
/**
* Macro to compute a version number usable for comparisons
diff --git a/lib/librte_eal/common/include/rte_vfio.h b/lib/librte_eal/common/include/rte_vfio.h
new file mode 100644
index 00000000..a69c4ff6
--- /dev/null
+++ b/lib/librte_eal/common/include/rte_vfio.h
@@ -0,0 +1,153 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright(c) 2017 6WIND S.A. All rights reserved.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of 6WIND nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef _RTE_VFIO_H_
+#define _RTE_VFIO_H_
+
+/*
+ * determine if VFIO is present on the system
+ */
+#if !defined(VFIO_PRESENT) && defined(RTE_EAL_VFIO)
+#include <linux/version.h>
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 6, 0)
+#define VFIO_PRESENT
+#endif /* kernel version >= 3.6.0 */
+#endif /* RTE_EAL_VFIO */
+
+#ifdef VFIO_PRESENT
+
+#include <linux/vfio.h>
+
+#define VFIO_DIR "/dev/vfio"
+#define VFIO_CONTAINER_PATH "/dev/vfio/vfio"
+#define VFIO_GROUP_FMT "/dev/vfio/%u"
+#define VFIO_NOIOMMU_GROUP_FMT "/dev/vfio/noiommu-%u"
+#define VFIO_GET_REGION_ADDR(x) ((uint64_t) x << 40ULL)
+#define VFIO_GET_REGION_IDX(x) (x >> 40)
+#define VFIO_NOIOMMU_MODE \
+ "/sys/module/vfio/parameters/enable_unsafe_noiommu_mode"
+
+/**
+ * Setup vfio_cfg for the device identified by its address.
+ * It discovers the configured I/O MMU groups or sets a new one for the device.
+ * If a new group is assigned, the DMA mapping is performed.
+ *
+ * This function is only relevant to linux and will return
+ * an error on BSD.
+ *
+ * @param sysfs_base
+ * sysfs path prefix.
+ *
+ * @param dev_addr
+ * device location.
+ *
+ * @param vfio_dev_fd
+ * VFIO fd.
+ *
+ * @param device_info
+ * Device information.
+ *
+ * @return
+ * 0 on success.
+ * <0 on failure.
+ * >1 if the device cannot be managed this way.
+ */
+int rte_vfio_setup_device(const char *sysfs_base, const char *dev_addr,
+ int *vfio_dev_fd, struct vfio_device_info *device_info);
+
+/**
+ * Release a device mapped to a VFIO-managed I/O MMU group.
+ *
+ * This function is only relevant to linux and will return
+ * an error on BSD.
+ *
+ * @param sysfs_base
+ * sysfs path prefix.
+ *
+ * @param dev_addr
+ * device location.
+ *
+ * @param fd
+ * VFIO fd.
+ *
+ * @return
+ * 0 on success.
+ * <0 on failure.
+ */
+int rte_vfio_release_device(const char *sysfs_base, const char *dev_addr, int fd);
+
+/**
+ * Enable a VFIO-related kmod.
+ *
+ * This function is only relevant to linux and will return
+ * an error on BSD.
+ *
+ * @param modname
+ * kernel module name.
+ *
+ * @return
+ * 0 on success.
+ * <0 on failure.
+ */
+int rte_vfio_enable(const char *modname);
+
+/**
+ * Check whether a VFIO-related kmod is enabled.
+ *
+ * This function is only relevant to linux and will return
+ * an error on BSD.
+ *
+ * @param modname
+ * kernel module name.
+ *
+ * @return
+ * !0 if true.
+ * 0 otherwise.
+ */
+int rte_vfio_is_enabled(const char *modname);
+
+/**
+ * Whether VFIO NOIOMMU mode is enabled.
+ *
+ * This function is only relevant to linux and will return
+ * an error on BSD.
+ *
+ * @return
+ * !0 if true.
+ * 0 otherwise.
+ */
+int rte_vfio_noiommu_is_enabled(void);
+
+#endif /* VFIO_PRESENT */
+
+#endif /* _RTE_VFIO_H_ */
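As a usage illustration for this new public header (outside the patch; assumes a Linux target built with RTE_EAL_VFIO): querying whether the vfio module is usable and whether unsafe no-IOMMU mode is active.

#include <stdio.h>
#include <rte_vfio.h>

/* Report the VFIO state; the declarations above only exist when
 * VFIO_PRESENT is defined, hence the guard. */
static void
report_vfio_state(void)
{
#ifdef VFIO_PRESENT
	if (rte_vfio_is_enabled("vfio"))
		printf("vfio enabled%s\n",
			rte_vfio_noiommu_is_enabled() ? " (no-IOMMU mode)" : "");
	else
		printf("vfio not enabled\n");
#else
	printf("VFIO support not compiled in\n");
#endif
}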
diff --git a/lib/librte_eal/common/malloc_elem.c b/lib/librte_eal/common/malloc_elem.c
index 15076905..889dffd2 100644
--- a/lib/librte_eal/common/malloc_elem.c
+++ b/lib/librte_eal/common/malloc_elem.c
@@ -275,14 +275,14 @@ malloc_elem_free(struct malloc_elem *elem)
return -1;
rte_spinlock_lock(&(elem->heap->lock));
- size_t sz = elem->size - sizeof(*elem);
+ size_t sz = elem->size - sizeof(*elem) - MALLOC_ELEM_TRAILER_LEN;
uint8_t *ptr = (uint8_t *)&elem[1];
struct malloc_elem *next = RTE_PTR_ADD(elem, elem->size);
if (next->state == ELEM_FREE){
/* remove from free list, join to this one */
elem_free_list_remove(next);
join_elem(elem, next);
- sz += sizeof(*elem);
+ sz += (sizeof(*elem) + MALLOC_ELEM_TRAILER_LEN);
}
/* check if previous element is free, if so join with it and return,
@@ -291,8 +291,8 @@ malloc_elem_free(struct malloc_elem *elem)
if (elem->prev != NULL && elem->prev->state == ELEM_FREE) {
elem_free_list_remove(elem->prev);
join_elem(elem->prev, elem);
- sz += sizeof(*elem);
- ptr -= sizeof(*elem);
+ sz += (sizeof(*elem) + MALLOC_ELEM_TRAILER_LEN);
+ ptr -= (sizeof(*elem) + MALLOC_ELEM_TRAILER_LEN);
elem = elem->prev;
}
malloc_elem_free_list_insert(elem);
diff --git a/lib/librte_eal/common/malloc_elem.h b/lib/librte_eal/common/malloc_elem.h
index f04b2d1e..ce39129d 100644
--- a/lib/librte_eal/common/malloc_elem.h
+++ b/lib/librte_eal/common/malloc_elem.h
@@ -53,13 +53,13 @@ struct malloc_elem {
volatile enum elem_state state;
uint32_t pad;
size_t size;
-#ifdef RTE_LIBRTE_MALLOC_DEBUG
+#ifdef RTE_MALLOC_DEBUG
uint64_t header_cookie; /* Cookie marking start of data */
/* trailer cookie at start + size */
#endif
} __rte_cache_aligned;
-#ifndef RTE_LIBRTE_MALLOC_DEBUG
+#ifndef RTE_MALLOC_DEBUG
static const unsigned MALLOC_ELEM_TRAILER_LEN = 0;
/* dummy function - just check if pointer is non-null */
diff --git a/lib/librte_eal/common/rte_malloc.c b/lib/librte_eal/common/rte_malloc.c
index 5c0627bf..fe2278bc 100644
--- a/lib/librte_eal/common/rte_malloc.c
+++ b/lib/librte_eal/common/rte_malloc.c
@@ -246,15 +246,22 @@ rte_malloc_set_limit(__rte_unused const char *type,
}
/*
- * Return the physical address of a virtual address obtained through rte_malloc
+ * Return the IO address of a virtual address obtained through rte_malloc
*/
-phys_addr_t
-rte_malloc_virt2phy(const void *addr)
+rte_iova_t
+rte_malloc_virt2iova(const void *addr)
{
+ rte_iova_t iova;
const struct malloc_elem *elem = malloc_elem_from_data(addr);
if (elem == NULL)
- return RTE_BAD_PHYS_ADDR;
- if (elem->ms->phys_addr == RTE_BAD_PHYS_ADDR)
- return RTE_BAD_PHYS_ADDR;
- return elem->ms->phys_addr + ((uintptr_t)addr - (uintptr_t)elem->ms->addr);
+ return RTE_BAD_IOVA;
+ if (elem->ms->iova == RTE_BAD_IOVA)
+ return RTE_BAD_IOVA;
+
+ if (rte_eal_iova_mode() == RTE_IOVA_VA)
+ iova = (uintptr_t)addr;
+ else
+ iova = elem->ms->iova +
+ RTE_PTR_DIFF(addr, elem->ms->addr);
+ return iova;
}
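For context, a minimal caller sketch (illustrative only; assumes the EAL is already initialized and 17.11 headers; the helper name is a placeholder): allocate a buffer with rte_malloc() and obtain the IO address a device would use, which under RTE_IOVA_VA is simply the virtual address.

#include <stddef.h>
#include <rte_malloc.h>
#include <rte_memory.h>

/* Allocate a cache-line aligned buffer and return its IO address. */
static rte_iova_t
alloc_dma_buffer(void **vaddr, size_t len)
{
	*vaddr = rte_malloc(NULL, len, RTE_CACHE_LINE_SIZE);
	if (*vaddr == NULL)
		return RTE_BAD_IOVA;
	return rte_malloc_virt2iova(*vaddr);
}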
diff --git a/lib/librte_eal/common/rte_service.c b/lib/librte_eal/common/rte_service.c
index 7efb76dc..09b758c9 100644
--- a/lib/librte_eal/common/rte_service.c
+++ b/lib/librte_eal/common/rte_service.c
@@ -54,6 +54,7 @@
#define SERVICE_F_REGISTERED (1 << 0)
#define SERVICE_F_STATS_ENABLED (1 << 1)
+#define SERVICE_F_START_CHECK (1 << 2)
/* runstates for services and lcores, denoting if they are active or not */
#define RUNSTATE_STOPPED 0
@@ -71,11 +72,12 @@ struct rte_service_spec_impl {
rte_atomic32_t execute_lock;
/* API set/get-able variables */
- int32_t runstate;
+ int8_t app_runstate;
+ int8_t comp_runstate;
uint8_t internal_flags;
/* per service statistics */
- uint32_t num_mapped_cores;
+ rte_atomic32_t num_mapped_cores;
uint64_t calls;
uint64_t cycles_spent;
} __rte_cache_aligned;
@@ -144,6 +146,13 @@ service_valid(uint32_t id)
return !!(rte_services[id].internal_flags & SERVICE_F_REGISTERED);
}
+/* validate ID and retrieve service pointer, or return error value */
+#define SERVICE_VALID_GET_OR_ERR_RET(id, service, retval) do { \
+ if (id >= RTE_SERVICE_NUM_MAX || !service_valid(id)) \
+ return retval; \
+ service = &rte_services[id]; \
+} while (0)
+
/* returns 1 if statistics should be colleced for service
* Returns 0 if statistics should not be collected for service
*/
@@ -156,21 +165,31 @@ service_stats_enabled(struct rte_service_spec_impl *impl)
static inline int
service_mt_safe(struct rte_service_spec_impl *s)
{
- return s->spec.capabilities & RTE_SERVICE_CAP_MT_SAFE;
+ return !!(s->spec.capabilities & RTE_SERVICE_CAP_MT_SAFE);
}
-int32_t rte_service_set_stats_enable(struct rte_service_spec *service,
- int32_t enabled)
+int32_t rte_service_set_stats_enable(uint32_t id, int32_t enabled)
{
- struct rte_service_spec_impl *impl =
- (struct rte_service_spec_impl *)service;
- if (!impl)
- return -EINVAL;
+ struct rte_service_spec_impl *s;
+ SERVICE_VALID_GET_OR_ERR_RET(id, s, 0);
if (enabled)
- impl->internal_flags |= SERVICE_F_STATS_ENABLED;
+ s->internal_flags |= SERVICE_F_STATS_ENABLED;
else
- impl->internal_flags &= ~(SERVICE_F_STATS_ENABLED);
+ s->internal_flags &= ~(SERVICE_F_STATS_ENABLED);
+
+ return 0;
+}
+
+int32_t rte_service_set_runstate_mapped_check(uint32_t id, int32_t enabled)
+{
+ struct rte_service_spec_impl *s;
+ SERVICE_VALID_GET_OR_ERR_RET(id, s, 0);
+
+ if (enabled)
+ s->internal_flags |= SERVICE_F_START_CHECK;
+ else
+ s->internal_flags &= ~(SERVICE_F_START_CHECK);
return 0;
}
@@ -181,58 +200,42 @@ rte_service_get_count(void)
return rte_service_count;
}
-struct rte_service_spec *
-rte_service_get_by_id(uint32_t id)
+int32_t rte_service_get_by_name(const char *name, uint32_t *service_id)
{
- struct rte_service_spec *service = NULL;
- if (id < rte_service_count)
- service = (struct rte_service_spec *)&rte_services[id];
-
- return service;
-}
+ if (!service_id)
+ return -EINVAL;
-struct rte_service_spec *rte_service_get_by_name(const char *name)
-{
- struct rte_service_spec *service = NULL;
int i;
for (i = 0; i < RTE_SERVICE_NUM_MAX; i++) {
if (service_valid(i) &&
strcmp(name, rte_services[i].spec.name) == 0) {
- service = (struct rte_service_spec *)&rte_services[i];
- break;
+ *service_id = i;
+ return 0;
}
}
- return service;
+ return -ENODEV;
}
const char *
-rte_service_get_name(const struct rte_service_spec *service)
+rte_service_get_name(uint32_t id)
{
- return service->name;
+ struct rte_service_spec_impl *s;
+ SERVICE_VALID_GET_OR_ERR_RET(id, s, 0);
+ return s->spec.name;
}
int32_t
-rte_service_probe_capability(const struct rte_service_spec *service,
- uint32_t capability)
+rte_service_probe_capability(uint32_t id, uint32_t capability)
{
- return service->capabilities & capability;
+ struct rte_service_spec_impl *s;
+ SERVICE_VALID_GET_OR_ERR_RET(id, s, -EINVAL);
+ return !!(s->spec.capabilities & capability);
}
int32_t
-rte_service_is_running(const struct rte_service_spec *spec)
-{
- const struct rte_service_spec_impl *impl =
- (const struct rte_service_spec_impl *)spec;
- if (!impl)
- return -EINVAL;
-
- return (impl->runstate == RUNSTATE_RUNNING) &&
- (impl->num_mapped_cores > 0);
-}
-
-int32_t
-rte_service_register(const struct rte_service_spec *spec)
+rte_service_component_register(const struct rte_service_spec *spec,
+ uint32_t *id_ptr)
{
uint32_t i;
int32_t free_slot = -1;
@@ -252,68 +255,161 @@ rte_service_register(const struct rte_service_spec *spec)
struct rte_service_spec_impl *s = &rte_services[free_slot];
s->spec = *spec;
- s->internal_flags |= SERVICE_F_REGISTERED;
+ s->internal_flags |= SERVICE_F_REGISTERED | SERVICE_F_START_CHECK;
rte_smp_wmb();
rte_service_count++;
+ if (id_ptr)
+ *id_ptr = free_slot;
+
return 0;
}
int32_t
-rte_service_unregister(struct rte_service_spec *spec)
+rte_service_component_unregister(uint32_t id)
{
- struct rte_service_spec_impl *s = NULL;
- struct rte_service_spec_impl *spec_impl =
- (struct rte_service_spec_impl *)spec;
-
uint32_t i;
- uint32_t service_id;
- for (i = 0; i < RTE_SERVICE_NUM_MAX; i++) {
- if (&rte_services[i] == spec_impl) {
- s = spec_impl;
- service_id = i;
- break;
- }
- }
-
- if (!s)
- return -EINVAL;
+ struct rte_service_spec_impl *s;
+ SERVICE_VALID_GET_OR_ERR_RET(id, s, -EINVAL);
rte_service_count--;
rte_smp_wmb();
s->internal_flags &= ~(SERVICE_F_REGISTERED);
+ /* clear the run-bit in all cores */
for (i = 0; i < RTE_MAX_LCORE; i++)
- lcore_states[i].service_mask &= ~(UINT64_C(1) << service_id);
+ lcore_states[i].service_mask &= ~(UINT64_C(1) << id);
- memset(&rte_services[service_id], 0,
- sizeof(struct rte_service_spec_impl));
+ memset(&rte_services[id], 0, sizeof(struct rte_service_spec_impl));
return 0;
}
int32_t
-rte_service_start(struct rte_service_spec *service)
+rte_service_component_runstate_set(uint32_t id, uint32_t runstate)
{
- struct rte_service_spec_impl *s =
- (struct rte_service_spec_impl *)service;
- s->runstate = RUNSTATE_RUNNING;
+ struct rte_service_spec_impl *s;
+ SERVICE_VALID_GET_OR_ERR_RET(id, s, -EINVAL);
+
+ if (runstate)
+ s->comp_runstate = RUNSTATE_RUNNING;
+ else
+ s->comp_runstate = RUNSTATE_STOPPED;
+
rte_smp_wmb();
return 0;
}
int32_t
-rte_service_stop(struct rte_service_spec *service)
+rte_service_runstate_set(uint32_t id, uint32_t runstate)
{
- struct rte_service_spec_impl *s =
- (struct rte_service_spec_impl *)service;
- s->runstate = RUNSTATE_STOPPED;
+ struct rte_service_spec_impl *s;
+ SERVICE_VALID_GET_OR_ERR_RET(id, s, -EINVAL);
+
+ if (runstate)
+ s->app_runstate = RUNSTATE_RUNNING;
+ else
+ s->app_runstate = RUNSTATE_STOPPED;
+
rte_smp_wmb();
return 0;
}
+int32_t
+rte_service_runstate_get(uint32_t id)
+{
+ struct rte_service_spec_impl *s;
+ SERVICE_VALID_GET_OR_ERR_RET(id, s, -EINVAL);
+ rte_smp_rmb();
+
+ int check_disabled = !(s->internal_flags & SERVICE_F_START_CHECK);
+ int lcore_mapped = (rte_atomic32_read(&s->num_mapped_cores) > 0);
+
+ return (s->app_runstate == RUNSTATE_RUNNING) &&
+ (s->comp_runstate == RUNSTATE_RUNNING) &&
+ (check_disabled | lcore_mapped);
+}
+
+static inline void
+rte_service_runner_do_callback(struct rte_service_spec_impl *s,
+ struct core_state *cs, uint32_t service_idx)
+{
+ void *userdata = s->spec.callback_userdata;
+
+ if (service_stats_enabled(s)) {
+ uint64_t start = rte_rdtsc();
+ s->spec.callback(userdata);
+ uint64_t end = rte_rdtsc();
+ s->cycles_spent += end - start;
+ cs->calls_per_service[service_idx]++;
+ s->calls++;
+ } else
+ s->spec.callback(userdata);
+}
+
+
+static inline int32_t
+service_run(uint32_t i, struct core_state *cs, uint64_t service_mask)
+{
+ if (!service_valid(i))
+ return -EINVAL;
+ struct rte_service_spec_impl *s = &rte_services[i];
+ if (s->comp_runstate != RUNSTATE_RUNNING ||
+ s->app_runstate != RUNSTATE_RUNNING ||
+ !(service_mask & (UINT64_C(1) << i)))
+ return -ENOEXEC;
+
+ /* Check whether cmpset is needed: if the service is MT safe, or at
+ * most one core is mapped, atomic ops are not required.
+ */
+ const int use_atomics = (service_mt_safe(s) == 0) &&
+ (rte_atomic32_read(&s->num_mapped_cores) > 1);
+ if (use_atomics) {
+ if (!rte_atomic32_cmpset((uint32_t *)&s->execute_lock, 0, 1))
+ return -EBUSY;
+
+ rte_service_runner_do_callback(s, cs, i);
+ rte_atomic32_clear(&s->execute_lock);
+ } else
+ rte_service_runner_do_callback(s, cs, i);
+
+ return 0;
+}
+
+int32_t rte_service_run_iter_on_app_lcore(uint32_t id,
+ uint32_t serialize_mt_unsafe)
+{
+ /* run service on calling core, using all-ones as the service mask */
+ if (!service_valid(id))
+ return -EINVAL;
+
+ struct core_state *cs = &lcore_states[rte_lcore_id()];
+ struct rte_service_spec_impl *s = &rte_services[id];
+
+ /* Atomically add this core to the mapped cores first, then examine if
+ * we can run the service. This avoids a race condition between
+ * checking the value, and atomically adding to the mapped count.
+ */
+ if (serialize_mt_unsafe)
+ rte_atomic32_inc(&s->num_mapped_cores);
+
+ if (service_mt_safe(s) == 0 &&
+ rte_atomic32_read(&s->num_mapped_cores) > 1) {
+ if (serialize_mt_unsafe)
+ rte_atomic32_dec(&s->num_mapped_cores);
+ return -EBUSY;
+ }
+
+ int ret = service_run(id, cs, UINT64_MAX);
+
+ if (serialize_mt_unsafe)
+ rte_atomic32_dec(&s->num_mapped_cores);
+
+ return ret;
+}
+
static int32_t
rte_service_runner_func(void *arg)
{
@@ -324,35 +420,10 @@ rte_service_runner_func(void *arg)
while (lcore_states[lcore].runstate == RUNSTATE_RUNNING) {
const uint64_t service_mask = cs->service_mask;
- for (i = 0; i < rte_service_count; i++) {
- struct rte_service_spec_impl *s = &rte_services[i];
- if (s->runstate != RUNSTATE_RUNNING ||
- !(service_mask & (UINT64_C(1) << i)))
- continue;
- /* check do we need cmpset, if MT safe or <= 1 core
- * mapped, atomic ops are not required.
- */
- const int need_cmpset = !((service_mt_safe(s) == 0) &&
- (s->num_mapped_cores > 1));
- uint32_t *lock = (uint32_t *)&s->execute_lock;
-
- if (need_cmpset || rte_atomic32_cmpset(lock, 0, 1)) {
- void *userdata = s->spec.callback_userdata;
-
- if (service_stats_enabled(s)) {
- uint64_t start = rte_rdtsc();
- s->spec.callback(userdata);
- uint64_t end = rte_rdtsc();
- s->cycles_spent += end - start;
- cs->calls_per_service[i]++;
- s->calls++;
- } else
- s->spec.callback(userdata);
-
- if (need_cmpset)
- rte_atomic32_clear(&s->execute_lock);
- }
+ for (i = 0; i < RTE_SERVICE_NUM_MAX; i++) {
+ /* return value ignored as no change to code flow */
+ service_run(i, cs, service_mask);
}
rte_smp_rmb();
@@ -397,6 +468,19 @@ rte_service_lcore_list(uint32_t array[], uint32_t n)
}
int32_t
+rte_service_lcore_count_services(uint32_t lcore)
+{
+ if (lcore >= RTE_MAX_LCORE)
+ return -EINVAL;
+
+ struct core_state *cs = &lcore_states[lcore];
+ if (!cs->is_service_core)
+ return -ENOTSUP;
+
+ return __builtin_popcountll(cs->service_mask);
+}
+
+int32_t
rte_service_start_with_defaults(void)
{
/* create a default mapping from cores to services, then start the
@@ -407,7 +491,7 @@ rte_service_start_with_defaults(void)
uint32_t count = rte_service_get_count();
int32_t lcore_iter = 0;
- uint32_t ids[RTE_MAX_LCORE];
+ uint32_t ids[RTE_MAX_LCORE] = {0};
int32_t lcore_count = rte_service_lcore_list(ids, RTE_MAX_LCORE);
if (lcore_count == 0)
@@ -417,16 +501,12 @@ rte_service_start_with_defaults(void)
rte_service_lcore_start(ids[i]);
for (i = 0; i < count; i++) {
- struct rte_service_spec *s = rte_service_get_by_id(i);
- if (!s)
- return -EINVAL;
-
/* do 1:1 core mapping here, with each service getting
* assigned a single core by default. Adding multiple services
* should multiplex to a single core, or 1:1 if there are the
* same amount of services as service-cores
*/
- ret = rte_service_enable_on_lcore(s, ids[lcore_iter]);
+ ret = rte_service_map_lcore_set(i, ids[lcore_iter], 1);
if (ret)
return -ENODEV;
@@ -434,7 +514,7 @@ rte_service_start_with_defaults(void)
if (lcore_iter >= lcore_count)
lcore_iter = 0;
- ret = rte_service_start(s);
+ ret = rte_service_runstate_set(i, 1);
if (ret)
return -ENOEXEC;
}
@@ -467,43 +547,40 @@ service_update(struct rte_service_spec *service, uint32_t lcore,
if (set) {
if (*set) {
lcore_states[lcore].service_mask |= sid_mask;
- rte_services[sid].num_mapped_cores++;
+ rte_atomic32_inc(&rte_services[sid].num_mapped_cores);
} else {
lcore_states[lcore].service_mask &= ~(sid_mask);
- rte_services[sid].num_mapped_cores--;
+ rte_atomic32_dec(&rte_services[sid].num_mapped_cores);
}
}
if (enabled)
- *enabled = (lcore_states[lcore].service_mask & (sid_mask));
+ *enabled = !!(lcore_states[lcore].service_mask & (sid_mask));
rte_smp_wmb();
return 0;
}
-int32_t rte_service_get_enabled_on_lcore(struct rte_service_spec *service,
- uint32_t lcore)
-{
- uint32_t enabled;
- int ret = service_update(service, lcore, 0, &enabled);
- if (ret == 0)
- return enabled;
- return -EINVAL;
-}
-
int32_t
-rte_service_enable_on_lcore(struct rte_service_spec *service, uint32_t lcore)
+rte_service_map_lcore_set(uint32_t id, uint32_t lcore, uint32_t enabled)
{
- uint32_t on = 1;
- return service_update(service, lcore, &on, 0);
+ struct rte_service_spec_impl *s;
+ SERVICE_VALID_GET_OR_ERR_RET(id, s, -EINVAL);
+ uint32_t on = enabled > 0;
+ return service_update(&s->spec, lcore, &on, 0);
}
int32_t
-rte_service_disable_on_lcore(struct rte_service_spec *service, uint32_t lcore)
+rte_service_map_lcore_get(uint32_t id, uint32_t lcore)
{
- uint32_t off = 0;
- return service_update(service, lcore, &off, 0);
+ struct rte_service_spec_impl *s;
+ SERVICE_VALID_GET_OR_ERR_RET(id, s, -EINVAL);
+ uint32_t enabled;
+ int ret = service_update(&s->spec, lcore, 0, &enabled);
+ if (ret == 0)
+ return enabled;
+ return ret;
}
int32_t rte_service_lcore_reset_all(void)
@@ -516,7 +593,7 @@ int32_t rte_service_lcore_reset_all(void)
lcore_states[i].runstate = RUNSTATE_STOPPED;
}
for (i = 0; i < RTE_SERVICE_NUM_MAX; i++)
- rte_services[i].num_mapped_cores = 0;
+ rte_atomic32_set(&rte_services[i].num_mapped_cores, 0);
rte_smp_wmb();
@@ -552,7 +629,8 @@ rte_service_lcore_add(uint32_t lcore)
lcore_states[lcore].runstate = RUNSTATE_STOPPED;
rte_smp_wmb();
- return 0;
+
+ return rte_eal_wait_lcore(lcore);
}
int32_t
@@ -607,12 +685,12 @@ rte_service_lcore_stop(uint32_t lcore)
return -EALREADY;
uint32_t i;
+ uint64_t service_mask = lcore_states[lcore].service_mask;
for (i = 0; i < RTE_SERVICE_NUM_MAX; i++) {
- int32_t enabled =
- lcore_states[i].service_mask & (UINT64_C(1) << i);
- int32_t service_running = rte_services[i].runstate !=
- RUNSTATE_STOPPED;
- int32_t only_core = rte_services[i].num_mapped_cores == 1;
+ int32_t enabled = service_mask & (UINT64_C(1) << i);
+ int32_t service_running = rte_service_runstate_get(i);
+ int32_t only_core = (1 ==
+ rte_atomic32_read(&rte_services[i].num_mapped_cores));
/* if the core is mapped, and the service is running, and this
* is the only core that is mapped, the service would cease to
@@ -667,28 +745,34 @@ service_dump_calls_per_lcore(FILE *f, uint32_t lcore, uint32_t reset)
fprintf(f, "\n");
}
-int32_t rte_service_dump(FILE *f, struct rte_service_spec *service)
+int32_t rte_service_dump(FILE *f, uint32_t id)
{
uint32_t i;
+ int print_one = (id != UINT32_MAX);
uint64_t total_cycles = 0;
- for (i = 0; i < rte_service_count; i++) {
+
+ for (i = 0; i < RTE_SERVICE_NUM_MAX; i++) {
if (!service_valid(i))
continue;
total_cycles += rte_services[i].cycles_spent;
}
- if (service) {
- struct rte_service_spec_impl *s =
- (struct rte_service_spec_impl *)service;
+ /* print only the specified service */
+ if (print_one) {
+ struct rte_service_spec_impl *s;
+ SERVICE_VALID_GET_OR_ERR_RET(id, s, -EINVAL);
fprintf(f, "Service %s Summary\n", s->spec.name);
uint32_t reset = 0;
rte_service_dump_one(f, s, total_cycles, reset);
return 0;
}
+ /* print all services, as UINT32_MAX was passed as id */
fprintf(f, "Services Summary\n");
- for (i = 0; i < rte_service_count; i++) {
+ for (i = 0; i < RTE_SERVICE_NUM_MAX; i++) {
+ if (!service_valid(i))
+ continue;
uint32_t reset = 1;
rte_service_dump_one(f, &rte_services[i], total_cycles, reset);
}
@@ -698,7 +782,7 @@ int32_t rte_service_dump(FILE *f, struct rte_service_spec *service)
if (lcore_config[i].core_role != ROLE_SERVICE)
continue;
- uint32_t reset = 0;
+ uint32_t reset = 1;
service_dump_calls_per_lcore(f, i, reset);
}
diff --git a/lib/librte_eal/linuxapp/Makefile b/lib/librte_eal/linuxapp/Makefile
index 4794696b..2ebdf313 100644
--- a/lib/librte_eal/linuxapp/Makefile
+++ b/lib/librte_eal/linuxapp/Makefile
@@ -35,7 +35,5 @@ DIRS-$(CONFIG_RTE_EXEC_ENV_LINUXAPP) += eal
DIRS-$(CONFIG_RTE_EAL_IGB_UIO) += igb_uio
DIRS-$(CONFIG_RTE_KNI_KMOD) += kni
DEPDIRS-kni := eal
-DIRS-$(CONFIG_RTE_LIBRTE_XEN_DOM0) += xen_dom0
-DEPDIRS-xen_dom0 := eal
include $(RTE_SDK)/mk/rte.subdir.mk
diff --git a/lib/librte_eal/linuxapp/eal/Makefile b/lib/librte_eal/linuxapp/eal/Makefile
index 90bca4d6..5a7b8b2a 100644
--- a/lib/librte_eal/linuxapp/eal/Makefile
+++ b/lib/librte_eal/linuxapp/eal/Makefile
@@ -34,10 +34,10 @@ include $(RTE_SDK)/mk/rte.vars.mk
LIB = librte_eal.a
ARCH_DIR ?= $(RTE_ARCH)
-EXPORT_MAP := rte_eal_version.map
+EXPORT_MAP := ../../rte_eal_version.map
VPATH += $(RTE_SDK)/lib/librte_eal/common/arch/$(ARCH_DIR)
-LIBABIVER := 5
+LIBABIVER := 6
VPATH += $(RTE_SDK)/lib/librte_eal/common
@@ -58,16 +58,10 @@ endif
SRCS-$(CONFIG_RTE_EXEC_ENV_LINUXAPP) := eal.c
SRCS-$(CONFIG_RTE_EXEC_ENV_LINUXAPP) += eal_hugepage_info.c
SRCS-$(CONFIG_RTE_EXEC_ENV_LINUXAPP) += eal_memory.c
-ifeq ($(CONFIG_RTE_LIBRTE_XEN_DOM0),y)
-SRCS-$(CONFIG_RTE_EXEC_ENV_LINUXAPP) += eal_xen_memory.c
-endif
SRCS-$(CONFIG_RTE_EXEC_ENV_LINUXAPP) += eal_thread.c
SRCS-$(CONFIG_RTE_EXEC_ENV_LINUXAPP) += eal_log.c
SRCS-$(CONFIG_RTE_EXEC_ENV_LINUXAPP) += eal_vfio.c
SRCS-$(CONFIG_RTE_EXEC_ENV_LINUXAPP) += eal_vfio_mp_sync.c
-SRCS-$(CONFIG_RTE_EXEC_ENV_LINUXAPP) += eal_pci.c
-SRCS-$(CONFIG_RTE_EXEC_ENV_LINUXAPP) += eal_pci_uio.c
-SRCS-$(CONFIG_RTE_EXEC_ENV_LINUXAPP) += eal_pci_vfio.c
SRCS-$(CONFIG_RTE_EXEC_ENV_LINUXAPP) += eal_debug.c
SRCS-$(CONFIG_RTE_EXEC_ENV_LINUXAPP) += eal_lcore.c
SRCS-$(CONFIG_RTE_EXEC_ENV_LINUXAPP) += eal_timer.c
@@ -80,9 +74,6 @@ SRCS-$(CONFIG_RTE_EXEC_ENV_LINUXAPP) += eal_common_timer.c
SRCS-$(CONFIG_RTE_EXEC_ENV_LINUXAPP) += eal_common_memzone.c
SRCS-$(CONFIG_RTE_EXEC_ENV_LINUXAPP) += eal_common_log.c
SRCS-$(CONFIG_RTE_EXEC_ENV_LINUXAPP) += eal_common_launch.c
-SRCS-$(CONFIG_RTE_EXEC_ENV_LINUXAPP) += eal_common_vdev.c
-SRCS-$(CONFIG_RTE_EXEC_ENV_LINUXAPP) += eal_common_pci.c
-SRCS-$(CONFIG_RTE_EXEC_ENV_LINUXAPP) += eal_common_pci_uio.c
SRCS-$(CONFIG_RTE_EXEC_ENV_LINUXAPP) += eal_common_memory.c
SRCS-$(CONFIG_RTE_EXEC_ENV_LINUXAPP) += eal_common_tailqs.c
SRCS-$(CONFIG_RTE_EXEC_ENV_LINUXAPP) += eal_common_errno.c
@@ -104,6 +95,7 @@ SRCS-$(CONFIG_RTE_EXEC_ENV_LINUXAPP) += rte_service.c
# from arch dir
SRCS-$(CONFIG_RTE_EXEC_ENV_LINUXAPP) += rte_cpuflags.c
SRCS-$(CONFIG_RTE_ARCH_X86) += rte_spinlock.c
+SRCS-y += rte_cycles.c
CFLAGS_eal_common_cpuflags.o := $(CPUFLAGS_LIST)
@@ -116,13 +108,11 @@ CFLAGS_eal_thread.o := -D_GNU_SOURCE
CFLAGS_eal_log.o := -D_GNU_SOURCE
CFLAGS_eal_common_log.o := -D_GNU_SOURCE
CFLAGS_eal_hugepage_info.o := -D_GNU_SOURCE
-CFLAGS_eal_pci.o := -D_GNU_SOURCE
-CFLAGS_eal_pci_uio.o := -D_GNU_SOURCE
-CFLAGS_eal_pci_vfio.o := -D_GNU_SOURCE
CFLAGS_eal_common_whitelist.o := -D_GNU_SOURCE
CFLAGS_eal_common_options.o := -D_GNU_SOURCE
CFLAGS_eal_common_thread.o := -D_GNU_SOURCE
CFLAGS_eal_common_lcore.o := -D_GNU_SOURCE
+CFLAGS_rte_cycles.o := -D_GNU_SOURCE
# workaround for a gcc bug with noreturn attribute
# http://gcc.gnu.org/bugzilla/show_bug.cgi?id=12603
@@ -130,7 +120,7 @@ ifeq ($(CONFIG_RTE_TOOLCHAIN_GCC),y)
CFLAGS_eal_thread.o += -Wno-return-type
endif
-INC := rte_interrupts.h rte_kni_common.h rte_dom0_common.h
+INC := rte_kni_common.h
SYMLINK-$(CONFIG_RTE_EXEC_ENV_LINUXAPP)-include/exec-env := \
$(addprefix include/exec-env/,$(INC))
diff --git a/lib/librte_eal/linuxapp/eal/eal.c b/lib/librte_eal/linuxapp/eal/eal.c
index 48f12f44..229eec9f 100644
--- a/lib/librte_eal/linuxapp/eal/eal.c
+++ b/lib/librte_eal/linuxapp/eal/eal.c
@@ -56,7 +56,6 @@
#include <rte_common.h>
#include <rte_debug.h>
#include <rte_memory.h>
-#include <rte_memzone.h>
#include <rte_launch.h>
#include <rte_eal.h>
#include <rte_eal_memconfig.h>
@@ -71,12 +70,12 @@
#include <rte_cpuflags.h>
#include <rte_interrupts.h>
#include <rte_bus.h>
-#include <rte_pci.h>
#include <rte_dev.h>
#include <rte_devargs.h>
#include <rte_version.h>
#include <rte_atomic.h>
#include <malloc_heap.h>
+#include <rte_vfio.h>
#include "eal_private.h"
#include "eal_thread.h"
@@ -121,6 +120,13 @@ struct internal_config internal_config;
/* used by rte_rdtsc() */
int rte_cycles_vmware_tsc_map;
+/* Return mbuf pool ops name */
+const char *
+rte_eal_mbuf_default_mempool_ops(void)
+{
+ return internal_config.mbuf_pool_ops_name;
+}
+
/* Return a pointer to the configuration structure */
struct rte_config *
rte_eal_get_configuration(void)
@@ -128,6 +134,12 @@ rte_eal_get_configuration(void)
return &rte_config;
}
+enum rte_iova_mode
+rte_eal_iova_mode(void)
+{
+ return rte_eal_get_configuration()->iova_mode;
+}
+
/* parse a sysfs (or other) file containing one integer value */
int
eal_parse_sysfs_value(const char *filename, unsigned long *val)
@@ -354,7 +366,6 @@ eal_usage(const char *prgname)
" --"OPT_BASE_VIRTADDR" Base virtual address\n"
" --"OPT_CREATE_UIO_DEV" Create /dev/uioX (usually done by hotplug)\n"
" --"OPT_VFIO_INTR" Interrupt mode for VFIO (legacy|msi|msix)\n"
- " --"OPT_XEN_DOM0" Support running on Xen dom0 without hugetlbfs\n"
"\n");
/* Allow the application to print its usage message too if hook is set */
if ( rte_application_usage_hook ) {
@@ -555,25 +566,12 @@ eal_parse_args(int argc, char **argv)
eal_usage(prgname);
exit(EXIT_SUCCESS);
- /* long options */
- case OPT_XEN_DOM0_NUM:
-#ifdef RTE_LIBRTE_XEN_DOM0
- internal_config.xen_dom0_support = 1;
-#else
- RTE_LOG(ERR, EAL, "Can't support DPDK app "
- "running on Dom0, please configure"
- " RTE_LIBRTE_XEN_DOM0=y\n");
- ret = -1;
- goto out;
-#endif
- break;
-
case OPT_HUGE_DIR_NUM:
- internal_config.hugepage_dir = optarg;
+ internal_config.hugepage_dir = strdup(optarg);
break;
case OPT_FILE_PREFIX_NUM:
- internal_config.hugefile_prefix = optarg;
+ internal_config.hugefile_prefix = strdup(optarg);
break;
case OPT_SOCKET_MEM_NUM:
@@ -610,6 +608,10 @@ eal_parse_args(int argc, char **argv)
internal_config.create_uio_dev = 1;
break;
+ case OPT_MBUF_POOL_OPS_NAME_NUM:
+ internal_config.mbuf_pool_ops_name = optarg;
+ break;
+
default:
if (opt < OPT_LONG_MIN_NUM && isprint(opt)) {
RTE_LOG(ERR, EAL, "Option %c is not supported "
@@ -641,15 +643,6 @@ eal_parse_args(int argc, char **argv)
goto out;
}
- /* --xen-dom0 doesn't make sense with --socket-mem */
- if (internal_config.xen_dom0_support && internal_config.force_sockets == 1) {
- RTE_LOG(ERR, EAL, "Options --"OPT_SOCKET_MEM" cannot be specified "
- "together with --"OPT_XEN_DOM0"\n");
- eal_usage(prgname);
- ret = -1;
- goto out;
- }
-
if (optind >= 0)
argv[optind-1] = prgname;
ret = optind-1;
@@ -716,10 +709,9 @@ static int rte_eal_vfio_setup(void)
{
int vfio_enabled = 0;
- if (!internal_config.no_pci) {
- pci_vfio_enable();
- vfio_enabled |= pci_vfio_is_enabled();
- }
+ if (rte_vfio_enable("vfio"))
+ return -1;
+ vfio_enabled = rte_vfio_is_enabled("vfio");
if (vfio_enabled) {
@@ -792,9 +784,40 @@ rte_eal_init(int argc, char **argv)
return -1;
}
+ if (eal_plugins_init() < 0) {
+ rte_eal_init_alert("Cannot init plugins\n");
+ rte_errno = EINVAL;
+ rte_atomic32_clear(&run_once);
+ return -1;
+ }
+
+ if (eal_option_device_parse()) {
+ rte_errno = ENODEV;
+ rte_atomic32_clear(&run_once);
+ return -1;
+ }
+
+ if (rte_bus_scan()) {
+ rte_eal_init_alert("Cannot scan the buses for devices\n");
+ rte_errno = ENODEV;
+ rte_atomic32_clear(&run_once);
+ return -1;
+ }
+
+ /* autodetect the iova mapping mode (default is iova_pa) */
+ rte_eal_get_configuration()->iova_mode = rte_bus_get_iommu_class();
+
+ /* Workaround for KNI which requires physical address to work */
+ if (rte_eal_get_configuration()->iova_mode == RTE_IOVA_VA &&
+ rte_eal_check_module("rte_kni") == 1) {
+ rte_eal_get_configuration()->iova_mode = RTE_IOVA_PA;
+ RTE_LOG(WARNING, EAL,
+ "Some devices want IOVA as VA but PA will be used because.. "
+ "KNI module inserted\n");
+ }
+
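To illustrate how applications can observe the mapping mode selected here (illustrative sketch only, assuming 17.11 headers and that rte_eal_init() has already returned):

#include <stdio.h>
#include <rte_bus.h>
#include <rte_eal.h>

/* Report which IOVA mapping mode the EAL chose at init time. */
static void
report_iova_mode(void)
{
	if (rte_eal_iova_mode() == RTE_IOVA_VA)
		printf("EAL selected IOVA as VA\n");
	else
		printf("EAL selected IOVA as PA\n");
}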
if (internal_config.no_hugetlbfs == 0 &&
internal_config.process_type != RTE_PROC_SECONDARY &&
- internal_config.xen_dom0_support == 0 &&
eal_hugepage_info_init() < 0) {
rte_eal_init_alert("Cannot get hugepage information.");
rte_errno = EACCES;
@@ -873,9 +896,6 @@ rte_eal_init(int argc, char **argv)
eal_check_mem_on_local_socket();
- if (eal_plugins_init() < 0)
- rte_eal_init_alert("Cannot init plugins\n");
-
eal_thread_init_master(rte_config.master_lcore);
ret = eal_thread_dump_affinity(cpuset, RTE_CPU_AFFINITY_STR_LEN);
@@ -889,17 +909,6 @@ rte_eal_init(int argc, char **argv)
return -1;
}
- if (eal_option_device_parse()) {
- rte_errno = ENODEV;
- return -1;
- }
-
- if (rte_bus_scan()) {
- rte_eal_init_alert("Cannot scan the buses for devices\n");
- rte_errno = ENODEV;
- return -1;
- }
-
RTE_LCORE_FOREACH_SLAVE(i) {
/*
@@ -983,6 +992,22 @@ int rte_eal_has_hugepages(void)
return ! internal_config.no_hugetlbfs;
}
+int rte_eal_has_pci(void)
+{
+ return !internal_config.no_pci;
+}
+
+int rte_eal_create_uio_dev(void)
+{
+ return internal_config.create_uio_dev;
+}
+
+enum rte_intr_mode
+rte_eal_vfio_intr_mode(void)
+{
+ return internal_config.vfio_intr_mode;
+}
+
int
rte_eal_check_module(const char *module_name)
{
diff --git a/lib/librte_eal/linuxapp/eal/eal_alarm.c b/lib/librte_eal/linuxapp/eal/eal_alarm.c
index fbae4613..8e4a775b 100644
--- a/lib/librte_eal/linuxapp/eal/eal_alarm.c
+++ b/lib/librte_eal/linuxapp/eal/eal_alarm.c
@@ -40,7 +40,6 @@
#include <sys/timerfd.h>
#include <rte_memory.h>
-#include <rte_memzone.h>
#include <rte_interrupts.h>
#include <rte_alarm.h>
#include <rte_common.h>
diff --git a/lib/librte_eal/linuxapp/eal/eal_hugepage_info.c b/lib/librte_eal/linuxapp/eal/eal_hugepage_info.c
index 7a21e8f6..86e174fc 100644
--- a/lib/librte_eal/linuxapp/eal/eal_hugepage_info.c
+++ b/lib/librte_eal/linuxapp/eal/eal_hugepage_info.c
@@ -46,7 +46,6 @@
#include <sys/queue.h>
#include <rte_memory.h>
-#include <rte_memzone.h>
#include <rte_eal.h>
#include <rte_launch.h>
#include <rte_per_lcore.h>
diff --git a/lib/librte_eal/linuxapp/eal/eal_interrupts.c b/lib/librte_eal/linuxapp/eal/eal_interrupts.c
index 3e9ac41e..1c20693d 100644
--- a/lib/librte_eal/linuxapp/eal/eal_interrupts.c
+++ b/lib/librte_eal/linuxapp/eal/eal_interrupts.c
@@ -51,7 +51,6 @@
#include <rte_common.h>
#include <rte_interrupts.h>
#include <rte_memory.h>
-#include <rte_memzone.h>
#include <rte_launch.h>
#include <rte_eal.h>
#include <rte_per_lcore.h>
@@ -60,7 +59,6 @@
#include <rte_branch_prediction.h>
#include <rte_debug.h>
#include <rte_log.h>
-#include <rte_pci.h>
#include <rte_malloc.h>
#include <rte_errno.h>
#include <rte_spinlock.h>
@@ -914,7 +912,7 @@ static void
eal_intr_proc_rxtx_intr(int fd, const struct rte_intr_handle *intr_handle)
{
union rte_intr_read_buffer buf;
- int bytes_read = 1;
+ int bytes_read = 0;
int nbytes;
switch (intr_handle->type) {
@@ -930,11 +928,9 @@ eal_intr_proc_rxtx_intr(int fd, const struct rte_intr_handle *intr_handle)
break;
#endif
case RTE_INTR_HANDLE_VDEV:
- /* for vdev, fd points to:
- * a. eventfd which does not need to read out;
- * b. datapath fd which needs PMD to read out.
- */
- return;
+ bytes_read = intr_handle->efd_counter_size;
+ /* For vdev, number of bytes to read is set by driver */
+ break;
case RTE_INTR_HANDLE_EXT:
return;
default:
@@ -947,6 +943,8 @@ eal_intr_proc_rxtx_intr(int fd, const struct rte_intr_handle *intr_handle)
* read out to clear the ready-to-be-read flag
* for epoll_wait.
*/
+ if (bytes_read == 0)
+ return;
do {
nbytes = read(fd, &buf, bytes_read);
if (nbytes < 0) {
@@ -1206,7 +1204,12 @@ rte_intr_efd_enable(struct rte_intr_handle *intr_handle, uint32_t nb_efd)
intr_handle->nb_efd = n;
intr_handle->max_intr = NB_OTHER_INTR + n;
} else if (intr_handle->type == RTE_INTR_HANDLE_VDEV) {
- /* do nothing, and let vdev driver to initialize this struct */
+ /* only check, initialization would be done in vdev driver.*/
+ if (intr_handle->efd_counter_size >
+ sizeof(union rte_intr_read_buffer)) {
+ RTE_LOG(ERR, EAL, "the efd_counter_size is oversized");
+ return -EINVAL;
+ }
} else {
intr_handle->efds[0] = intr_handle->fd;
intr_handle->nb_efd = RTE_MIN(nb_efd, 1U);
diff --git a/lib/librte_eal/linuxapp/eal/eal_log.c b/lib/librte_eal/linuxapp/eal/eal_log.c
index e3a50aa3..c088bd9b 100644
--- a/lib/librte_eal/linuxapp/eal/eal_log.c
+++ b/lib/librte_eal/linuxapp/eal/eal_log.c
@@ -39,7 +39,6 @@
#include <sys/queue.h>
#include <rte_memory.h>
-#include <rte_memzone.h>
#include <rte_eal.h>
#include <rte_launch.h>
#include <rte_per_lcore.h>
diff --git a/lib/librte_eal/linuxapp/eal/eal_memory.c b/lib/librte_eal/linuxapp/eal/eal_memory.c
index 52791282..a54b822a 100644
--- a/lib/librte_eal/linuxapp/eal/eal_memory.c
+++ b/lib/librte_eal/linuxapp/eal/eal_memory.c
@@ -59,7 +59,6 @@
#include <rte_log.h>
#include <rte_memory.h>
-#include <rte_memzone.h>
#include <rte_launch.h>
#include <rte_eal.h>
#include <rte_eal_memconfig.h>
@@ -75,13 +74,6 @@
#define PFN_MASK_SIZE 8
-#ifdef RTE_LIBRTE_XEN_DOM0
-int rte_xen_dom0_supported(void)
-{
- return internal_config.xen_dom0_support;
-}
-#endif
-
/**
* @file
* Huge page mapping under linux
@@ -106,10 +98,6 @@ test_phys_addrs_available(void)
uint64_t tmp;
phys_addr_t physaddr;
- /* For dom0, phys addresses can always be available */
- if (rte_xen_dom0_supported())
- return;
-
if (!rte_eal_has_hugepages()) {
RTE_LOG(ERR, EAL,
"Started without hugepages support, physical addresses not available\n");
@@ -119,10 +107,11 @@ test_phys_addrs_available(void)
physaddr = rte_mem_virt2phy(&tmp);
if (physaddr == RTE_BAD_PHYS_ADDR) {
- RTE_LOG(ERR, EAL,
- "Cannot obtain physical addresses: %s. "
- "Only vfio will function.\n",
- strerror(errno));
+ if (rte_eal_iova_mode() == RTE_IOVA_PA)
+ RTE_LOG(ERR, EAL,
+ "Cannot obtain physical addresses: %s. "
+ "Only vfio will function.\n",
+ strerror(errno));
phys_addrs_available = false;
}
}
@@ -139,32 +128,9 @@ rte_mem_virt2phy(const void *virtaddr)
int page_size;
off_t offset;
- /* when using dom0, /proc/self/pagemap always returns 0, check in
- * dpdk memory by browsing the memsegs */
- if (rte_xen_dom0_supported()) {
- struct rte_mem_config *mcfg;
- struct rte_memseg *memseg;
- unsigned i;
-
- mcfg = rte_eal_get_configuration()->mem_config;
- for (i = 0; i < RTE_MAX_MEMSEG; i++) {
- memseg = &mcfg->memseg[i];
- if (memseg->addr == NULL)
- break;
- if (virtaddr > memseg->addr &&
- virtaddr < RTE_PTR_ADD(memseg->addr,
- memseg->len)) {
- return memseg->phys_addr +
- RTE_PTR_DIFF(virtaddr, memseg->addr);
- }
- }
-
- return RTE_BAD_PHYS_ADDR;
- }
-
/* Cannot parse /proc/self/pagemap, no need to log errors everywhere */
if (!phys_addrs_available)
- return RTE_BAD_PHYS_ADDR;
+ return RTE_BAD_IOVA;
/* standard page size */
page_size = getpagesize();
@@ -173,7 +139,7 @@ rte_mem_virt2phy(const void *virtaddr)
if (fd < 0) {
RTE_LOG(ERR, EAL, "%s(): cannot open /proc/self/pagemap: %s\n",
__func__, strerror(errno));
- return RTE_BAD_PHYS_ADDR;
+ return RTE_BAD_IOVA;
}
virt_pfn = (unsigned long)virtaddr / page_size;
@@ -182,7 +148,7 @@ rte_mem_virt2phy(const void *virtaddr)
RTE_LOG(ERR, EAL, "%s(): seek error in /proc/self/pagemap: %s\n",
__func__, strerror(errno));
close(fd);
- return RTE_BAD_PHYS_ADDR;
+ return RTE_BAD_IOVA;
}
retval = read(fd, &page, PFN_MASK_SIZE);
@@ -190,12 +156,12 @@ rte_mem_virt2phy(const void *virtaddr)
if (retval < 0) {
RTE_LOG(ERR, EAL, "%s(): cannot read /proc/self/pagemap: %s\n",
__func__, strerror(errno));
- return RTE_BAD_PHYS_ADDR;
+ return RTE_BAD_IOVA;
} else if (retval != PFN_MASK_SIZE) {
RTE_LOG(ERR, EAL, "%s(): read %d bytes from /proc/self/pagemap "
"but expected %d:\n",
__func__, retval, PFN_MASK_SIZE);
- return RTE_BAD_PHYS_ADDR;
+ return RTE_BAD_IOVA;
}
/*
@@ -203,7 +169,7 @@ rte_mem_virt2phy(const void *virtaddr)
* pagemap.txt in linux Documentation)
*/
if ((page & 0x7fffffffffffffULL) == 0)
- return RTE_BAD_PHYS_ADDR;
+ return RTE_BAD_IOVA;
physaddr = ((page & 0x7fffffffffffffULL) * page_size)
+ ((unsigned long)virtaddr % page_size);
@@ -211,6 +177,14 @@ rte_mem_virt2phy(const void *virtaddr)
return physaddr;
}
+rte_iova_t
+rte_mem_virt2iova(const void *virtaddr)
+{
+ if (rte_eal_iova_mode() == RTE_IOVA_VA)
+ return (uintptr_t)virtaddr;
+ return rte_mem_virt2phy(virtaddr);
+}
+
/*
* For each hugepage in hugepg_tbl, fill the physaddr value. We find
* it by browsing the /proc/self/pagemap special file.
@@ -716,6 +690,8 @@ create_shared_memory(const char *filename, const size_t mem_size)
}
retval = mmap(NULL, mem_size, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
close(fd);
+ if (retval == MAP_FAILED)
+ return NULL;
return retval;
}
@@ -1059,7 +1035,10 @@ rte_eal_hugepage_init(void)
strerror(errno));
return -1;
}
- mcfg->memseg[0].phys_addr = RTE_BAD_PHYS_ADDR;
+ if (rte_eal_iova_mode() == RTE_IOVA_VA)
+ mcfg->memseg[0].iova = (uintptr_t)addr;
+ else
+ mcfg->memseg[0].iova = RTE_BAD_IOVA;
mcfg->memseg[0].addr = addr;
mcfg->memseg[0].hugepage_sz = RTE_PGSIZE_4K;
mcfg->memseg[0].len = internal_config.memory;
@@ -1067,17 +1046,6 @@ rte_eal_hugepage_init(void)
return 0;
}
-/* check if app runs on Xen Dom0 */
- if (internal_config.xen_dom0_support) {
-#ifdef RTE_LIBRTE_XEN_DOM0
- /* use dom0_mm kernel driver to init memory */
- if (rte_xen_dom0_memory_init() < 0)
- return -1;
- else
- return 0;
-#endif
- }
-
/* calculate total number of hugepages available. at this point we haven't
* yet started sorting them so they all are on socket 0 */
for (i = 0; i < (int) internal_config.num_hugepage_sizes; i++) {
@@ -1319,7 +1287,7 @@ rte_eal_hugepage_init(void)
if (j == RTE_MAX_MEMSEG)
break;
- mcfg->memseg[j].phys_addr = hugepage[i].physaddr;
+ mcfg->memseg[j].iova = hugepage[i].physaddr;
mcfg->memseg[j].addr = hugepage[i].final_va;
mcfg->memseg[j].len = hugepage[i].size;
mcfg->memseg[j].socket_id = hugepage[i].socket_id;
@@ -1330,7 +1298,7 @@ rte_eal_hugepage_init(void)
#ifdef RTE_ARCH_PPC_64
/* Use the phy and virt address of the last page as segment
* address for IBM Power architecture */
- mcfg->memseg[j].phys_addr = hugepage[i].physaddr;
+ mcfg->memseg[j].iova = hugepage[i].physaddr;
mcfg->memseg[j].addr = hugepage[i].final_va;
#endif
mcfg->memseg[j].len += mcfg->memseg[j].hugepage_sz;
@@ -1400,17 +1368,6 @@ rte_eal_hugepage_attach(void)
test_phys_addrs_available();
- if (internal_config.xen_dom0_support) {
-#ifdef RTE_LIBRTE_XEN_DOM0
- if (rte_xen_dom0_memory_attach() < 0) {
- RTE_LOG(ERR, EAL, "Failed to attach memory segments of primary "
- "process\n");
- return -1;
- }
- return 0;
-#endif
- }
-
fd_zero = open("/dev/zero", O_RDONLY);
if (fd_zero < 0) {
RTE_LOG(ERR, EAL, "Could not open /dev/zero\n");
@@ -1542,7 +1499,7 @@ error:
return -1;
}
-bool
+int
rte_eal_using_phys_addrs(void)
{
return phys_addrs_available;
diff --git a/lib/librte_eal/linuxapp/eal/eal_pci.c b/lib/librte_eal/linuxapp/eal/eal_pci.c
deleted file mode 100644
index 8951ce74..00000000
--- a/lib/librte_eal/linuxapp/eal/eal_pci.c
+++ /dev/null
@@ -1,722 +0,0 @@
-/*-
- * BSD LICENSE
- *
- * Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- *
- * * Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in
- * the documentation and/or other materials provided with the
- * distribution.
- * * Neither the name of Intel Corporation nor the names of its
- * contributors may be used to endorse or promote products derived
- * from this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- */
-
-#include <string.h>
-#include <dirent.h>
-
-#include <rte_log.h>
-#include <rte_bus.h>
-#include <rte_pci.h>
-#include <rte_eal_memconfig.h>
-#include <rte_malloc.h>
-#include <rte_devargs.h>
-#include <rte_memcpy.h>
-
-#include "eal_filesystem.h"
-#include "eal_private.h"
-#include "eal_pci_init.h"
-
-/**
- * @file
- * PCI probing under linux
- *
- * This code is used to simulate a PCI probe by parsing information in sysfs.
- * When a registered device matches a driver, it is then initialized with
- * IGB_UIO driver (or doesn't initialize, if the device wasn't bound to it).
- */
-
-extern struct rte_pci_bus rte_pci_bus;
-
-static int
-pci_get_kernel_driver_by_path(const char *filename, char *dri_name)
-{
- int count;
- char path[PATH_MAX];
- char *name;
-
- if (!filename || !dri_name)
- return -1;
-
- count = readlink(filename, path, PATH_MAX);
- if (count >= PATH_MAX)
- return -1;
-
- /* For device does not have a driver */
- if (count < 0)
- return 1;
-
- path[count] = '\0';
-
- name = strrchr(path, '/');
- if (name) {
- strncpy(dri_name, name + 1, strlen(name + 1) + 1);
- return 0;
- }
-
- return -1;
-}
-
-/* Map pci device */
-int
-rte_pci_map_device(struct rte_pci_device *dev)
-{
- int ret = -1;
-
- /* try mapping the NIC resources using VFIO if it exists */
- switch (dev->kdrv) {
- case RTE_KDRV_VFIO:
-#ifdef VFIO_PRESENT
- if (pci_vfio_is_enabled())
- ret = pci_vfio_map_resource(dev);
-#endif
- break;
- case RTE_KDRV_IGB_UIO:
- case RTE_KDRV_UIO_GENERIC:
- if (rte_eal_using_phys_addrs()) {
- /* map resources for devices that use uio */
- ret = pci_uio_map_resource(dev);
- }
- break;
- default:
- RTE_LOG(DEBUG, EAL,
- " Not managed by a supported kernel driver, skipped\n");
- ret = 1;
- break;
- }
-
- return ret;
-}
-
-/* Unmap pci device */
-void
-rte_pci_unmap_device(struct rte_pci_device *dev)
-{
- /* try unmapping the NIC resources using VFIO if it exists */
- switch (dev->kdrv) {
- case RTE_KDRV_VFIO:
-#ifdef VFIO_PRESENT
- if (pci_vfio_is_enabled())
- pci_vfio_unmap_resource(dev);
-#endif
- break;
- case RTE_KDRV_IGB_UIO:
- case RTE_KDRV_UIO_GENERIC:
- /* unmap resources for devices that use uio */
- pci_uio_unmap_resource(dev);
- break;
- default:
- RTE_LOG(DEBUG, EAL,
- " Not managed by a supported kernel driver, skipped\n");
- break;
- }
-}
-
-void *
-pci_find_max_end_va(void)
-{
- const struct rte_memseg *seg = rte_eal_get_physmem_layout();
- const struct rte_memseg *last = seg;
- unsigned i = 0;
-
- for (i = 0; i < RTE_MAX_MEMSEG; i++, seg++) {
- if (seg->addr == NULL)
- break;
-
- if (seg->addr > last->addr)
- last = seg;
-
- }
- return RTE_PTR_ADD(last->addr, last->len);
-}
-
-/* parse one line of the "resource" sysfs file (note that the 'line'
- * string is modified)
- */
-int
-pci_parse_one_sysfs_resource(char *line, size_t len, uint64_t *phys_addr,
- uint64_t *end_addr, uint64_t *flags)
-{
- union pci_resource_info {
- struct {
- char *phys_addr;
- char *end_addr;
- char *flags;
- };
- char *ptrs[PCI_RESOURCE_FMT_NVAL];
- } res_info;
-
- if (rte_strsplit(line, len, res_info.ptrs, 3, ' ') != 3) {
- RTE_LOG(ERR, EAL,
- "%s(): bad resource format\n", __func__);
- return -1;
- }
- errno = 0;
- *phys_addr = strtoull(res_info.phys_addr, NULL, 16);
- *end_addr = strtoull(res_info.end_addr, NULL, 16);
- *flags = strtoull(res_info.flags, NULL, 16);
- if (errno != 0) {
- RTE_LOG(ERR, EAL,
- "%s(): bad resource format\n", __func__);
- return -1;
- }
-
- return 0;
-}
-
-/* parse the "resource" sysfs file */
-static int
-pci_parse_sysfs_resource(const char *filename, struct rte_pci_device *dev)
-{
- FILE *f;
- char buf[BUFSIZ];
- int i;
- uint64_t phys_addr, end_addr, flags;
-
- f = fopen(filename, "r");
- if (f == NULL) {
- RTE_LOG(ERR, EAL, "Cannot open sysfs resource\n");
- return -1;
- }
-
-	for (i = 0; i < PCI_MAX_RESOURCE; i++) {
-
- if (fgets(buf, sizeof(buf), f) == NULL) {
- RTE_LOG(ERR, EAL,
- "%s(): cannot read resource\n", __func__);
- goto error;
- }
- if (pci_parse_one_sysfs_resource(buf, sizeof(buf), &phys_addr,
- &end_addr, &flags) < 0)
- goto error;
-
- if (flags & IORESOURCE_MEM) {
- dev->mem_resource[i].phys_addr = phys_addr;
- dev->mem_resource[i].len = end_addr - phys_addr + 1;
- /* not mapped for now */
- dev->mem_resource[i].addr = NULL;
- }
- }
- fclose(f);
- return 0;
-
-error:
- fclose(f);
- return -1;
-}
-
-/* Scan one pci sysfs entry, and fill the devices list from it. */
-static int
-pci_scan_one(const char *dirname, const struct rte_pci_addr *addr)
-{
- char filename[PATH_MAX];
- unsigned long tmp;
- struct rte_pci_device *dev;
- char driver[PATH_MAX];
- int ret;
-
- dev = malloc(sizeof(*dev));
- if (dev == NULL)
- return -1;
-
- memset(dev, 0, sizeof(*dev));
- dev->addr = *addr;
-
- /* get vendor id */
- snprintf(filename, sizeof(filename), "%s/vendor", dirname);
- if (eal_parse_sysfs_value(filename, &tmp) < 0) {
- free(dev);
- return -1;
- }
- dev->id.vendor_id = (uint16_t)tmp;
-
- /* get device id */
- snprintf(filename, sizeof(filename), "%s/device", dirname);
- if (eal_parse_sysfs_value(filename, &tmp) < 0) {
- free(dev);
- return -1;
- }
- dev->id.device_id = (uint16_t)tmp;
-
- /* get subsystem_vendor id */
- snprintf(filename, sizeof(filename), "%s/subsystem_vendor",
- dirname);
- if (eal_parse_sysfs_value(filename, &tmp) < 0) {
- free(dev);
- return -1;
- }
- dev->id.subsystem_vendor_id = (uint16_t)tmp;
-
- /* get subsystem_device id */
- snprintf(filename, sizeof(filename), "%s/subsystem_device",
- dirname);
- if (eal_parse_sysfs_value(filename, &tmp) < 0) {
- free(dev);
- return -1;
- }
- dev->id.subsystem_device_id = (uint16_t)tmp;
-
- /* get class_id */
- snprintf(filename, sizeof(filename), "%s/class",
- dirname);
- if (eal_parse_sysfs_value(filename, &tmp) < 0) {
- free(dev);
- return -1;
- }
-	/* the least significant 24 bits are valid: class, subclass, program interface */
- dev->id.class_id = (uint32_t)tmp & RTE_CLASS_ANY_ID;
-
- /* get max_vfs */
- dev->max_vfs = 0;
- snprintf(filename, sizeof(filename), "%s/max_vfs", dirname);
- if (!access(filename, F_OK) &&
- eal_parse_sysfs_value(filename, &tmp) == 0)
- dev->max_vfs = (uint16_t)tmp;
- else {
-		/* for drivers other than igb_uio, kernel version >= 3.8 is needed */
- snprintf(filename, sizeof(filename),
- "%s/sriov_numvfs", dirname);
- if (!access(filename, F_OK) &&
- eal_parse_sysfs_value(filename, &tmp) == 0)
- dev->max_vfs = (uint16_t)tmp;
- }
-
- /* get numa node, default to 0 if not present */
- snprintf(filename, sizeof(filename), "%s/numa_node",
- dirname);
-
- if (access(filename, F_OK) != -1) {
- if (eal_parse_sysfs_value(filename, &tmp) == 0)
- dev->device.numa_node = tmp;
- else
- dev->device.numa_node = -1;
- } else {
- dev->device.numa_node = 0;
- }
-
- pci_name_set(dev);
-
- /* parse resources */
- snprintf(filename, sizeof(filename), "%s/resource", dirname);
- if (pci_parse_sysfs_resource(filename, dev) < 0) {
- RTE_LOG(ERR, EAL, "%s(): cannot parse resource\n", __func__);
- free(dev);
- return -1;
- }
-
- /* parse driver */
- snprintf(filename, sizeof(filename), "%s/driver", dirname);
- ret = pci_get_kernel_driver_by_path(filename, driver);
- if (ret < 0) {
-		RTE_LOG(ERR, EAL, "Failed to get kernel driver\n");
- free(dev);
- return -1;
- }
-
- if (!ret) {
- if (!strcmp(driver, "vfio-pci"))
- dev->kdrv = RTE_KDRV_VFIO;
- else if (!strcmp(driver, "igb_uio"))
- dev->kdrv = RTE_KDRV_IGB_UIO;
- else if (!strcmp(driver, "uio_pci_generic"))
- dev->kdrv = RTE_KDRV_UIO_GENERIC;
- else
- dev->kdrv = RTE_KDRV_UNKNOWN;
- } else
- dev->kdrv = RTE_KDRV_NONE;
-
- /* device is valid, add in list (sorted) */
- if (TAILQ_EMPTY(&rte_pci_bus.device_list)) {
- rte_pci_add_device(dev);
- } else {
- struct rte_pci_device *dev2;
- int ret;
-
- TAILQ_FOREACH(dev2, &rte_pci_bus.device_list, next) {
- ret = rte_eal_compare_pci_addr(&dev->addr, &dev2->addr);
- if (ret > 0)
- continue;
-
- if (ret < 0) {
- rte_pci_insert_device(dev2, dev);
- } else { /* already registered */
- dev2->kdrv = dev->kdrv;
- dev2->max_vfs = dev->max_vfs;
- pci_name_set(dev2);
- memmove(dev2->mem_resource, dev->mem_resource,
- sizeof(dev->mem_resource));
- free(dev);
- }
- return 0;
- }
-
- rte_pci_add_device(dev);
- }
-
- return 0;
-}
-
-int
-pci_update_device(const struct rte_pci_addr *addr)
-{
- char filename[PATH_MAX];
-
- snprintf(filename, sizeof(filename), "%s/" PCI_PRI_FMT,
- pci_get_sysfs_path(), addr->domain, addr->bus, addr->devid,
- addr->function);
-
- return pci_scan_one(filename, addr);
-}
-
-/*
- * split up a pci address into its constituent parts.
- */
-static int
-parse_pci_addr_format(const char *buf, int bufsize, struct rte_pci_addr *addr)
-{
- /* first split on ':' */
- union splitaddr {
- struct {
- char *domain;
- char *bus;
- char *devid;
- char *function;
- };
- char *str[PCI_FMT_NVAL]; /* last element-separator is "." not ":" */
- } splitaddr;
-
- char *buf_copy = strndup(buf, bufsize);
- if (buf_copy == NULL)
- return -1;
-
- if (rte_strsplit(buf_copy, bufsize, splitaddr.str, PCI_FMT_NVAL, ':')
- != PCI_FMT_NVAL - 1)
- goto error;
- /* final split is on '.' between devid and function */
-	splitaddr.function = strchr(splitaddr.devid, '.');
- if (splitaddr.function == NULL)
- goto error;
- *splitaddr.function++ = '\0';
-
- /* now convert to int values */
- errno = 0;
- addr->domain = strtoul(splitaddr.domain, NULL, 16);
- addr->bus = strtoul(splitaddr.bus, NULL, 16);
- addr->devid = strtoul(splitaddr.devid, NULL, 16);
- addr->function = strtoul(splitaddr.function, NULL, 10);
- if (errno != 0)
- goto error;
-
-	free(buf_copy); /* free the copy made with strndup */
- return 0;
-error:
- free(buf_copy);
- return -1;
-}
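A worked example of the splitting logic above, with an illustrative address string.

/*
 * "0000:02:00.1"
 *   split on ':'  ->  "0000", "02", "00.1"
 *   split on '.'  ->  devid "00", function "1"
 *   parsed values ->  domain = 0x0, bus = 0x02, devid = 0x00, function = 1
 */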
-
-/*
- * Scan the content of the PCI bus, and add the devices found to the
- * devices list.
- */
-int
-rte_pci_scan(void)
-{
- struct dirent *e;
- DIR *dir;
- char dirname[PATH_MAX];
- struct rte_pci_addr addr;
-
- /* for debug purposes, PCI can be disabled */
- if (internal_config.no_pci)
- return 0;
-
- dir = opendir(pci_get_sysfs_path());
- if (dir == NULL) {
- RTE_LOG(ERR, EAL, "%s(): opendir failed: %s\n",
- __func__, strerror(errno));
- return -1;
- }
-
- while ((e = readdir(dir)) != NULL) {
- if (e->d_name[0] == '.')
- continue;
-
- if (parse_pci_addr_format(e->d_name, sizeof(e->d_name), &addr) != 0)
- continue;
-
- snprintf(dirname, sizeof(dirname), "%s/%s",
- pci_get_sysfs_path(), e->d_name);
-
- if (pci_scan_one(dirname, &addr) < 0)
- goto error;
- }
- closedir(dir);
- return 0;
-
-error:
- closedir(dir);
- return -1;
-}
-
-/* Read PCI config space. */
-int rte_pci_read_config(const struct rte_pci_device *device,
- void *buf, size_t len, off_t offset)
-{
- const struct rte_intr_handle *intr_handle = &device->intr_handle;
-
- switch (intr_handle->type) {
- case RTE_INTR_HANDLE_UIO:
- case RTE_INTR_HANDLE_UIO_INTX:
- return pci_uio_read_config(intr_handle, buf, len, offset);
-
-#ifdef VFIO_PRESENT
- case RTE_INTR_HANDLE_VFIO_MSIX:
- case RTE_INTR_HANDLE_VFIO_MSI:
- case RTE_INTR_HANDLE_VFIO_LEGACY:
- return pci_vfio_read_config(intr_handle, buf, len, offset);
-#endif
- default:
- RTE_LOG(ERR, EAL,
- "Unknown handle type of fd %d\n",
- intr_handle->fd);
- return -1;
- }
-}
-
-/* Write PCI config space. */
-int rte_pci_write_config(const struct rte_pci_device *device,
- const void *buf, size_t len, off_t offset)
-{
- const struct rte_intr_handle *intr_handle = &device->intr_handle;
-
- switch (intr_handle->type) {
- case RTE_INTR_HANDLE_UIO:
- case RTE_INTR_HANDLE_UIO_INTX:
- return pci_uio_write_config(intr_handle, buf, len, offset);
-
-#ifdef VFIO_PRESENT
- case RTE_INTR_HANDLE_VFIO_MSIX:
- case RTE_INTR_HANDLE_VFIO_MSI:
- case RTE_INTR_HANDLE_VFIO_LEGACY:
- return pci_vfio_write_config(intr_handle, buf, len, offset);
-#endif
- default:
- RTE_LOG(ERR, EAL,
- "Unknown handle type of fd %d\n",
- intr_handle->fd);
- return -1;
- }
-}
-
-#if defined(RTE_ARCH_X86)
-static int
-pci_ioport_map(struct rte_pci_device *dev, int bar __rte_unused,
- struct rte_pci_ioport *p)
-{
- uint16_t start, end;
- FILE *fp;
- char *line = NULL;
- char pci_id[16];
- int found = 0;
- size_t linesz;
-
- snprintf(pci_id, sizeof(pci_id), PCI_PRI_FMT,
- dev->addr.domain, dev->addr.bus,
- dev->addr.devid, dev->addr.function);
-
- fp = fopen("/proc/ioports", "r");
- if (fp == NULL) {
- RTE_LOG(ERR, EAL, "%s(): can't open ioports\n", __func__);
- return -1;
- }
-
- while (getdelim(&line, &linesz, '\n', fp) > 0) {
- char *ptr = line;
- char *left;
- int n;
-
- n = strcspn(ptr, ":");
- ptr[n] = 0;
- left = &ptr[n + 1];
-
- while (*left && isspace(*left))
- left++;
-
- if (!strncmp(left, pci_id, strlen(pci_id))) {
- found = 1;
-
- while (*ptr && isspace(*ptr))
- ptr++;
-
- sscanf(ptr, "%04hx-%04hx", &start, &end);
-
- break;
- }
- }
-
- free(line);
- fclose(fp);
-
- if (!found)
- return -1;
-
- dev->intr_handle.type = RTE_INTR_HANDLE_UNKNOWN;
- p->base = start;
- RTE_LOG(DEBUG, EAL, "PCI Port IO found start=0x%x\n", start);
-
- return 0;
-}
-#endif
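The /proc/ioports parsing above expects entries of the form shown below (addresses are illustrative); the range on the left of ':' is used when the text on the right matches the device's PCI address string.

/*
 *   d000-d03f : 0000:02:00.0
 * For a matching device, start = 0xd000 becomes the I/O port base.
 */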
-
-int
-rte_pci_ioport_map(struct rte_pci_device *dev, int bar,
- struct rte_pci_ioport *p)
-{
- int ret = -1;
-
- switch (dev->kdrv) {
-#ifdef VFIO_PRESENT
- case RTE_KDRV_VFIO:
- if (pci_vfio_is_enabled())
- ret = pci_vfio_ioport_map(dev, bar, p);
- break;
-#endif
- case RTE_KDRV_IGB_UIO:
- ret = pci_uio_ioport_map(dev, bar, p);
- break;
- case RTE_KDRV_UIO_GENERIC:
-#if defined(RTE_ARCH_X86)
- ret = pci_ioport_map(dev, bar, p);
-#else
- ret = pci_uio_ioport_map(dev, bar, p);
-#endif
- break;
- case RTE_KDRV_NONE:
-#if defined(RTE_ARCH_X86)
- ret = pci_ioport_map(dev, bar, p);
-#endif
- break;
- default:
- break;
- }
-
- if (!ret)
- p->dev = dev;
-
- return ret;
-}
-
-void
-rte_pci_ioport_read(struct rte_pci_ioport *p,
- void *data, size_t len, off_t offset)
-{
- switch (p->dev->kdrv) {
-#ifdef VFIO_PRESENT
- case RTE_KDRV_VFIO:
- pci_vfio_ioport_read(p, data, len, offset);
- break;
-#endif
- case RTE_KDRV_IGB_UIO:
- pci_uio_ioport_read(p, data, len, offset);
- break;
- case RTE_KDRV_UIO_GENERIC:
- pci_uio_ioport_read(p, data, len, offset);
- break;
- case RTE_KDRV_NONE:
-#if defined(RTE_ARCH_X86)
- pci_uio_ioport_read(p, data, len, offset);
-#endif
- break;
- default:
- break;
- }
-}
-
-void
-rte_pci_ioport_write(struct rte_pci_ioport *p,
- const void *data, size_t len, off_t offset)
-{
- switch (p->dev->kdrv) {
-#ifdef VFIO_PRESENT
- case RTE_KDRV_VFIO:
- pci_vfio_ioport_write(p, data, len, offset);
- break;
-#endif
- case RTE_KDRV_IGB_UIO:
- pci_uio_ioport_write(p, data, len, offset);
- break;
- case RTE_KDRV_UIO_GENERIC:
- pci_uio_ioport_write(p, data, len, offset);
- break;
- case RTE_KDRV_NONE:
-#if defined(RTE_ARCH_X86)
- pci_uio_ioport_write(p, data, len, offset);
-#endif
- break;
- default:
- break;
- }
-}
-
-int
-rte_pci_ioport_unmap(struct rte_pci_ioport *p)
-{
- int ret = -1;
-
- switch (p->dev->kdrv) {
-#ifdef VFIO_PRESENT
- case RTE_KDRV_VFIO:
- if (pci_vfio_is_enabled())
- ret = pci_vfio_ioport_unmap(p);
- break;
-#endif
- case RTE_KDRV_IGB_UIO:
- ret = pci_uio_ioport_unmap(p);
- break;
- case RTE_KDRV_UIO_GENERIC:
-#if defined(RTE_ARCH_X86)
- ret = 0;
-#else
- ret = pci_uio_ioport_unmap(p);
-#endif
- break;
- case RTE_KDRV_NONE:
-#if defined(RTE_ARCH_X86)
- ret = 0;
-#endif
- break;
- default:
- break;
- }
-
- return ret;
-}
diff --git a/lib/librte_eal/linuxapp/eal/eal_pci_init.h b/lib/librte_eal/linuxapp/eal/eal_pci_init.h
deleted file mode 100644
index ae2980d6..00000000
--- a/lib/librte_eal/linuxapp/eal/eal_pci_init.h
+++ /dev/null
@@ -1,97 +0,0 @@
-/*-
- * BSD LICENSE
- *
- * Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- *
- * * Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in
- * the documentation and/or other materials provided with the
- * distribution.
- * * Neither the name of Intel Corporation nor the names of its
- * contributors may be used to endorse or promote products derived
- * from this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- */
-
-#ifndef EAL_PCI_INIT_H_
-#define EAL_PCI_INIT_H_
-
-#include "eal_vfio.h"
-
-/** IO resource type: */
-#define IORESOURCE_IO 0x00000100
-#define IORESOURCE_MEM 0x00000200
-
-/*
- * Helper function to map PCI resources right after hugepages in virtual memory
- */
-extern void *pci_map_addr;
-void *pci_find_max_end_va(void);
-
-/* parse one line of the "resource" sysfs file (note that the 'line'
- * string is modified)
- */
-int pci_parse_one_sysfs_resource(char *line, size_t len, uint64_t *phys_addr,
- uint64_t *end_addr, uint64_t *flags);
-
-int pci_uio_alloc_resource(struct rte_pci_device *dev,
- struct mapped_pci_resource **uio_res);
-void pci_uio_free_resource(struct rte_pci_device *dev,
- struct mapped_pci_resource *uio_res);
-int pci_uio_map_resource_by_index(struct rte_pci_device *dev, int res_idx,
- struct mapped_pci_resource *uio_res, int map_idx);
-
-int pci_uio_read_config(const struct rte_intr_handle *intr_handle,
- void *buf, size_t len, off_t offs);
-int pci_uio_write_config(const struct rte_intr_handle *intr_handle,
- const void *buf, size_t len, off_t offs);
-
-int pci_uio_ioport_map(struct rte_pci_device *dev, int bar,
- struct rte_pci_ioport *p);
-void pci_uio_ioport_read(struct rte_pci_ioport *p,
- void *data, size_t len, off_t offset);
-void pci_uio_ioport_write(struct rte_pci_ioport *p,
- const void *data, size_t len, off_t offset);
-int pci_uio_ioport_unmap(struct rte_pci_ioport *p);
-
-#ifdef VFIO_PRESENT
-
-/* access config space */
-int pci_vfio_read_config(const struct rte_intr_handle *intr_handle,
- void *buf, size_t len, off_t offs);
-int pci_vfio_write_config(const struct rte_intr_handle *intr_handle,
- const void *buf, size_t len, off_t offs);
-
-int pci_vfio_ioport_map(struct rte_pci_device *dev, int bar,
- struct rte_pci_ioport *p);
-void pci_vfio_ioport_read(struct rte_pci_ioport *p,
- void *data, size_t len, off_t offset);
-void pci_vfio_ioport_write(struct rte_pci_ioport *p,
- const void *data, size_t len, off_t offset);
-int pci_vfio_ioport_unmap(struct rte_pci_ioport *p);
-
-/* map/unmap VFIO resource prototype */
-int pci_vfio_map_resource(struct rte_pci_device *dev);
-int pci_vfio_unmap_resource(struct rte_pci_device *dev);
-
-#endif
-
-#endif /* EAL_PCI_INIT_H_ */
diff --git a/lib/librte_eal/linuxapp/eal/eal_pci_uio.c b/lib/librte_eal/linuxapp/eal/eal_pci_uio.c
deleted file mode 100644
index fa10329f..00000000
--- a/lib/librte_eal/linuxapp/eal/eal_pci_uio.c
+++ /dev/null
@@ -1,567 +0,0 @@
-/*-
- * BSD LICENSE
- *
- * Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- *
- * * Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in
- * the documentation and/or other materials provided with the
- * distribution.
- * * Neither the name of Intel Corporation nor the names of its
- * contributors may be used to endorse or promote products derived
- * from this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- */
-
-#include <string.h>
-#include <unistd.h>
-#include <fcntl.h>
-#include <dirent.h>
-#include <inttypes.h>
-#include <sys/stat.h>
-#include <sys/mman.h>
-#include <sys/sysmacros.h>
-#include <linux/pci_regs.h>
-
-#if defined(RTE_ARCH_X86)
-#include <sys/io.h>
-#endif
-
-#include <rte_log.h>
-#include <rte_pci.h>
-#include <rte_eal_memconfig.h>
-#include <rte_common.h>
-#include <rte_malloc.h>
-
-#include "eal_filesystem.h"
-#include "eal_pci_init.h"
-
-void *pci_map_addr = NULL;
-
-#define OFF_MAX ((uint64_t)(off_t)-1)
-
-int
-pci_uio_read_config(const struct rte_intr_handle *intr_handle,
- void *buf, size_t len, off_t offset)
-{
- return pread(intr_handle->uio_cfg_fd, buf, len, offset);
-}
-
-int
-pci_uio_write_config(const struct rte_intr_handle *intr_handle,
- const void *buf, size_t len, off_t offset)
-{
- return pwrite(intr_handle->uio_cfg_fd, buf, len, offset);
-}
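A small sketch of how these helpers could be used, for example to read the 16-bit vendor ID at the start of configuration space; the caller shown is hypothetical and only assumes the standard PCI header layout.

static int
read_vendor_id_example(const struct rte_intr_handle *intr_handle,
		uint16_t *vendor)
{
	/* the vendor ID occupies bytes 0-1 of the PCI configuration header */
	if (pci_uio_read_config(intr_handle, vendor, sizeof(*vendor), 0) !=
			(int)sizeof(*vendor))
		return -1;
	return 0;
}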
-
-static int
-pci_uio_set_bus_master(int dev_fd)
-{
- uint16_t reg;
- int ret;
-
- ret = pread(dev_fd, &reg, sizeof(reg), PCI_COMMAND);
- if (ret != sizeof(reg)) {
- RTE_LOG(ERR, EAL,
- "Cannot read command from PCI config space!\n");
- return -1;
- }
-
- /* return if bus mastering is already on */
- if (reg & PCI_COMMAND_MASTER)
- return 0;
-
- reg |= PCI_COMMAND_MASTER;
-
- ret = pwrite(dev_fd, &reg, sizeof(reg), PCI_COMMAND);
- if (ret != sizeof(reg)) {
- RTE_LOG(ERR, EAL,
- "Cannot write command to PCI config space!\n");
- return -1;
- }
-
- return 0;
-}
-
-static int
-pci_mknod_uio_dev(const char *sysfs_uio_path, unsigned uio_num)
-{
- FILE *f;
- char filename[PATH_MAX];
- int ret;
- unsigned major, minor;
- dev_t dev;
-
- /* get the name of the sysfs file that contains the major and minor
- * of the uio device and read its content */
- snprintf(filename, sizeof(filename), "%s/dev", sysfs_uio_path);
-
- f = fopen(filename, "r");
- if (f == NULL) {
- RTE_LOG(ERR, EAL, "%s(): cannot open sysfs to get major:minor\n",
- __func__);
- return -1;
- }
-
- ret = fscanf(f, "%u:%u", &major, &minor);
- if (ret != 2) {
- RTE_LOG(ERR, EAL, "%s(): cannot parse sysfs to get major:minor\n",
- __func__);
- fclose(f);
- return -1;
- }
- fclose(f);
-
- /* create the char device "mknod /dev/uioX c major minor" */
- snprintf(filename, sizeof(filename), "/dev/uio%u", uio_num);
- dev = makedev(major, minor);
- ret = mknod(filename, S_IFCHR | S_IRUSR | S_IWUSR, dev);
- if (ret != 0) {
- RTE_LOG(ERR, EAL, "%s(): mknod() failed %s\n",
- __func__, strerror(errno));
- return -1;
- }
-
- return ret;
-}
-
-/*
- * Return the uioX char device used for a pci device. On success, return
- * the UIO number and fill dstbuf string with the path of the device in
- * sysfs. On error, return a negative value. In this case dstbuf is
- * invalid.
- */
-static int
-pci_get_uio_dev(struct rte_pci_device *dev, char *dstbuf,
- unsigned int buflen, int create)
-{
- struct rte_pci_addr *loc = &dev->addr;
- unsigned int uio_num;
- struct dirent *e;
- DIR *dir;
- char dirname[PATH_MAX];
-
- /* depending on kernel version, uio can be located in uio/uioX
- * or uio:uioX */
-
- snprintf(dirname, sizeof(dirname),
- "%s/" PCI_PRI_FMT "/uio", pci_get_sysfs_path(),
- loc->domain, loc->bus, loc->devid, loc->function);
-
- dir = opendir(dirname);
- if (dir == NULL) {
- /* retry with the parent directory */
- snprintf(dirname, sizeof(dirname),
- "%s/" PCI_PRI_FMT, pci_get_sysfs_path(),
- loc->domain, loc->bus, loc->devid, loc->function);
- dir = opendir(dirname);
-
- if (dir == NULL) {
- RTE_LOG(ERR, EAL, "Cannot opendir %s\n", dirname);
- return -1;
- }
- }
-
- /* take the first file starting with "uio" */
- while ((e = readdir(dir)) != NULL) {
- /* format could be uio%d ...*/
- int shortprefix_len = sizeof("uio") - 1;
- /* ... or uio:uio%d */
- int longprefix_len = sizeof("uio:uio") - 1;
- char *endptr;
-
- if (strncmp(e->d_name, "uio", 3) != 0)
- continue;
-
- /* first try uio%d */
- errno = 0;
- uio_num = strtoull(e->d_name + shortprefix_len, &endptr, 10);
- if (errno == 0 && endptr != (e->d_name + shortprefix_len)) {
- snprintf(dstbuf, buflen, "%s/uio%u", dirname, uio_num);
- break;
- }
-
- /* then try uio:uio%d */
- errno = 0;
- uio_num = strtoull(e->d_name + longprefix_len, &endptr, 10);
- if (errno == 0 && endptr != (e->d_name + longprefix_len)) {
- snprintf(dstbuf, buflen, "%s/uio:uio%u", dirname, uio_num);
- break;
- }
- }
- closedir(dir);
-
- /* No uio resource found */
- if (e == NULL)
- return -1;
-
- /* create uio device if we've been asked to */
- if (internal_config.create_uio_dev && create &&
- pci_mknod_uio_dev(dstbuf, uio_num) < 0)
- RTE_LOG(WARNING, EAL, "Cannot create /dev/uio%u\n", uio_num);
-
- return uio_num;
-}
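For illustration, the two sysfs layouts the loop above accepts look roughly like this (device address and uio number are made up, and the sysfs base is typically /sys/bus/pci/devices).

/*
 *   newer kernels:  .../0000:02:00.0/uio/uio0
 *   older kernels:  .../0000:02:00.0/uio:uio0
 * In both cases the function returns the uio number (0 here) and writes
 * the matching directory path into dstbuf.
 */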
-
-void
-pci_uio_free_resource(struct rte_pci_device *dev,
- struct mapped_pci_resource *uio_res)
-{
- rte_free(uio_res);
-
- if (dev->intr_handle.uio_cfg_fd >= 0) {
- close(dev->intr_handle.uio_cfg_fd);
- dev->intr_handle.uio_cfg_fd = -1;
- }
- if (dev->intr_handle.fd >= 0) {
- close(dev->intr_handle.fd);
- dev->intr_handle.fd = -1;
- dev->intr_handle.type = RTE_INTR_HANDLE_UNKNOWN;
- }
-}
-
-int
-pci_uio_alloc_resource(struct rte_pci_device *dev,
- struct mapped_pci_resource **uio_res)
-{
- char dirname[PATH_MAX];
- char cfgname[PATH_MAX];
- char devname[PATH_MAX]; /* contains the /dev/uioX */
- int uio_num;
- struct rte_pci_addr *loc;
-
- loc = &dev->addr;
-
- /* find uio resource */
- uio_num = pci_get_uio_dev(dev, dirname, sizeof(dirname), 1);
- if (uio_num < 0) {
- RTE_LOG(WARNING, EAL, " "PCI_PRI_FMT" not managed by UIO driver, "
- "skipping\n", loc->domain, loc->bus, loc->devid, loc->function);
- return 1;
- }
- snprintf(devname, sizeof(devname), "/dev/uio%u", uio_num);
-
- /* save fd if in primary process */
- dev->intr_handle.fd = open(devname, O_RDWR);
- if (dev->intr_handle.fd < 0) {
- RTE_LOG(ERR, EAL, "Cannot open %s: %s\n",
- devname, strerror(errno));
- goto error;
- }
-
- snprintf(cfgname, sizeof(cfgname),
- "/sys/class/uio/uio%u/device/config", uio_num);
- dev->intr_handle.uio_cfg_fd = open(cfgname, O_RDWR);
- if (dev->intr_handle.uio_cfg_fd < 0) {
- RTE_LOG(ERR, EAL, "Cannot open %s: %s\n",
- cfgname, strerror(errno));
- goto error;
- }
-
- if (dev->kdrv == RTE_KDRV_IGB_UIO)
- dev->intr_handle.type = RTE_INTR_HANDLE_UIO;
- else {
- dev->intr_handle.type = RTE_INTR_HANDLE_UIO_INTX;
-
-		/* set bus master, which is not done by uio_pci_generic */
- if (pci_uio_set_bus_master(dev->intr_handle.uio_cfg_fd)) {
- RTE_LOG(ERR, EAL, "Cannot set up bus mastering!\n");
- goto error;
- }
- }
-
-	/* allocate the mapping details for secondary processes */
- *uio_res = rte_zmalloc("UIO_RES", sizeof(**uio_res), 0);
- if (*uio_res == NULL) {
- RTE_LOG(ERR, EAL,
- "%s(): cannot store uio mmap details\n", __func__);
- goto error;
- }
-
- snprintf((*uio_res)->path, sizeof((*uio_res)->path), "%s", devname);
- memcpy(&(*uio_res)->pci_addr, &dev->addr, sizeof((*uio_res)->pci_addr));
-
- return 0;
-
-error:
- pci_uio_free_resource(dev, *uio_res);
- return -1;
-}
-
-int
-pci_uio_map_resource_by_index(struct rte_pci_device *dev, int res_idx,
- struct mapped_pci_resource *uio_res, int map_idx)
-{
- int fd;
- char devname[PATH_MAX];
- void *mapaddr;
- struct rte_pci_addr *loc;
- struct pci_map *maps;
-
- loc = &dev->addr;
- maps = uio_res->maps;
-
- /* update devname for mmap */
- snprintf(devname, sizeof(devname),
- "%s/" PCI_PRI_FMT "/resource%d",
- pci_get_sysfs_path(),
- loc->domain, loc->bus, loc->devid,
- loc->function, res_idx);
-
- /* allocate memory to keep path */
- maps[map_idx].path = rte_malloc(NULL, strlen(devname) + 1, 0);
- if (maps[map_idx].path == NULL) {
- RTE_LOG(ERR, EAL, "Cannot allocate memory for path: %s\n",
- strerror(errno));
- return -1;
- }
-
- /*
- * open resource file, to mmap it
- */
- fd = open(devname, O_RDWR);
- if (fd < 0) {
- RTE_LOG(ERR, EAL, "Cannot open %s: %s\n",
- devname, strerror(errno));
- goto error;
- }
-
- /* try mapping somewhere close to the end of hugepages */
- if (pci_map_addr == NULL)
- pci_map_addr = pci_find_max_end_va();
-
- mapaddr = pci_map_resource(pci_map_addr, fd, 0,
- (size_t)dev->mem_resource[res_idx].len, 0);
- close(fd);
- if (mapaddr == MAP_FAILED)
- goto error;
-
- pci_map_addr = RTE_PTR_ADD(mapaddr,
- (size_t)dev->mem_resource[res_idx].len);
-
- maps[map_idx].phaddr = dev->mem_resource[res_idx].phys_addr;
- maps[map_idx].size = dev->mem_resource[res_idx].len;
- maps[map_idx].addr = mapaddr;
- maps[map_idx].offset = 0;
- strcpy(maps[map_idx].path, devname);
- dev->mem_resource[res_idx].addr = mapaddr;
-
- return 0;
-
-error:
- rte_free(maps[map_idx].path);
- return -1;
-}
-
-#if defined(RTE_ARCH_X86)
-int
-pci_uio_ioport_map(struct rte_pci_device *dev, int bar,
- struct rte_pci_ioport *p)
-{
- char dirname[PATH_MAX];
- char filename[PATH_MAX];
- int uio_num;
- unsigned long start;
-
- uio_num = pci_get_uio_dev(dev, dirname, sizeof(dirname), 0);
- if (uio_num < 0)
- return -1;
-
- /* get portio start */
- snprintf(filename, sizeof(filename),
- "%s/portio/port%d/start", dirname, bar);
- if (eal_parse_sysfs_value(filename, &start) < 0) {
- RTE_LOG(ERR, EAL, "%s(): cannot parse portio start\n",
- __func__);
- return -1;
- }
-	/* ensure we don't get anything funny here, read/write will cast to
-	 * uint16_t */
- if (start > UINT16_MAX)
- return -1;
-
- /* FIXME only for primary process ? */
- if (dev->intr_handle.type == RTE_INTR_HANDLE_UNKNOWN) {
-
- snprintf(filename, sizeof(filename), "/dev/uio%u", uio_num);
- dev->intr_handle.fd = open(filename, O_RDWR);
- if (dev->intr_handle.fd < 0) {
- RTE_LOG(ERR, EAL, "Cannot open %s: %s\n",
- filename, strerror(errno));
- return -1;
- }
- dev->intr_handle.type = RTE_INTR_HANDLE_UIO;
- }
-
- RTE_LOG(DEBUG, EAL, "PCI Port IO found start=0x%lx\n", start);
-
- p->base = start;
- p->len = 0;
- return 0;
-}
-#else
-int
-pci_uio_ioport_map(struct rte_pci_device *dev, int bar,
- struct rte_pci_ioport *p)
-{
- FILE *f;
- char buf[BUFSIZ];
- char filename[PATH_MAX];
- uint64_t phys_addr, end_addr, flags;
- int fd, i;
- void *addr;
-
- /* open and read addresses of the corresponding resource in sysfs */
- snprintf(filename, sizeof(filename), "%s/" PCI_PRI_FMT "/resource",
- pci_get_sysfs_path(), dev->addr.domain, dev->addr.bus,
- dev->addr.devid, dev->addr.function);
- f = fopen(filename, "r");
- if (f == NULL) {
- RTE_LOG(ERR, EAL, "Cannot open sysfs resource: %s\n",
- strerror(errno));
- return -1;
- }
- for (i = 0; i < bar + 1; i++) {
- if (fgets(buf, sizeof(buf), f) == NULL) {
- RTE_LOG(ERR, EAL, "Cannot read sysfs resource\n");
- goto error;
- }
- }
- if (pci_parse_one_sysfs_resource(buf, sizeof(buf), &phys_addr,
- &end_addr, &flags) < 0)
- goto error;
- if ((flags & IORESOURCE_IO) == 0) {
- RTE_LOG(ERR, EAL, "BAR %d is not an IO resource\n", bar);
- goto error;
- }
- snprintf(filename, sizeof(filename), "%s/" PCI_PRI_FMT "/resource%d",
- pci_get_sysfs_path(), dev->addr.domain, dev->addr.bus,
- dev->addr.devid, dev->addr.function, bar);
-
- /* mmap the pci resource */
- fd = open(filename, O_RDWR);
- if (fd < 0) {
- RTE_LOG(ERR, EAL, "Cannot open %s: %s\n", filename,
- strerror(errno));
- goto error;
- }
- addr = mmap(NULL, end_addr + 1, PROT_READ | PROT_WRITE,
- MAP_SHARED, fd, 0);
- close(fd);
- if (addr == MAP_FAILED) {
- RTE_LOG(ERR, EAL, "Cannot mmap IO port resource: %s\n",
- strerror(errno));
- goto error;
- }
-
- /* strangely, the base address is mmap addr + phys_addr */
- p->base = (uintptr_t)addr + phys_addr;
- p->len = end_addr + 1;
- RTE_LOG(DEBUG, EAL, "PCI Port IO found start=0x%"PRIx64"\n", p->base);
- fclose(f);
-
- return 0;
-
-error:
- fclose(f);
- return -1;
-}
-#endif
-
-void
-pci_uio_ioport_read(struct rte_pci_ioport *p,
- void *data, size_t len, off_t offset)
-{
- uint8_t *d;
- int size;
- uintptr_t reg = p->base + offset;
-
- for (d = data; len > 0; d += size, reg += size, len -= size) {
- if (len >= 4) {
- size = 4;
-#if defined(RTE_ARCH_X86)
- *(uint32_t *)d = inl(reg);
-#else
- *(uint32_t *)d = *(volatile uint32_t *)reg;
-#endif
- } else if (len >= 2) {
- size = 2;
-#if defined(RTE_ARCH_X86)
- *(uint16_t *)d = inw(reg);
-#else
- *(uint16_t *)d = *(volatile uint16_t *)reg;
-#endif
- } else {
- size = 1;
-#if defined(RTE_ARCH_X86)
- *d = inb(reg);
-#else
- *d = *(volatile uint8_t *)reg;
-#endif
- }
- }
-}
-
-void
-pci_uio_ioport_write(struct rte_pci_ioport *p,
- const void *data, size_t len, off_t offset)
-{
- const uint8_t *s;
- int size;
- uintptr_t reg = p->base + offset;
-
- for (s = data; len > 0; s += size, reg += size, len -= size) {
- if (len >= 4) {
- size = 4;
-#if defined(RTE_ARCH_X86)
- outl_p(*(const uint32_t *)s, reg);
-#else
- *(volatile uint32_t *)reg = *(const uint32_t *)s;
-#endif
- } else if (len >= 2) {
- size = 2;
-#if defined(RTE_ARCH_X86)
- outw_p(*(const uint16_t *)s, reg);
-#else
- *(volatile uint16_t *)reg = *(const uint16_t *)s;
-#endif
- } else {
- size = 1;
-#if defined(RTE_ARCH_X86)
- outb_p(*s, reg);
-#else
- *(volatile uint8_t *)reg = *s;
-#endif
- }
- }
-}
-
-int
-pci_uio_ioport_unmap(struct rte_pci_ioport *p)
-{
-#if defined(RTE_ARCH_X86)
- RTE_SET_USED(p);
- /* FIXME close intr fd ? */
- return 0;
-#else
- return munmap((void *)(uintptr_t)p->base, p->len);
-#endif
-}
diff --git a/lib/librte_eal/linuxapp/eal/eal_pci_vfio.c b/lib/librte_eal/linuxapp/eal/eal_pci_vfio.c
deleted file mode 100644
index aa9d96ed..00000000
--- a/lib/librte_eal/linuxapp/eal/eal_pci_vfio.c
+++ /dev/null
@@ -1,674 +0,0 @@
-/*-
- * BSD LICENSE
- *
- * Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- *
- * * Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in
- * the documentation and/or other materials provided with the
- * distribution.
- * * Neither the name of Intel Corporation nor the names of its
- * contributors may be used to endorse or promote products derived
- * from this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- */
-
-#include <string.h>
-#include <fcntl.h>
-#include <linux/pci_regs.h>
-#include <sys/eventfd.h>
-#include <sys/socket.h>
-#include <sys/ioctl.h>
-#include <sys/mman.h>
-#include <stdbool.h>
-
-#include <rte_log.h>
-#include <rte_pci.h>
-#include <rte_eal_memconfig.h>
-#include <rte_malloc.h>
-
-#include "eal_filesystem.h"
-#include "eal_pci_init.h"
-#include "eal_vfio.h"
-#include "eal_private.h"
-
-/**
- * @file
- * PCI probing under linux (VFIO version)
- *
- * This code tries to determine whether the PCI device is bound to the VFIO
- * driver and, if so, initializes it (maps BARs, sets up interrupts).
- *
- * This file is only compiled if CONFIG_RTE_EAL_VFIO is set to "y".
- */
-
-#ifdef VFIO_PRESENT
-
-#define PAGE_SIZE (sysconf(_SC_PAGESIZE))
-#define PAGE_MASK (~(PAGE_SIZE - 1))
-
-static struct rte_tailq_elem rte_vfio_tailq = {
- .name = "VFIO_RESOURCE_LIST",
-};
-EAL_REGISTER_TAILQ(rte_vfio_tailq)
-
-int
-pci_vfio_read_config(const struct rte_intr_handle *intr_handle,
- void *buf, size_t len, off_t offs)
-{
- return pread64(intr_handle->vfio_dev_fd, buf, len,
- VFIO_GET_REGION_ADDR(VFIO_PCI_CONFIG_REGION_INDEX) + offs);
-}
-
-int
-pci_vfio_write_config(const struct rte_intr_handle *intr_handle,
- const void *buf, size_t len, off_t offs)
-{
- return pwrite64(intr_handle->vfio_dev_fd, buf, len,
- VFIO_GET_REGION_ADDR(VFIO_PCI_CONFIG_REGION_INDEX) + offs);
-}
-
-/* get PCI BAR number where MSI-X interrupts are */
-static int
-pci_vfio_get_msix_bar(int fd, int *msix_bar, uint32_t *msix_table_offset,
- uint32_t *msix_table_size)
-{
- int ret;
- uint32_t reg;
- uint16_t flags;
- uint8_t cap_id, cap_offset;
-
- /* read PCI capability pointer from config space */
- ret = pread64(fd, &reg, sizeof(reg),
- VFIO_GET_REGION_ADDR(VFIO_PCI_CONFIG_REGION_INDEX) +
- PCI_CAPABILITY_LIST);
- if (ret != sizeof(reg)) {
- RTE_LOG(ERR, EAL, "Cannot read capability pointer from PCI "
- "config space!\n");
- return -1;
- }
-
- /* we need first byte */
- cap_offset = reg & 0xFF;
-
- while (cap_offset) {
-
- /* read PCI capability ID */
- ret = pread64(fd, &reg, sizeof(reg),
- VFIO_GET_REGION_ADDR(VFIO_PCI_CONFIG_REGION_INDEX) +
- cap_offset);
- if (ret != sizeof(reg)) {
- RTE_LOG(ERR, EAL, "Cannot read capability ID from PCI "
- "config space!\n");
- return -1;
- }
-
- /* we need first byte */
- cap_id = reg & 0xFF;
-
- /* if we haven't reached MSI-X, check next capability */
- if (cap_id != PCI_CAP_ID_MSIX) {
- ret = pread64(fd, &reg, sizeof(reg),
- VFIO_GET_REGION_ADDR(VFIO_PCI_CONFIG_REGION_INDEX) +
- cap_offset);
- if (ret != sizeof(reg)) {
- RTE_LOG(ERR, EAL, "Cannot read capability pointer from PCI "
- "config space!\n");
- return -1;
- }
-
- /* we need second byte */
- cap_offset = (reg & 0xFF00) >> 8;
-
- continue;
- }
- /* else, read table offset */
- else {
- /* table offset resides in the next 4 bytes */
- ret = pread64(fd, &reg, sizeof(reg),
- VFIO_GET_REGION_ADDR(VFIO_PCI_CONFIG_REGION_INDEX) +
- cap_offset + 4);
- if (ret != sizeof(reg)) {
- RTE_LOG(ERR, EAL, "Cannot read table offset from PCI config "
- "space!\n");
- return -1;
- }
-
- ret = pread64(fd, &flags, sizeof(flags),
- VFIO_GET_REGION_ADDR(VFIO_PCI_CONFIG_REGION_INDEX) +
- cap_offset + 2);
- if (ret != sizeof(flags)) {
- RTE_LOG(ERR, EAL, "Cannot read table flags from PCI config "
- "space!\n");
- return -1;
- }
-
- *msix_bar = reg & RTE_PCI_MSIX_TABLE_BIR;
- *msix_table_offset = reg & RTE_PCI_MSIX_TABLE_OFFSET;
- *msix_table_size = 16 * (1 + (flags & RTE_PCI_MSIX_FLAGS_QSIZE));
-
- return 0;
- }
- }
- return 0;
-}
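The capability walk above follows the standard PCI config-space layout; a compact summary of the fields being decoded (offsets relative to the MSI-X capability).

/*
 *   cap + 0 : capability ID (PCI_CAP_ID_MSIX) and next-capability pointer
 *   cap + 2 : message control word; low 11 bits = table size - 1
 *   cap + 4 : table offset (upper bits) and BAR indicator (low 3 bits)
 * hence msix_table_size = 16 * (1 + (flags & RTE_PCI_MSIX_FLAGS_QSIZE))
 */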
-
-/* set PCI bus mastering */
-static int
-pci_vfio_set_bus_master(int dev_fd, bool op)
-{
- uint16_t reg;
- int ret;
-
- ret = pread64(dev_fd, &reg, sizeof(reg),
- VFIO_GET_REGION_ADDR(VFIO_PCI_CONFIG_REGION_INDEX) +
- PCI_COMMAND);
- if (ret != sizeof(reg)) {
- RTE_LOG(ERR, EAL, "Cannot read command from PCI config space!\n");
- return -1;
- }
-
- if (op)
- /* set the master bit */
- reg |= PCI_COMMAND_MASTER;
- else
- reg &= ~(PCI_COMMAND_MASTER);
-
- ret = pwrite64(dev_fd, &reg, sizeof(reg),
- VFIO_GET_REGION_ADDR(VFIO_PCI_CONFIG_REGION_INDEX) +
- PCI_COMMAND);
-
- if (ret != sizeof(reg)) {
- RTE_LOG(ERR, EAL, "Cannot write command to PCI config space!\n");
- return -1;
- }
-
- return 0;
-}
-
-/* set up interrupt support (but do not enable interrupts) */
-static int
-pci_vfio_setup_interrupts(struct rte_pci_device *dev, int vfio_dev_fd)
-{
- int i, ret, intr_idx;
-
- /* default to invalid index */
- intr_idx = VFIO_PCI_NUM_IRQS;
-
- /* get interrupt type from internal config (MSI-X by default, can be
-	 * overridden from the command line)
- */
- switch (internal_config.vfio_intr_mode) {
- case RTE_INTR_MODE_MSIX:
- intr_idx = VFIO_PCI_MSIX_IRQ_INDEX;
- break;
- case RTE_INTR_MODE_MSI:
- intr_idx = VFIO_PCI_MSI_IRQ_INDEX;
- break;
- case RTE_INTR_MODE_LEGACY:
- intr_idx = VFIO_PCI_INTX_IRQ_INDEX;
- break;
- /* don't do anything if we want to automatically determine interrupt type */
- case RTE_INTR_MODE_NONE:
- break;
- default:
- RTE_LOG(ERR, EAL, " unknown default interrupt type!\n");
- return -1;
- }
-
- /* start from MSI-X interrupt type */
- for (i = VFIO_PCI_MSIX_IRQ_INDEX; i >= 0; i--) {
- struct vfio_irq_info irq = { .argsz = sizeof(irq) };
- int fd = -1;
-
- /* skip interrupt modes we don't want */
- if (internal_config.vfio_intr_mode != RTE_INTR_MODE_NONE &&
- i != intr_idx)
- continue;
-
- irq.index = i;
-
- ret = ioctl(vfio_dev_fd, VFIO_DEVICE_GET_IRQ_INFO, &irq);
- if (ret < 0) {
- RTE_LOG(ERR, EAL, " cannot get IRQ info, "
- "error %i (%s)\n", errno, strerror(errno));
- return -1;
- }
-
- /* if this vector cannot be used with eventfd, fail if we explicitly
- * specified interrupt type, otherwise continue */
- if ((irq.flags & VFIO_IRQ_INFO_EVENTFD) == 0) {
- if (internal_config.vfio_intr_mode != RTE_INTR_MODE_NONE) {
- RTE_LOG(ERR, EAL,
- " interrupt vector does not support eventfd!\n");
- return -1;
- } else
- continue;
- }
-
- /* set up an eventfd for interrupts */
- fd = eventfd(0, EFD_NONBLOCK | EFD_CLOEXEC);
- if (fd < 0) {
- RTE_LOG(ERR, EAL, " cannot set up eventfd, "
- "error %i (%s)\n", errno, strerror(errno));
- return -1;
- }
-
- dev->intr_handle.fd = fd;
- dev->intr_handle.vfio_dev_fd = vfio_dev_fd;
-
- switch (i) {
- case VFIO_PCI_MSIX_IRQ_INDEX:
- internal_config.vfio_intr_mode = RTE_INTR_MODE_MSIX;
- dev->intr_handle.type = RTE_INTR_HANDLE_VFIO_MSIX;
- break;
- case VFIO_PCI_MSI_IRQ_INDEX:
- internal_config.vfio_intr_mode = RTE_INTR_MODE_MSI;
- dev->intr_handle.type = RTE_INTR_HANDLE_VFIO_MSI;
- break;
- case VFIO_PCI_INTX_IRQ_INDEX:
- internal_config.vfio_intr_mode = RTE_INTR_MODE_LEGACY;
- dev->intr_handle.type = RTE_INTR_HANDLE_VFIO_LEGACY;
- break;
- default:
- RTE_LOG(ERR, EAL, " unknown interrupt type!\n");
- return -1;
- }
-
- return 0;
- }
-
- /* if we're here, we haven't found a suitable interrupt vector */
- return -1;
-}
-
-/*
- * map the PCI resources of a PCI device in virtual memory (VFIO version).
- * Primary and secondary processes follow almost exactly the same path.
- */
-int
-pci_vfio_map_resource(struct rte_pci_device *dev)
-{
- struct vfio_device_info device_info = { .argsz = sizeof(device_info) };
- char pci_addr[PATH_MAX] = {0};
- int vfio_dev_fd;
- struct rte_pci_addr *loc = &dev->addr;
- int i, ret, msix_bar;
- struct mapped_pci_resource *vfio_res = NULL;
- struct mapped_pci_res_list *vfio_res_list = RTE_TAILQ_CAST(rte_vfio_tailq.head, mapped_pci_res_list);
-
- struct pci_map *maps;
- uint32_t msix_table_offset = 0;
- uint32_t msix_table_size = 0;
- uint32_t ioport_bar;
-
- dev->intr_handle.fd = -1;
- dev->intr_handle.type = RTE_INTR_HANDLE_UNKNOWN;
-
- /* store PCI address string */
- snprintf(pci_addr, sizeof(pci_addr), PCI_PRI_FMT,
- loc->domain, loc->bus, loc->devid, loc->function);
-
- if ((ret = vfio_setup_device(pci_get_sysfs_path(), pci_addr,
- &vfio_dev_fd, &device_info)))
- return ret;
-
- /* get MSI-X BAR, if any (we have to know where it is because we can't
- * easily mmap it when using VFIO) */
- msix_bar = -1;
- ret = pci_vfio_get_msix_bar(vfio_dev_fd, &msix_bar,
- &msix_table_offset, &msix_table_size);
- if (ret < 0) {
- RTE_LOG(ERR, EAL, " %s cannot get MSI-X BAR number!\n", pci_addr);
- close(vfio_dev_fd);
- return -1;
- }
-
- /* if we're in a primary process, allocate vfio_res and get region info */
- if (internal_config.process_type == RTE_PROC_PRIMARY) {
- vfio_res = rte_zmalloc("VFIO_RES", sizeof(*vfio_res), 0);
- if (vfio_res == NULL) {
- RTE_LOG(ERR, EAL,
- "%s(): cannot store uio mmap details\n", __func__);
- close(vfio_dev_fd);
- return -1;
- }
- memcpy(&vfio_res->pci_addr, &dev->addr, sizeof(vfio_res->pci_addr));
-
- /* get number of registers (up to BAR5) */
- vfio_res->nb_maps = RTE_MIN((int) device_info.num_regions,
- VFIO_PCI_BAR5_REGION_INDEX + 1);
- } else {
- /* if we're in a secondary process, just find our tailq entry */
- TAILQ_FOREACH(vfio_res, vfio_res_list, next) {
- if (rte_eal_compare_pci_addr(&vfio_res->pci_addr,
- &dev->addr))
- continue;
- break;
- }
- /* if we haven't found our tailq entry, something's wrong */
- if (vfio_res == NULL) {
- RTE_LOG(ERR, EAL, " %s cannot find TAILQ entry for PCI device!\n",
- pci_addr);
- close(vfio_dev_fd);
- return -1;
- }
- }
-
- /* map BARs */
- maps = vfio_res->maps;
-
- for (i = 0; i < (int) vfio_res->nb_maps; i++) {
- struct vfio_region_info reg = { .argsz = sizeof(reg) };
- void *bar_addr;
- struct memreg {
- unsigned long offset, size;
- } memreg[2] = {};
-
- reg.index = i;
-
- ret = ioctl(vfio_dev_fd, VFIO_DEVICE_GET_REGION_INFO, &reg);
-
- if (ret) {
- RTE_LOG(ERR, EAL, " %s cannot get device region info "
- "error %i (%s)\n", pci_addr, errno, strerror(errno));
- close(vfio_dev_fd);
- if (internal_config.process_type == RTE_PROC_PRIMARY)
- rte_free(vfio_res);
- return -1;
- }
-
-		/* check for an I/O port region */
- ret = pread64(vfio_dev_fd, &ioport_bar, sizeof(ioport_bar),
- VFIO_GET_REGION_ADDR(VFIO_PCI_CONFIG_REGION_INDEX)
- + PCI_BASE_ADDRESS_0 + i*4);
-
- if (ret != sizeof(ioport_bar)) {
- RTE_LOG(ERR, EAL,
- "Cannot read command (%x) from config space!\n",
- PCI_BASE_ADDRESS_0 + i*4);
- return -1;
- }
-
- if (ioport_bar & PCI_BASE_ADDRESS_SPACE_IO) {
- RTE_LOG(INFO, EAL,
- "Ignore mapping IO port bar(%d) addr: %x\n",
- i, ioport_bar);
- continue;
- }
-
- /* skip non-mmapable BARs */
- if ((reg.flags & VFIO_REGION_INFO_FLAG_MMAP) == 0)
- continue;
-
- if (i == msix_bar) {
- /*
- * VFIO will not let us map the MSI-X table,
- * but we can map around it.
- */
- uint32_t table_start = msix_table_offset;
- uint32_t table_end = table_start + msix_table_size;
- table_end = (table_end + ~PAGE_MASK) & PAGE_MASK;
- table_start &= PAGE_MASK;
-
- if (table_start == 0 && table_end >= reg.size) {
- /* Cannot map this BAR */
- RTE_LOG(DEBUG, EAL, "Skipping BAR %d\n", i);
- continue;
- } else {
- memreg[0].offset = reg.offset;
- memreg[0].size = table_start;
- memreg[1].offset = reg.offset + table_end;
- memreg[1].size = reg.size - table_end;
-
- RTE_LOG(DEBUG, EAL,
- "Trying to map BAR %d that contains the MSI-X "
- "table. Trying offsets: "
- "0x%04lx:0x%04lx, 0x%04lx:0x%04lx\n", i,
- memreg[0].offset, memreg[0].size,
- memreg[1].offset, memreg[1].size);
- }
- } else {
- memreg[0].offset = reg.offset;
- memreg[0].size = reg.size;
- }
-
- /* try to figure out an address */
- if (internal_config.process_type == RTE_PROC_PRIMARY) {
- /* try mapping somewhere close to the end of hugepages */
- if (pci_map_addr == NULL)
- pci_map_addr = pci_find_max_end_va();
-
- bar_addr = pci_map_addr;
- pci_map_addr = RTE_PTR_ADD(bar_addr, (size_t) reg.size);
- } else {
- bar_addr = maps[i].addr;
- }
-
- /* reserve the address using an inaccessible mapping */
- bar_addr = mmap(bar_addr, reg.size, 0, MAP_PRIVATE |
- MAP_ANONYMOUS, -1, 0);
- if (bar_addr != MAP_FAILED) {
- void *map_addr = NULL;
- if (memreg[0].size) {
- /* actual map of first part */
- map_addr = pci_map_resource(bar_addr, vfio_dev_fd,
- memreg[0].offset,
- memreg[0].size,
- MAP_FIXED);
- }
-
- /* if there's a second part, try to map it */
- if (map_addr != MAP_FAILED
- && memreg[1].offset && memreg[1].size) {
- void *second_addr = RTE_PTR_ADD(bar_addr,
- memreg[1].offset -
- (uintptr_t)reg.offset);
- map_addr = pci_map_resource(second_addr,
- vfio_dev_fd, memreg[1].offset,
- memreg[1].size,
- MAP_FIXED);
- }
-
- if (map_addr == MAP_FAILED || !map_addr) {
- munmap(bar_addr, reg.size);
- bar_addr = MAP_FAILED;
- }
- }
-
- if (bar_addr == MAP_FAILED ||
- (internal_config.process_type == RTE_PROC_SECONDARY &&
- bar_addr != maps[i].addr)) {
- RTE_LOG(ERR, EAL, " %s mapping BAR%i failed: %s\n", pci_addr, i,
- strerror(errno));
- close(vfio_dev_fd);
- if (internal_config.process_type == RTE_PROC_PRIMARY)
- rte_free(vfio_res);
- return -1;
- }
-
- maps[i].addr = bar_addr;
- maps[i].offset = reg.offset;
- maps[i].size = reg.size;
- maps[i].path = NULL; /* vfio doesn't have per-resource paths */
- dev->mem_resource[i].addr = bar_addr;
- }
-
- /* if secondary process, do not set up interrupts */
- if (internal_config.process_type == RTE_PROC_PRIMARY) {
- if (pci_vfio_setup_interrupts(dev, vfio_dev_fd) != 0) {
- RTE_LOG(ERR, EAL, " %s error setting up interrupts!\n", pci_addr);
- close(vfio_dev_fd);
- rte_free(vfio_res);
- return -1;
- }
-
- /* set bus mastering for the device */
- if (pci_vfio_set_bus_master(vfio_dev_fd, true)) {
- RTE_LOG(ERR, EAL, " %s cannot set up bus mastering!\n", pci_addr);
- close(vfio_dev_fd);
- rte_free(vfio_res);
- return -1;
- }
-
- /* Reset the device */
- ioctl(vfio_dev_fd, VFIO_DEVICE_RESET);
- }
-
- if (internal_config.process_type == RTE_PROC_PRIMARY)
- TAILQ_INSERT_TAIL(vfio_res_list, vfio_res, next);
-
- return 0;
-}
-
-int
-pci_vfio_unmap_resource(struct rte_pci_device *dev)
-{
- char pci_addr[PATH_MAX] = {0};
- struct rte_pci_addr *loc = &dev->addr;
- int i, ret;
- struct mapped_pci_resource *vfio_res = NULL;
- struct mapped_pci_res_list *vfio_res_list;
-
- struct pci_map *maps;
-
- /* store PCI address string */
- snprintf(pci_addr, sizeof(pci_addr), PCI_PRI_FMT,
- loc->domain, loc->bus, loc->devid, loc->function);
-
-
- if (close(dev->intr_handle.fd) < 0) {
- RTE_LOG(INFO, EAL, "Error when closing eventfd file descriptor for %s\n",
- pci_addr);
- return -1;
- }
-
- if (pci_vfio_set_bus_master(dev->intr_handle.vfio_dev_fd, false)) {
- RTE_LOG(ERR, EAL, " %s cannot unset bus mastering for PCI device!\n",
- pci_addr);
- return -1;
- }
-
- ret = vfio_release_device(pci_get_sysfs_path(), pci_addr,
- dev->intr_handle.vfio_dev_fd);
- if (ret < 0) {
- RTE_LOG(ERR, EAL,
- "%s(): cannot release device\n", __func__);
- return ret;
- }
-
- vfio_res_list = RTE_TAILQ_CAST(rte_vfio_tailq.head, mapped_pci_res_list);
- /* Get vfio_res */
- TAILQ_FOREACH(vfio_res, vfio_res_list, next) {
- if (memcmp(&vfio_res->pci_addr, &dev->addr, sizeof(dev->addr)))
- continue;
- break;
- }
- /* if we haven't found our tailq entry, something's wrong */
- if (vfio_res == NULL) {
- RTE_LOG(ERR, EAL, " %s cannot find TAILQ entry for PCI device!\n",
- pci_addr);
- return -1;
- }
-
- /* unmap BARs */
- maps = vfio_res->maps;
-
- RTE_LOG(INFO, EAL, "Releasing pci mapped resource for %s\n",
- pci_addr);
- for (i = 0; i < (int) vfio_res->nb_maps; i++) {
-
-		/*
-		 * We do not need to handle the MSI-X table BAR specially here,
-		 * as we did when mapping; using the current maps array is enough.
-		 */
- if (maps[i].addr) {
- RTE_LOG(INFO, EAL, "Calling pci_unmap_resource for %s at %p\n",
- pci_addr, maps[i].addr);
- pci_unmap_resource(maps[i].addr, maps[i].size);
- }
- }
-
- TAILQ_REMOVE(vfio_res_list, vfio_res, next);
-
- return 0;
-}
-
-int
-pci_vfio_ioport_map(struct rte_pci_device *dev, int bar,
- struct rte_pci_ioport *p)
-{
- if (bar < VFIO_PCI_BAR0_REGION_INDEX ||
- bar > VFIO_PCI_BAR5_REGION_INDEX) {
- RTE_LOG(ERR, EAL, "invalid bar (%d)!\n", bar);
- return -1;
- }
-
- p->dev = dev;
- p->base = VFIO_GET_REGION_ADDR(bar);
- return 0;
-}
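Worth noting for the read/write helpers below: the base stored here is a VFIO region offset rather than a mapped address, since VFIO_GET_REGION_ADDR() encodes the region index in the upper bits of a 64-bit file offset.

/*
 * VFIO_GET_REGION_ADDR(bar) == ((uint64_t)bar << 40), so the pread64()
 * and pwrite64() calls in pci_vfio_ioport_read()/write() land in the
 * selected BAR region at the requested offset on the device fd.
 */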
-
-void
-pci_vfio_ioport_read(struct rte_pci_ioport *p,
- void *data, size_t len, off_t offset)
-{
- const struct rte_intr_handle *intr_handle = &p->dev->intr_handle;
-
- if (pread64(intr_handle->vfio_dev_fd, data,
- len, p->base + offset) <= 0)
- RTE_LOG(ERR, EAL,
- "Can't read from PCI bar (%" PRIu64 ") : offset (%x)\n",
- VFIO_GET_REGION_IDX(p->base), (int)offset);
-}
-
-void
-pci_vfio_ioport_write(struct rte_pci_ioport *p,
- const void *data, size_t len, off_t offset)
-{
- const struct rte_intr_handle *intr_handle = &p->dev->intr_handle;
-
- if (pwrite64(intr_handle->vfio_dev_fd, data,
- len, p->base + offset) <= 0)
- RTE_LOG(ERR, EAL,
- "Can't write to PCI bar (%" PRIu64 ") : offset (%x)\n",
- VFIO_GET_REGION_IDX(p->base), (int)offset);
-}
-
-int
-pci_vfio_ioport_unmap(struct rte_pci_ioport *p)
-{
- RTE_SET_USED(p);
- return -1;
-}
-
-int
-pci_vfio_enable(void)
-{
- return vfio_enable("vfio_pci");
-}
-
-int
-pci_vfio_is_enabled(void)
-{
- return vfio_is_enabled("vfio_pci");
-}
-#endif
diff --git a/lib/librte_eal/linuxapp/eal/eal_thread.c b/lib/librte_eal/linuxapp/eal/eal_thread.c
index 6481eeea..e9a579e4 100644
--- a/lib/librte_eal/linuxapp/eal/eal_thread.c
+++ b/lib/librte_eal/linuxapp/eal/eal_thread.c
@@ -46,7 +46,6 @@
#include <rte_launch.h>
#include <rte_log.h>
#include <rte_memory.h>
-#include <rte_memzone.h>
#include <rte_per_lcore.h>
#include <rte_eal.h>
#include <rte_lcore.h>
diff --git a/lib/librte_eal/linuxapp/eal/eal_timer.c b/lib/librte_eal/linuxapp/eal/eal_timer.c
index afa32f5c..24349dab 100644
--- a/lib/librte_eal/linuxapp/eal/eal_timer.c
+++ b/lib/librte_eal/linuxapp/eal/eal_timer.c
@@ -49,7 +49,6 @@
#include <rte_cycles.h>
#include <rte_lcore.h>
#include <rte_memory.h>
-#include <rte_memzone.h>
#include <rte_eal.h>
#include <rte_debug.h>
diff --git a/lib/librte_eal/linuxapp/eal/eal_vfio.c b/lib/librte_eal/linuxapp/eal/eal_vfio.c
index 946df7e3..58f0123e 100644
--- a/lib/librte_eal/linuxapp/eal/eal_vfio.c
+++ b/lib/librte_eal/linuxapp/eal/eal_vfio.c
@@ -39,6 +39,7 @@
#include <rte_log.h>
#include <rte_memory.h>
#include <rte_eal_memconfig.h>
+#include <rte_vfio.h>
#include "eal_filesystem.h"
#include "eal_vfio.h"
@@ -68,8 +69,8 @@ vfio_get_group_fd(int iommu_group_no)
{
int i;
int vfio_group_fd;
- int group_idx = -1;
char filename[PATH_MAX];
+ struct vfio_group *cur_grp;
/* check if we already have the group descriptor open */
for (i = 0; i < VFIO_MAX_GROUPS; i++)
@@ -85,12 +86,12 @@ vfio_get_group_fd(int iommu_group_no)
/* Now lets get an index for the new group */
for (i = 0; i < VFIO_MAX_GROUPS; i++)
if (vfio_cfg.vfio_groups[i].group_no == -1) {
- group_idx = i;
+ cur_grp = &vfio_cfg.vfio_groups[i];
break;
}
/* This should not happen */
- if (group_idx == -1) {
+ if (i == VFIO_MAX_GROUPS) {
RTE_LOG(ERR, EAL, "No VFIO group free slot found\n");
return -1;
}
@@ -123,8 +124,8 @@ vfio_get_group_fd(int iommu_group_no)
/* noiommu group found */
}
- vfio_cfg.vfio_groups[group_idx].group_no = iommu_group_no;
- vfio_cfg.vfio_groups[group_idx].fd = vfio_group_fd;
+ cur_grp->group_no = iommu_group_no;
+ cur_grp->fd = vfio_group_fd;
vfio_cfg.vfio_active_groups++;
return vfio_group_fd;
}
@@ -157,9 +158,12 @@ vfio_get_group_fd(int iommu_group_no)
return 0;
case SOCKET_OK:
vfio_group_fd = vfio_mp_sync_receive_fd(socket_fd);
- /* if we got the fd, return it */
+ /* if we got the fd, store it and return it */
if (vfio_group_fd > 0) {
close(socket_fd);
+ cur_grp->group_no = iommu_group_no;
+ cur_grp->fd = vfio_group_fd;
+ vfio_cfg.vfio_active_groups++;
return vfio_group_fd;
}
/* fall-through on error */
@@ -280,7 +284,7 @@ clear_group(int vfio_group_fd)
}
int
-vfio_setup_device(const char *sysfs_base, const char *dev_addr,
+rte_vfio_setup_device(const char *sysfs_base, const char *dev_addr,
int *vfio_dev_fd, struct vfio_device_info *device_info)
{
struct vfio_group_status group_status = {
@@ -412,7 +416,7 @@ vfio_setup_device(const char *sysfs_base, const char *dev_addr,
}
int
-vfio_release_device(const char *sysfs_base, const char *dev_addr,
+rte_vfio_release_device(const char *sysfs_base, const char *dev_addr,
int vfio_dev_fd)
{
struct vfio_group_status group_status = {
@@ -474,7 +478,7 @@ vfio_release_device(const char *sysfs_base, const char *dev_addr,
}
int
-vfio_enable(const char *modname)
+rte_vfio_enable(const char *modname)
{
/* initialize group list */
int i;
@@ -489,7 +493,7 @@ vfio_enable(const char *modname)
/* inform the user that we are probing for VFIO */
RTE_LOG(INFO, EAL, "Probing VFIO support...\n");
- /* check if vfio-pci module is loaded */
+ /* check if vfio module is loaded */
vfio_available = rte_eal_check_module(modname);
/* return error directly */
@@ -519,7 +523,7 @@ vfio_enable(const char *modname)
}
int
-vfio_is_enabled(const char *modname)
+rte_vfio_is_enabled(const char *modname)
{
const int mod_available = rte_eal_check_module(modname);
return vfio_cfg.vfio_enabled && mod_available;
@@ -706,7 +710,10 @@ vfio_type1_dma_map(int vfio_container_fd)
dma_map.argsz = sizeof(struct vfio_iommu_type1_dma_map);
dma_map.vaddr = ms[i].addr_64;
dma_map.size = ms[i].len;
- dma_map.iova = ms[i].phys_addr;
+ if (rte_eal_iova_mode() == RTE_IOVA_VA)
+ dma_map.iova = dma_map.vaddr;
+ else
+ dma_map.iova = ms[i].iova;
dma_map.flags = VFIO_DMA_MAP_FLAG_READ | VFIO_DMA_MAP_FLAG_WRITE;
ret = ioctl(vfio_container_fd, VFIO_IOMMU_MAP_DMA, &dma_map);
@@ -759,10 +766,19 @@ vfio_spapr_dma_map(int vfio_container_fd)
return -1;
}
- /* calculate window size based on number of hugepages configured */
- create.window_size = rte_eal_get_physmem_size();
+ /* create DMA window from 0 to max(phys_addr + len) */
+ for (i = 0; i < RTE_MAX_MEMSEG; i++) {
+ if (ms[i].addr == NULL)
+ break;
+
+ create.window_size = RTE_MAX(create.window_size,
+ ms[i].iova + ms[i].len);
+ }
+
+ /* sPAPR requires window size to be a power of 2 */
+ create.window_size = rte_align64pow2(create.window_size);
create.page_shift = __builtin_ctzll(ms->hugepage_sz);
- create.levels = 2;
+ create.levels = 1;
ret = ioctl(vfio_container_fd, VFIO_IOMMU_SPAPR_TCE_CREATE, &create);
if (ret) {
@@ -771,6 +787,11 @@ vfio_spapr_dma_map(int vfio_container_fd)
return -1;
}
+ if (create.start_addr != 0) {
+ RTE_LOG(ERR, EAL, " DMA window start address != 0\n");
+ return -1;
+ }
+
/* map all DPDK segments for DMA. use 1:1 PA to IOVA mapping */
for (i = 0; i < RTE_MAX_MEMSEG; i++) {
struct vfio_iommu_type1_dma_map dma_map;
@@ -792,7 +813,10 @@ vfio_spapr_dma_map(int vfio_container_fd)
dma_map.argsz = sizeof(struct vfio_iommu_type1_dma_map);
dma_map.vaddr = ms[i].addr_64;
dma_map.size = ms[i].len;
- dma_map.iova = ms[i].phys_addr;
+ if (rte_eal_iova_mode() == RTE_IOVA_VA)
+ dma_map.iova = dma_map.vaddr;
+ else
+ dma_map.iova = ms[i].iova;
dma_map.flags = VFIO_DMA_MAP_FLAG_READ |
VFIO_DMA_MAP_FLAG_WRITE;
@@ -816,4 +840,23 @@ vfio_noiommu_dma_map(int __rte_unused vfio_container_fd)
return 0;
}
+int
+rte_vfio_noiommu_is_enabled(void)
+{
+ int fd, ret, cnt __rte_unused;
+ char c;
+
+ ret = -1;
+ fd = open(VFIO_NOIOMMU_MODE, O_RDONLY);
+ if (fd < 0)
+ return -1;
+
+ cnt = read(fd, &c, 1);
+ if (c == 'Y')
+ ret = 1;
+
+ close(fd);
+ return ret;
+}
+
#endif
diff --git a/lib/librte_eal/linuxapp/eal/eal_vfio.h b/lib/librte_eal/linuxapp/eal/eal_vfio.h
index 5ff63e5d..ba7892b7 100644
--- a/lib/librte_eal/linuxapp/eal/eal_vfio.h
+++ b/lib/librte_eal/linuxapp/eal/eal_vfio.h
@@ -37,20 +37,18 @@
/*
* determine if VFIO is present on the system
*/
-#ifdef RTE_EAL_VFIO
+#if !defined(VFIO_PRESENT) && defined(RTE_EAL_VFIO)
#include <linux/version.h>
#if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 6, 0)
-#include <linux/vfio.h>
-
-#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 10, 0)
-#define RTE_PCI_MSIX_TABLE_BIR 0x7
-#define RTE_PCI_MSIX_TABLE_OFFSET 0xfffffff8
-#define RTE_PCI_MSIX_FLAGS_QSIZE 0x07ff
+#define VFIO_PRESENT
#else
-#define RTE_PCI_MSIX_TABLE_BIR PCI_MSIX_TABLE_BIR
-#define RTE_PCI_MSIX_TABLE_OFFSET PCI_MSIX_TABLE_OFFSET
-#define RTE_PCI_MSIX_FLAGS_QSIZE PCI_MSIX_FLAGS_QSIZE
-#endif
+#pragma message("VFIO configured but not supported by this kernel, disabling.")
+#endif /* kernel version >= 3.6.0 */
+#endif /* RTE_EAL_VFIO */
+
+#ifdef VFIO_PRESENT
+
+#include <linux/vfio.h>
#define RTE_VFIO_TYPE1 VFIO_TYPE1_IOMMU
@@ -144,13 +142,6 @@ struct vfio_config {
struct vfio_group vfio_groups[VFIO_MAX_GROUPS];
};
-#define VFIO_DIR "/dev/vfio"
-#define VFIO_CONTAINER_PATH "/dev/vfio/vfio"
-#define VFIO_GROUP_FMT "/dev/vfio/%u"
-#define VFIO_NOIOMMU_GROUP_FMT "/dev/vfio/noiommu-%u"
-#define VFIO_GET_REGION_ADDR(x) ((uint64_t) x << 40ULL)
-#define VFIO_GET_REGION_IDX(x) (x >> 40)
-
/* DMA mapping function prototype.
* Takes VFIO container fd as a parameter.
* Returns 0 on success, -1 on error.
@@ -190,24 +181,6 @@ vfio_get_group_fd(int iommu_group_no);
int
clear_group(int vfio_group_fd);
-/**
- * Setup vfio_cfg for the device identified by its address. It discovers
- * the configured I/O MMU groups or sets a new one for the device. If a new
- * groups is assigned, the DMA mapping is performed.
- * Returns 0 on success, a negative value on failure and a positive value in
- * case the given device cannot be managed this way.
- */
-int vfio_setup_device(const char *sysfs_base, const char *dev_addr,
- int *vfio_dev_fd, struct vfio_device_info *device_info);
-
-int vfio_release_device(const char *sysfs_base, const char *dev_addr, int fd);
-
-int vfio_enable(const char *modname);
-int vfio_is_enabled(const char *modname);
-
-int pci_vfio_enable(void);
-int pci_vfio_is_enabled(void);
-
int vfio_mp_sync_setup(void);
#define SOCKET_REQ_CONTAINER 0x100
@@ -217,8 +190,6 @@ int vfio_mp_sync_setup(void);
#define SOCKET_NO_FD 0x1
#define SOCKET_ERR 0xFF
-#define VFIO_PRESENT
-#endif /* kernel version */
-#endif /* RTE_EAL_VFIO */
+#endif /* VFIO_PRESENT */
#endif /* EAL_VFIO_H_ */
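With this rework, VFIO_PRESENT is decided once at the top of the header (RTE_EAL_VFIO enabled and kernel >= 3.6) and everything VFIO-specific, including the <linux/vfio.h> include, is gated on it rather than on the raw kernel-version check. Code that includes eal_vfio.h follows the same pattern; an illustrative consumer (a sketch, not verbatim from the tree):

    #include "eal_vfio.h"

    static int
    eal_vfio_init(void)
    {
    #ifdef VFIO_PRESENT
        return vfio_mp_sync_setup();  /* VFIO usable with this config and kernel */
    #else
        return 0;                     /* quietly skip when VFIO is unavailable */
    #endif
    }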
diff --git a/lib/librte_eal/linuxapp/eal/eal_vfio_mp_sync.c b/lib/librte_eal/linuxapp/eal/eal_vfio_mp_sync.c
index 7e8095cb..b53ed7eb 100644
--- a/lib/librte_eal/linuxapp/eal/eal_vfio_mp_sync.c
+++ b/lib/librte_eal/linuxapp/eal/eal_vfio_mp_sync.c
@@ -49,12 +49,12 @@
#endif
#include <rte_log.h>
-#include <rte_pci.h>
#include <rte_eal_memconfig.h>
#include <rte_malloc.h>
+#include <rte_vfio.h>
#include "eal_filesystem.h"
-#include "eal_pci_init.h"
+#include "eal_vfio.h"
#include "eal_thread.h"
/**
@@ -301,7 +301,8 @@ vfio_mp_sync_thread(void __rte_unused * arg)
vfio_mp_sync_send_request(conn_sock, SOCKET_ERR);
else
vfio_mp_sync_send_fd(conn_sock, fd);
- close(fd);
+ if (fd >= 0)
+ close(fd);
break;
case SOCKET_REQ_GROUP:
/* wait for group number */
diff --git a/lib/librte_eal/linuxapp/eal/eal_xen_memory.c b/lib/librte_eal/linuxapp/eal/eal_xen_memory.c
deleted file mode 100644
index 19db1cb5..00000000
--- a/lib/librte_eal/linuxapp/eal/eal_xen_memory.c
+++ /dev/null
@@ -1,381 +0,0 @@
-/*-
- * BSD LICENSE
- *
- * Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- *
- * * Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in
- * the documentation and/or other materials provided with the
- * distribution.
- * * Neither the name of Intel Corporation nor the names of its
- * contributors may be used to endorse or promote products derived
- * from this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- */
-
-#include <errno.h>
-#include <stdarg.h>
-#include <stdlib.h>
-#include <stdio.h>
-#include <stdint.h>
-#include <inttypes.h>
-#include <string.h>
-#include <sys/mman.h>
-#include <sys/types.h>
-#include <sys/stat.h>
-#include <sys/queue.h>
-#include <sys/file.h>
-#include <unistd.h>
-#include <limits.h>
-#include <sys/ioctl.h>
-#include <sys/time.h>
-
-#include <rte_log.h>
-#include <rte_memory.h>
-#include <rte_memzone.h>
-#include <rte_launch.h>
-#include <rte_eal.h>
-#include <rte_eal_memconfig.h>
-#include <rte_per_lcore.h>
-#include <rte_lcore.h>
-#include <rte_common.h>
-#include <rte_string_fns.h>
-
-#include "eal_private.h"
-#include "eal_internal_cfg.h"
-#include "eal_filesystem.h"
-#include <exec-env/rte_dom0_common.h>
-
-#define PAGE_SIZE RTE_PGSIZE_4K
-#define DEFAUL_DOM0_NAME "dom0-mem"
-
-static int xen_fd = -1;
-static const char sys_dir_path[] = "/sys/kernel/mm/dom0-mm/memsize-mB";
-
-/*
- * Try to mmap *size bytes in /dev/zero. If it is successful, return the
- * pointer to the mmap'd area and keep *size unmodified. Else, retry
- * with a smaller zone: decrease *size by mem_size until it reaches
- * 0. In this case, return NULL. Note: this function returns an address
- * which is a multiple of mem_size size.
- */
-static void *
-xen_get_virtual_area(size_t *size, size_t mem_size)
-{
- void *addr;
- int fd;
- long aligned_addr;
-
- RTE_LOG(DEBUG, EAL, "Ask a virtual area of 0x%zu bytes\n", *size);
-
- fd = open("/dev/zero", O_RDONLY);
- if (fd < 0){
- RTE_LOG(ERR, EAL, "Cannot open /dev/zero\n");
- return NULL;
- }
- do {
- addr = mmap(NULL, (*size) + mem_size, PROT_READ,
- MAP_PRIVATE, fd, 0);
- if (addr == MAP_FAILED)
- *size -= mem_size;
- } while (addr == MAP_FAILED && *size > 0);
-
- if (addr == MAP_FAILED) {
- close(fd);
- RTE_LOG(ERR, EAL, "Cannot get a virtual area\n");
- return NULL;
- }
-
- munmap(addr, (*size) + mem_size);
- close(fd);
-
- /* align addr to a mem_size boundary */
- aligned_addr = (uintptr_t)addr;
- aligned_addr = RTE_ALIGN_CEIL(aligned_addr, mem_size);
- addr = (void *)(aligned_addr);
-
- RTE_LOG(DEBUG, EAL, "Virtual area found at %p (size = 0x%zx)\n",
- addr, *size);
-
- return addr;
-}
-
-/**
- * Get memory size configuration from /sys/devices/virtual/misc/dom0_mm
- * /memsize-mB/memsize file, and the size unit is mB.
- */
-static int
-get_xen_memory_size(void)
-{
- char path[PATH_MAX];
- unsigned long mem_size = 0;
- static const char *file_name;
-
- file_name = "memsize";
- snprintf(path, sizeof(path), "%s/%s",
- sys_dir_path, file_name);
-
- if (eal_parse_sysfs_value(path, &mem_size) < 0)
- return -1;
-
- if (mem_size == 0)
- rte_exit(EXIT_FAILURE,"XEN-DOM0:the %s/%s was not"
- " configured.\n",sys_dir_path, file_name);
- if (mem_size % 2)
- rte_exit(EXIT_FAILURE,"XEN-DOM0:the %s/%s must be"
- " even number.\n",sys_dir_path, file_name);
-
- if (mem_size > DOM0_CONFIG_MEMSIZE)
- rte_exit(EXIT_FAILURE,"XEN-DOM0:the %s/%s should not be larger"
- " than %d mB\n",sys_dir_path, file_name, DOM0_CONFIG_MEMSIZE);
-
- return mem_size;
-}
-
-/**
- * Based on physical address to caculate MFN in Xen Dom0.
- */
-phys_addr_t
-rte_xen_mem_phy2mch(int32_t memseg_id, const phys_addr_t phy_addr)
-{
- int mfn_id, i;
- uint64_t mfn, mfn_offset;
- struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config;
- struct rte_memseg *memseg = mcfg->memseg;
-
- /* find the memory segment owning the physical address */
- if (memseg_id == -1) {
- for (i = 0; i < RTE_MAX_MEMSEG; i++) {
- if ((phy_addr >= memseg[i].phys_addr) &&
- (phy_addr < memseg[i].phys_addr +
- memseg[i].len)) {
- memseg_id = i;
- break;
- }
- }
- if (memseg_id == -1)
- return RTE_BAD_PHYS_ADDR;
- }
-
- mfn_id = (phy_addr - memseg[memseg_id].phys_addr) / RTE_PGSIZE_2M;
-
- /*the MFN is contiguous in 2M */
- mfn_offset = (phy_addr - memseg[memseg_id].phys_addr) %
- RTE_PGSIZE_2M / PAGE_SIZE;
- mfn = mfn_offset + memseg[memseg_id].mfn[mfn_id];
-
- /** return mechine address */
- return mfn * PAGE_SIZE + phy_addr % PAGE_SIZE;
-}
-
-int
-rte_xen_dom0_memory_init(void)
-{
- void *vir_addr, *vma_addr = NULL;
- int err, ret = 0;
- uint32_t i, requested, mem_size, memseg_idx, num_memseg = 0;
- size_t vma_len = 0;
- struct memory_info meminfo;
- struct memseg_info seginfo[RTE_MAX_MEMSEG];
- int flags, page_size = getpagesize();
- struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config;
- struct rte_memseg *memseg = mcfg->memseg;
- uint64_t total_mem = internal_config.memory;
-
- memset(seginfo, 0, sizeof(seginfo));
- memset(&meminfo, 0, sizeof(struct memory_info));
-
- mem_size = get_xen_memory_size();
- requested = (unsigned) (total_mem / 0x100000);
- if (requested > mem_size)
- /* if we didn't satisfy total memory requirements */
- rte_exit(EXIT_FAILURE,"Not enough memory available! Requested: %uMB,"
- " available: %uMB\n", requested, mem_size);
- else if (total_mem != 0)
- mem_size = requested;
-
- /* Check FD and open once */
- if (xen_fd < 0) {
- xen_fd = open(DOM0_MM_DEV, O_RDWR);
- if (xen_fd < 0) {
- RTE_LOG(ERR, EAL, "Can not open %s\n",DOM0_MM_DEV);
- return -1;
- }
- }
-
- meminfo.size = mem_size;
-
- /* construct memory mangement name for Dom0 */
- snprintf(meminfo.name, DOM0_NAME_MAX, "%s-%s",
- internal_config.hugefile_prefix, DEFAUL_DOM0_NAME);
-
- /* Notify kernel driver to allocate memory */
- ret = ioctl(xen_fd, RTE_DOM0_IOCTL_PREPARE_MEMSEG, &meminfo);
- if (ret < 0) {
- RTE_LOG(ERR, EAL, "XEN DOM0:failed to get memory\n");
- err = -EIO;
- goto fail;
- }
-
- /* Get number of memory segment from driver */
- ret = ioctl(xen_fd, RTE_DOM0_IOCTL_GET_NUM_MEMSEG, &num_memseg);
- if (ret < 0) {
- RTE_LOG(ERR, EAL, "XEN DOM0:failed to get memseg count.\n");
- err = -EIO;
- goto fail;
- }
-
- if(num_memseg > RTE_MAX_MEMSEG){
- RTE_LOG(ERR, EAL, "XEN DOM0: the memseg count %d is greater"
- " than max memseg %d.\n",num_memseg, RTE_MAX_MEMSEG);
- err = -EIO;
- goto fail;
- }
-
- /* get all memory segements information */
- ret = ioctl(xen_fd, RTE_DOM0_IOCTL_GET_MEMSEG_INFO, seginfo);
- if (ret < 0) {
- RTE_LOG(ERR, EAL, "XEN DOM0:failed to get memseg info.\n");
- err = -EIO;
- goto fail;
- }
-
- /* map all memory segments to contiguous user space */
- for (memseg_idx = 0; memseg_idx < num_memseg; memseg_idx++)
- {
- vma_len = seginfo[memseg_idx].size;
-
- /**
- * get the biggest virtual memory area up to vma_len. If it fails,
- * vma_addr is NULL, so let the kernel provide the address.
- */
- vma_addr = xen_get_virtual_area(&vma_len, RTE_PGSIZE_2M);
- if (vma_addr == NULL) {
- flags = MAP_SHARED;
- vma_len = RTE_PGSIZE_2M;
- } else
- flags = MAP_SHARED | MAP_FIXED;
-
- seginfo[memseg_idx].size = vma_len;
- vir_addr = mmap(vma_addr, seginfo[memseg_idx].size,
- PROT_READ|PROT_WRITE, flags, xen_fd,
- memseg_idx * page_size);
- if (vir_addr == MAP_FAILED) {
- RTE_LOG(ERR, EAL, "XEN DOM0:Could not mmap %s\n",
- DOM0_MM_DEV);
- err = -EIO;
- goto fail;
- }
-
- memseg[memseg_idx].addr = vir_addr;
- memseg[memseg_idx].phys_addr = page_size *
- seginfo[memseg_idx].pfn ;
- memseg[memseg_idx].len = seginfo[memseg_idx].size;
- for ( i = 0; i < seginfo[memseg_idx].size / RTE_PGSIZE_2M; i++)
- memseg[memseg_idx].mfn[i] = seginfo[memseg_idx].mfn[i];
-
- /* MFNs are continuous in 2M, so assume that page size is 2M */
- memseg[memseg_idx].hugepage_sz = RTE_PGSIZE_2M;
-
- memseg[memseg_idx].nchannel = mcfg->nchannel;
- memseg[memseg_idx].nrank = mcfg->nrank;
-
- /* NUMA is not suppoted in Xen Dom0, so only set socket 0*/
- memseg[memseg_idx].socket_id = 0;
- }
-
- return 0;
-fail:
- if (xen_fd > 0) {
- close(xen_fd);
- xen_fd = -1;
- }
- return err;
-}
-
-/*
- * This creates the memory mappings in the secondary process to match that of
- * the server process. It goes through each memory segment in the DPDK runtime
- * configuration, mapping them in order to form a contiguous block in the
- * virtual memory space
- */
-int
-rte_xen_dom0_memory_attach(void)
-{
- const struct rte_mem_config *mcfg;
- unsigned s = 0; /* s used to track the segment number */
- int xen_fd = -1;
- int ret = -1;
- void *vir_addr;
- char name[DOM0_NAME_MAX] = {0};
- int page_size = getpagesize();
-
- mcfg = rte_eal_get_configuration()->mem_config;
-
- /* Check FD and open once */
- if (xen_fd < 0) {
- xen_fd = open(DOM0_MM_DEV, O_RDWR);
- if (xen_fd < 0) {
- RTE_LOG(ERR, EAL, "Can not open %s\n",DOM0_MM_DEV);
- goto error;
- }
- }
-
- /* construct memory mangement name for Dom0 */
- snprintf(name, DOM0_NAME_MAX, "%s-%s",
- internal_config.hugefile_prefix, DEFAUL_DOM0_NAME);
- /* attach to memory segments of primary process */
- ret = ioctl(xen_fd, RTE_DOM0_IOCTL_ATTACH_TO_MEMSEG, name);
- if (ret) {
- RTE_LOG(ERR, EAL,"attach memory segments fail.\n");
- goto error;
- }
-
- /* map all segments into memory to make sure we get the addrs */
- for (s = 0; s < RTE_MAX_MEMSEG; ++s) {
-
- /*
- * the first memory segment with len==0 is the one that
- * follows the last valid segment.
- */
- if (mcfg->memseg[s].len == 0)
- break;
-
- vir_addr = mmap(mcfg->memseg[s].addr, mcfg->memseg[s].len,
- PROT_READ|PROT_WRITE, MAP_SHARED|MAP_FIXED, xen_fd,
- s * page_size);
- if (vir_addr == MAP_FAILED) {
- RTE_LOG(ERR, EAL, "Could not mmap %llu bytes "
- "in %s to requested address [%p]\n",
- (unsigned long long)mcfg->memseg[s].len, DOM0_MM_DEV,
- mcfg->memseg[s].addr);
- goto error;
- }
- }
- return 0;
-
-error:
- if (xen_fd >= 0) {
- close(xen_fd);
- xen_fd = -1;
- }
- return -1;
-}
diff --git a/lib/librte_eal/linuxapp/eal/include/exec-env/rte_dom0_common.h b/lib/librte_eal/linuxapp/eal/include/exec-env/rte_dom0_common.h
deleted file mode 100644
index d9707780..00000000
--- a/lib/librte_eal/linuxapp/eal/include/exec-env/rte_dom0_common.h
+++ /dev/null
@@ -1,108 +0,0 @@
-/*-
- * This file is provided under a dual BSD/LGPLv2 license. When using or
- * redistributing this file, you may do so under either license.
- *
- * GNU LESSER GENERAL PUBLIC LICENSE
- *
- * Copyright(c) 2007-2014 Intel Corporation. All rights reserved.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of version 2.1 of the GNU Lesser General Public License
- * as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
- *
- * Contact Information:
- * Intel Corporation
- *
- *
- * BSD LICENSE
- *
- * Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- *
- * * Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in
- * the documentation and/or other materials provided with the
- * distribution.
- * * Neither the name of Intel Corporation nor the names of its
- * contributors may be used to endorse or promote products derived
- * from this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- *
- */
-
-#ifndef _RTE_DOM0_COMMON_H_
-#define _RTE_DOM0_COMMON_H_
-
-#ifdef __KERNEL__
-#include <linux/if.h>
-#endif
-
-#define DOM0_NAME_MAX 256
-#define DOM0_MM_DEV "/dev/dom0_mm"
-
-#define DOM0_CONTIG_NUM_ORDER 9 /**< order of 2M */
-#define DOM0_NUM_MEMSEG 512 /**< Maximum nb. of memory segment. */
-#define DOM0_MEMBLOCK_SIZE 0x200000 /**< size of memory block(2M). */
-#define DOM0_CONFIG_MEMSIZE 4096 /**< Maximum config memory size(4G). */
-#define DOM0_NUM_MEMBLOCK (DOM0_CONFIG_MEMSIZE / 2) /**< Maximum nb. of 2M memory block. */
-
-#define RTE_DOM0_IOCTL_PREPARE_MEMSEG _IOWR(0, 1 , struct memory_info)
-#define RTE_DOM0_IOCTL_ATTACH_TO_MEMSEG _IOWR(0, 2 , char *)
-#define RTE_DOM0_IOCTL_GET_NUM_MEMSEG _IOWR(0, 3, int)
-#define RTE_DOM0_IOCTL_GET_MEMSEG_INFO _IOWR(0, 4, void *)
-
-/**
- * A structure used to store memory information.
- */
-struct memory_info {
- char name[DOM0_NAME_MAX];
- uint64_t size;
-};
-
-/**
- * A structure used to store memory segment information.
- */
-struct memseg_info {
- uint32_t idx;
- uint64_t pfn;
- uint64_t size;
- uint64_t mfn[DOM0_NUM_MEMBLOCK];
-};
-
-/**
- * A structure used to store memory block information.
- */
-struct memblock_info {
- uint8_t exchange_flag;
- uint8_t used;
- uint64_t vir_addr;
- uint64_t pfn;
- uint64_t mfn;
-};
-#endif /* _RTE_DOM0_COMMON_H_ */
diff --git a/lib/librte_eal/linuxapp/igb_uio/compat.h b/lib/librte_eal/linuxapp/igb_uio/compat.h
index b800a53c..ce456d4b 100644
--- a/lib/librte_eal/linuxapp/igb_uio/compat.h
+++ b/lib/librte_eal/linuxapp/igb_uio/compat.h
@@ -16,12 +16,9 @@
#endif
#ifndef PCI_MSIX_ENTRY_SIZE
-#define PCI_MSIX_ENTRY_SIZE 16
-#define PCI_MSIX_ENTRY_LOWER_ADDR 0
-#define PCI_MSIX_ENTRY_UPPER_ADDR 4
-#define PCI_MSIX_ENTRY_DATA 8
-#define PCI_MSIX_ENTRY_VECTOR_CTRL 12
-#define PCI_MSIX_ENTRY_CTRL_MASKBIT 1
+#define PCI_MSIX_ENTRY_SIZE 16
+#define PCI_MSIX_ENTRY_VECTOR_CTRL 12
+#define PCI_MSIX_ENTRY_CTRL_MASKBIT 1
#endif
/*
@@ -124,6 +121,14 @@ static bool pci_check_and_mask_intx(struct pci_dev *pdev)
#endif /* < 3.3.0 */
-#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 8, 0)
-#define HAVE_PCI_ENABLE_MSIX
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 8, 0)
+#define HAVE_ALLOC_IRQ_VECTORS 1
+#endif
+
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 3, 0)
+#define HAVE_MSI_LIST_IN_GENERIC_DEVICE 1
+#endif
+
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 5, 0)
+#define HAVE_PCI_MSI_MASK_IRQ 1
#endif
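These three feature tests replace the single HAVE_PCI_ENABLE_MSIX gate removed above: HAVE_ALLOC_IRQ_VECTORS switches igbuio_pci_enable_interrupts() (further down in this patch) between pci_enable_msix()/pci_enable_msi() and pci_alloc_irq_vectors(); HAVE_MSI_LIST_IN_GENERIC_DEVICE picks whether the MSI descriptor list is read from pdev->dev.msi_list or pdev->msi_list; and HAVE_PCI_MSI_MASK_IRQ lets igbuio_pci_irqcontrol() use the generic pci_msi_mask_irq()/pci_msi_unmask_irq() helpers instead of the local masking code.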
diff --git a/lib/librte_eal/linuxapp/igb_uio/igb_uio.c b/lib/librte_eal/linuxapp/igb_uio/igb_uio.c
index 07a19a31..a3a98c17 100644
--- a/lib/librte_eal/linuxapp/igb_uio/igb_uio.c
+++ b/lib/librte_eal/linuxapp/igb_uio/igb_uio.c
@@ -29,13 +29,11 @@
#include <linux/pci.h>
#include <linux/uio_driver.h>
#include <linux/io.h>
+#include <linux/irq.h>
#include <linux/msi.h>
#include <linux/version.h>
#include <linux/slab.h>
-#ifdef CONFIG_XEN_DOM0
-#include <xen/xen.h>
-#endif
#include <rte_pci_dev_features.h>
#include "compat.h"
@@ -51,7 +49,6 @@ struct rte_uio_pci_dev {
static char *intr_mode;
static enum rte_intr_mode igbuio_intr_mode_preferred = RTE_INTR_MODE_MSIX;
-
/* sriov sysfs */
static ssize_t
show_max_vfs(struct device *dev, struct device_attribute *attr,
@@ -91,14 +88,16 @@ static struct attribute *dev_attrs[] = {
static const struct attribute_group dev_attr_grp = {
.attrs = dev_attrs,
};
+
+#ifndef HAVE_PCI_MSI_MASK_IRQ
/*
* It masks the msix on/off of generating MSI-X messages.
*/
static void
-igbuio_msix_mask_irq(struct msi_desc *desc, int32_t state)
+igbuio_msix_mask_irq(struct msi_desc *desc, s32 state)
{
u32 mask_bits = desc->masked;
- unsigned offset = desc->msi_attrib.entry_nr * PCI_MSIX_ENTRY_SIZE +
+ unsigned int offset = desc->msi_attrib.entry_nr * PCI_MSIX_ENTRY_SIZE +
PCI_MSIX_ENTRY_VECTOR_CTRL;
if (state != 0)
@@ -113,6 +112,52 @@ igbuio_msix_mask_irq(struct msi_desc *desc, int32_t state)
}
}
+/*
+ * Mask or unmask generation of MSI messages.
+ */
+static void
+igbuio_msi_mask_irq(struct pci_dev *pdev, struct msi_desc *desc, int32_t state)
+{
+ u32 mask_bits = desc->masked;
+ u32 offset = desc->irq - pdev->irq;
+ u32 mask = 1 << offset;
+
+ if (!desc->msi_attrib.maskbit)
+ return;
+
+ if (state != 0)
+ mask_bits &= ~mask;
+ else
+ mask_bits |= mask;
+
+ if (mask_bits != desc->masked) {
+ pci_write_config_dword(pdev, desc->mask_pos, mask_bits);
+ desc->masked = mask_bits;
+ }
+}
+
+static void
+igbuio_mask_irq(struct pci_dev *pdev, enum rte_intr_mode mode, s32 irq_state)
+{
+ struct msi_desc *desc;
+ struct list_head *msi_list;
+
+#ifdef HAVE_MSI_LIST_IN_GENERIC_DEVICE
+ msi_list = &pdev->dev.msi_list;
+#else
+ msi_list = &pdev->msi_list;
+#endif
+
+ if (mode == RTE_INTR_MODE_MSIX) {
+ list_for_each_entry(desc, msi_list, list)
+ igbuio_msix_mask_irq(desc, irq_state);
+ } else if (mode == RTE_INTR_MODE_MSI) {
+ list_for_each_entry(desc, msi_list, list)
+ igbuio_msi_mask_irq(pdev, desc, irq_state);
+ }
+}
+#endif
+
/**
* This is the irqcontrol callback to be registered to uio_info.
* It can be used to disable/enable interrupt from user space processes.
@@ -132,21 +177,26 @@ igbuio_pci_irqcontrol(struct uio_info *info, s32 irq_state)
struct rte_uio_pci_dev *udev = info->priv;
struct pci_dev *pdev = udev->pdev;
- pci_cfg_access_lock(pdev);
- if (udev->mode == RTE_INTR_MODE_LEGACY)
- pci_intx(pdev, !!irq_state);
+#ifdef HAVE_PCI_MSI_MASK_IRQ
+ struct irq_data *irq = irq_get_irq_data(udev->info.irq);
+#endif
- else if (udev->mode == RTE_INTR_MODE_MSIX) {
- struct msi_desc *desc;
+ pci_cfg_access_lock(pdev);
-#if (LINUX_VERSION_CODE < KERNEL_VERSION(4, 3, 0))
- list_for_each_entry(desc, &pdev->msi_list, list)
- igbuio_msix_mask_irq(desc, irq_state);
+ if (udev->mode == RTE_INTR_MODE_MSIX || udev->mode == RTE_INTR_MODE_MSI) {
+#ifdef HAVE_PCI_MSI_MASK_IRQ
+ if (irq_state == 1)
+ pci_msi_unmask_irq(irq);
+ else
+ pci_msi_mask_irq(irq);
#else
- list_for_each_entry(desc, &pdev->dev.msi_list, list)
- igbuio_msix_mask_irq(desc, irq_state);
+ igbuio_mask_irq(pdev, udev->mode, irq_state);
#endif
}
+
+ if (udev->mode == RTE_INTR_MODE_LEGACY)
+ pci_intx(pdev, !!irq_state);
+
pci_cfg_access_unlock(pdev);
return 0;
@@ -157,19 +207,125 @@ igbuio_pci_irqcontrol(struct uio_info *info, s32 irq_state)
* If yes, disable it here and will be enable later.
*/
static irqreturn_t
-igbuio_pci_irqhandler(int irq, struct uio_info *info)
+igbuio_pci_irqhandler(int irq, void *dev_id)
{
- struct rte_uio_pci_dev *udev = info->priv;
+ struct rte_uio_pci_dev *udev = (struct rte_uio_pci_dev *)dev_id;
+ struct uio_info *info = &udev->info;
/* Legacy mode need to mask in hardware */
if (udev->mode == RTE_INTR_MODE_LEGACY &&
!pci_check_and_mask_intx(udev->pdev))
return IRQ_NONE;
+ uio_event_notify(info);
+
/* Message signal mode, no share IRQ and automasked */
return IRQ_HANDLED;
}
+static int
+igbuio_pci_enable_interrupts(struct rte_uio_pci_dev *udev)
+{
+ int err = 0;
+#ifndef HAVE_ALLOC_IRQ_VECTORS
+ struct msix_entry msix_entry;
+#endif
+
+ switch (igbuio_intr_mode_preferred) {
+ case RTE_INTR_MODE_MSIX:
+ /* Only 1 msi-x vector needed */
+#ifndef HAVE_ALLOC_IRQ_VECTORS
+ msix_entry.entry = 0;
+ if (pci_enable_msix(udev->pdev, &msix_entry, 1) == 0) {
+ dev_dbg(&udev->pdev->dev, "using MSI-X");
+ udev->info.irq_flags = IRQF_NO_THREAD;
+ udev->info.irq = msix_entry.vector;
+ udev->mode = RTE_INTR_MODE_MSIX;
+ break;
+ }
+#else
+ if (pci_alloc_irq_vectors(udev->pdev, 1, 1, PCI_IRQ_MSIX) == 1) {
+ dev_dbg(&udev->pdev->dev, "using MSI-X");
+ udev->info.irq_flags = IRQF_NO_THREAD;
+ udev->info.irq = pci_irq_vector(udev->pdev, 0);
+ udev->mode = RTE_INTR_MODE_MSIX;
+ break;
+ }
+#endif
+
+ /* fall back to MSI */
+ case RTE_INTR_MODE_MSI:
+#ifndef HAVE_ALLOC_IRQ_VECTORS
+ if (pci_enable_msi(udev->pdev) == 0) {
+ dev_dbg(&udev->pdev->dev, "using MSI");
+ udev->info.irq_flags = IRQF_NO_THREAD;
+ udev->info.irq = udev->pdev->irq;
+ udev->mode = RTE_INTR_MODE_MSI;
+ break;
+ }
+#else
+ if (pci_alloc_irq_vectors(udev->pdev, 1, 1, PCI_IRQ_MSI) == 1) {
+ dev_dbg(&udev->pdev->dev, "using MSI");
+ udev->info.irq_flags = IRQF_NO_THREAD;
+ udev->info.irq = pci_irq_vector(udev->pdev, 0);
+ udev->mode = RTE_INTR_MODE_MSI;
+ break;
+ }
+#endif
+ /* fall back to INTX */
+ case RTE_INTR_MODE_LEGACY:
+ if (pci_intx_mask_supported(udev->pdev)) {
+ dev_dbg(&udev->pdev->dev, "using INTX");
+ udev->info.irq_flags = IRQF_SHARED | IRQF_NO_THREAD;
+ udev->info.irq = udev->pdev->irq;
+ udev->mode = RTE_INTR_MODE_LEGACY;
+ break;
+ }
+ dev_notice(&udev->pdev->dev, "PCI INTX mask not supported\n");
+ /* fall back to no IRQ */
+ case RTE_INTR_MODE_NONE:
+ udev->mode = RTE_INTR_MODE_NONE;
+ udev->info.irq = UIO_IRQ_NONE;
+ break;
+
+ default:
+ dev_err(&udev->pdev->dev, "invalid IRQ mode %u",
+ igbuio_intr_mode_preferred);
+ udev->info.irq = UIO_IRQ_NONE;
+ err = -EINVAL;
+ }
+
+ if (udev->info.irq != UIO_IRQ_NONE)
+ err = request_irq(udev->info.irq, igbuio_pci_irqhandler,
+ udev->info.irq_flags, udev->info.name,
+ udev);
+ dev_info(&udev->pdev->dev, "uio device registered with irq %lx\n",
+ udev->info.irq);
+
+ return err;
+}
+
+static void
+igbuio_pci_disable_interrupts(struct rte_uio_pci_dev *udev)
+{
+ if (udev->info.irq) {
+ free_irq(udev->info.irq, udev);
+ udev->info.irq = 0;
+ }
+
+#ifndef HAVE_ALLOC_IRQ_VECTORS
+ if (udev->mode == RTE_INTR_MODE_MSIX)
+ pci_disable_msix(udev->pdev);
+ if (udev->mode == RTE_INTR_MODE_MSI)
+ pci_disable_msi(udev->pdev);
+#else
+ if (udev->mode == RTE_INTR_MODE_MSIX ||
+ udev->mode == RTE_INTR_MODE_MSI)
+ pci_free_irq_vectors(udev->pdev);
+#endif
+}
+
+
/**
* This gets called while opening uio device file.
*/
@@ -178,12 +334,17 @@ igbuio_pci_open(struct uio_info *info, struct inode *inode)
{
struct rte_uio_pci_dev *udev = info->priv;
struct pci_dev *dev = udev->pdev;
-
- pci_reset_function(dev);
+ int err;
/* set bus master, which was cleared by the reset function */
pci_set_master(dev);
+ /* enable interrupts */
+ err = igbuio_pci_enable_interrupts(udev);
+ if (err) {
+		dev_err(&dev->dev, "Enabling interrupts failed\n");
+ return err;
+ }
return 0;
}
@@ -193,60 +354,15 @@ igbuio_pci_release(struct uio_info *info, struct inode *inode)
struct rte_uio_pci_dev *udev = info->priv;
struct pci_dev *dev = udev->pdev;
+ /* disable interrupts */
+ igbuio_pci_disable_interrupts(udev);
+
/* stop the device from further DMA */
pci_clear_master(dev);
- pci_reset_function(dev);
-
return 0;
}
-#ifdef CONFIG_XEN_DOM0
-static int
-igbuio_dom0_mmap_phys(struct uio_info *info, struct vm_area_struct *vma)
-{
- int idx;
-
- idx = (int)vma->vm_pgoff;
- vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
-#ifdef HAVE_PTE_MASK_PAGE_IOMAP
- vma->vm_page_prot.pgprot |= _PAGE_IOMAP;
-#endif
-
- return remap_pfn_range(vma,
- vma->vm_start,
- info->mem[idx].addr >> PAGE_SHIFT,
- vma->vm_end - vma->vm_start,
- vma->vm_page_prot);
-}
-
-/**
- * This is uio device mmap method which will use igbuio mmap for Xen
- * Dom0 environment.
- */
-static int
-igbuio_dom0_pci_mmap(struct uio_info *info, struct vm_area_struct *vma)
-{
- int idx;
-
- if (vma->vm_pgoff >= MAX_UIO_MAPS)
- return -EINVAL;
-
- if (info->mem[vma->vm_pgoff].size == 0)
- return -EINVAL;
-
- idx = (int)vma->vm_pgoff;
- switch (info->mem[idx].memtype) {
- case UIO_MEM_PHYS:
- return igbuio_dom0_mmap_phys(info, vma);
- case UIO_MEM_LOGICAL:
- case UIO_MEM_VIRTUAL:
- default:
- return -EINVAL;
- }
-}
-#endif
-
/* Remap pci resources described by bar #pci_bar in uio resource n. */
static int
igbuio_pci_setup_iomem(struct pci_dev *dev, struct uio_info *info,
@@ -356,9 +472,6 @@ static int
igbuio_pci_probe(struct pci_dev *dev, const struct pci_device_id *id)
{
struct rte_uio_pci_dev *udev;
-#ifdef HAVE_PCI_ENABLE_MSIX
- struct msix_entry msix_entry;
-#endif
dma_addr_t map_dma_addr;
void *map_addr;
int err;
@@ -401,61 +514,12 @@ igbuio_pci_probe(struct pci_dev *dev, const struct pci_device_id *id)
/* fill uio infos */
udev->info.name = "igb_uio";
udev->info.version = "0.1";
- udev->info.handler = igbuio_pci_irqhandler;
udev->info.irqcontrol = igbuio_pci_irqcontrol;
udev->info.open = igbuio_pci_open;
udev->info.release = igbuio_pci_release;
-#ifdef CONFIG_XEN_DOM0
- /* check if the driver run on Xen Dom0 */
- if (xen_initial_domain())
- udev->info.mmap = igbuio_dom0_pci_mmap;
-#endif
udev->info.priv = udev;
udev->pdev = dev;
- switch (igbuio_intr_mode_preferred) {
- case RTE_INTR_MODE_MSIX:
- /* Only 1 msi-x vector needed */
-#ifdef HAVE_PCI_ENABLE_MSIX
- msix_entry.entry = 0;
- if (pci_enable_msix(dev, &msix_entry, 1) == 0) {
- dev_dbg(&dev->dev, "using MSI-X");
- udev->info.irq_flags = IRQF_NO_THREAD;
- udev->info.irq = msix_entry.vector;
- udev->mode = RTE_INTR_MODE_MSIX;
- break;
- }
-#else
- if (pci_alloc_irq_vectors(dev, 1, 1, PCI_IRQ_MSIX) == 1) {
- dev_dbg(&dev->dev, "using MSI-X");
- udev->info.irq = pci_irq_vector(dev, 0);
- udev->mode = RTE_INTR_MODE_MSIX;
- break;
- }
-#endif
- /* fall back to INTX */
- case RTE_INTR_MODE_LEGACY:
- if (pci_intx_mask_supported(dev)) {
- dev_dbg(&dev->dev, "using INTX");
- udev->info.irq_flags = IRQF_SHARED | IRQF_NO_THREAD;
- udev->info.irq = dev->irq;
- udev->mode = RTE_INTR_MODE_LEGACY;
- break;
- }
- dev_notice(&dev->dev, "PCI INTX mask not supported\n");
- /* fall back to no IRQ */
- case RTE_INTR_MODE_NONE:
- udev->mode = RTE_INTR_MODE_NONE;
- udev->info.irq = 0;
- break;
-
- default:
- dev_err(&dev->dev, "invalid IRQ mode %u",
- igbuio_intr_mode_preferred);
- err = -EINVAL;
- goto fail_release_iomem;
- }
-
err = sysfs_create_group(&dev->dev.kobj, &dev_attr_grp);
if (err != 0)
goto fail_release_iomem;
@@ -467,9 +531,6 @@ igbuio_pci_probe(struct pci_dev *dev, const struct pci_device_id *id)
pci_set_drvdata(dev, udev);
- dev_info(&dev->dev, "uio device registered with irq %lx\n",
- udev->info.irq);
-
/*
* Doing a harmless dma mapping for attaching the device to
* the iommu identity mapping if kernel boots with iommu=pt.
@@ -497,8 +558,6 @@ fail_remove_group:
sysfs_remove_group(&dev->dev.kobj, &dev_attr_grp);
fail_release_iomem:
igbuio_pci_release_iomem(&udev->info);
- if (udev->mode == RTE_INTR_MODE_MSIX)
- pci_disable_msix(udev->pdev);
pci_disable_device(dev);
fail_free:
kfree(udev);
@@ -514,8 +573,6 @@ igbuio_pci_remove(struct pci_dev *dev)
sysfs_remove_group(&dev->dev.kobj, &dev_attr_grp);
uio_unregister_device(&udev->info);
igbuio_pci_release_iomem(&udev->info);
- if (udev->mode == RTE_INTR_MODE_MSIX)
- pci_disable_msix(dev);
pci_disable_device(dev);
pci_set_drvdata(dev, NULL);
kfree(udev);
@@ -532,6 +589,9 @@ igbuio_config_intr_mode(char *intr_str)
if (!strcmp(intr_str, RTE_INTR_MODE_MSIX_NAME)) {
igbuio_intr_mode_preferred = RTE_INTR_MODE_MSIX;
pr_info("Use MSIX interrupt\n");
+ } else if (!strcmp(intr_str, RTE_INTR_MODE_MSI_NAME)) {
+ igbuio_intr_mode_preferred = RTE_INTR_MODE_MSI;
+ pr_info("Use MSI interrupt\n");
} else if (!strcmp(intr_str, RTE_INTR_MODE_LEGACY_NAME)) {
igbuio_intr_mode_preferred = RTE_INTR_MODE_LEGACY;
pr_info("Use legacy interrupt\n");
@@ -575,6 +635,7 @@ module_param(intr_mode, charp, S_IRUGO);
MODULE_PARM_DESC(intr_mode,
"igb_uio interrupt mode (default=msix):\n"
" " RTE_INTR_MODE_MSIX_NAME " Use MSIX interrupt\n"
+" " RTE_INTR_MODE_MSI_NAME " Use MSI interrupt\n"
" " RTE_INTR_MODE_LEGACY_NAME " Use Legacy interrupt\n"
"\n");
diff --git a/lib/librte_eal/linuxapp/kni/compat.h b/lib/librte_eal/linuxapp/kni/compat.h
index 6a1587b4..3f8c0bc8 100644
--- a/lib/librte_eal/linuxapp/kni/compat.h
+++ b/lib/librte_eal/linuxapp/kni/compat.h
@@ -8,6 +8,34 @@
#define RHEL_RELEASE_VERSION(a, b) (((a) << 8) + (b))
#endif
+/* SuSE version macro is the same as Linux kernel version */
+#ifndef SLE_VERSION
+#define SLE_VERSION(a, b, c) KERNEL_VERSION(a, b, c)
+#endif
+#ifdef CONFIG_SUSE_KERNEL
+#if (LINUX_VERSION_CODE >= KERNEL_VERSION(4, 4, 57))
+/* SLES12SP3 is at least 4.4.57+ based */
+#define SLE_VERSION_CODE SLE_VERSION(12, 3, 0)
+#elif (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 12, 28))
+/* SLES12 is at least 3.12.28+ based */
+#define SLE_VERSION_CODE SLE_VERSION(12, 0, 0)
+#elif ((LINUX_VERSION_CODE >= KERNEL_VERSION(3, 0, 61)) && \
+ (LINUX_VERSION_CODE < KERNEL_VERSION(3, 1, 0)))
+/* SLES11 SP3 is at least 3.0.61+ based */
+#define SLE_VERSION_CODE SLE_VERSION(11, 3, 0)
+#elif (LINUX_VERSION_CODE == KERNEL_VERSION(2, 6, 32))
+/* SLES11 SP1 is 2.6.32 based */
+#define SLE_VERSION_CODE SLE_VERSION(11, 1, 0)
+#elif (LINUX_VERSION_CODE == KERNEL_VERSION(2, 6, 27))
+/* SLES11 GA is 2.6.27 based */
+#define SLE_VERSION_CODE SLE_VERSION(11, 0, 0)
+#endif /* LINUX_VERSION_CODE == KERNEL_VERSION(x,y,z) */
+#endif /* CONFIG_SUSE_KERNEL */
+#ifndef SLE_VERSION_CODE
+#define SLE_VERSION_CODE 0
+#endif /* SLE_VERSION_CODE */
+
+
#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 39) && \
(!(defined(RHEL_RELEASE_CODE) && \
RHEL_RELEASE_CODE >= RHEL_RELEASE_VERSION(6, 4)))
@@ -55,7 +83,8 @@
#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 7, 0) || \
(defined(RHEL_RELEASE_CODE) && \
- RHEL_RELEASE_CODE >= RHEL_RELEASE_VERSION(7, 4))
+ RHEL_RELEASE_CODE >= RHEL_RELEASE_VERSION(7, 4)) || \
+ (SLE_VERSION_CODE && SLE_VERSION_CODE == SLE_VERSION(12, 3, 0))
#define HAVE_TRANS_START_HELPER
#endif
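The added SLES 12 SP3 clause extends HAVE_TRANS_START_HELPER beyond the plain kernel-4.7+/RHEL 7.4 checks, presumably because that SuSE kernel backports the newer trans_start handling. A sketch of how such a gate is typically consumed on the KNI transmit path (an assumption about the surrounding KNI code, not part of this hunk):

    #ifdef HAVE_TRANS_START_HELPER
        netif_trans_update(dev);      /* newer kernels: helper updates the timestamp */
    #else
        dev->trans_start = jiffies;   /* older kernels: field still on struct net_device */
    #endif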
diff --git a/lib/librte_eal/linuxapp/kni/ethtool/igb/kcompat.h b/lib/librte_eal/linuxapp/kni/ethtool/igb/kcompat.h
index e0a03542..e38a7561 100644
--- a/lib/librte_eal/linuxapp/kni/ethtool/igb/kcompat.h
+++ b/lib/librte_eal/linuxapp/kni/ethtool/igb/kcompat.h
@@ -697,22 +697,22 @@ struct _kc_ethtool_pauseparam {
#define SLE_VERSION(a,b,c) KERNEL_VERSION(a,b,c)
#endif
#ifdef CONFIG_SUSE_KERNEL
-#if ( LINUX_VERSION_CODE == KERNEL_VERSION(2,6,27) )
-/* SLES11 GA is 2.6.27 based */
-#define SLE_VERSION_CODE SLE_VERSION(11,0,0)
-#elif ( LINUX_VERSION_CODE == KERNEL_VERSION(2,6,32) )
-/* SLES11 SP1 is 2.6.32 based */
-#define SLE_VERSION_CODE SLE_VERSION(11,1,0)
+#if (LINUX_VERSION_CODE >= KERNEL_VERSION(4, 4, 57))
+/* SLES12SP3 is at least 4.4.57+ based */
+#define SLE_VERSION_CODE SLE_VERSION(12, 3, 0)
+#elif ( LINUX_VERSION_CODE >= KERNEL_VERSION(3,12,28) )
+/* SLES12 is at least 3.12.28+ based */
+#define SLE_VERSION_CODE SLE_VERSION(12,0,0)
#elif ((LINUX_VERSION_CODE >= KERNEL_VERSION(3,0,61)) && \
(LINUX_VERSION_CODE < KERNEL_VERSION(3,1,0)))
/* SLES11 SP3 is at least 3.0.61+ based */
#define SLE_VERSION_CODE SLE_VERSION(11,3,0)
-#elif ( LINUX_VERSION_CODE >= KERNEL_VERSION(3,12,28) )
-/* SLES12 is at least 3.12.28+ based */
-#define SLE_VERSION_CODE SLE_VERSION(12,0,0)
-#elif (LINUX_VERSION_CODE >= KERNEL_VERSION(4, 4, 57))
-/* SLES12SP3 is at least 4.4.57+ based */
-#define SLE_VERSION_CODE SLE_VERSION(12, 3, 0)
+#elif ( LINUX_VERSION_CODE == KERNEL_VERSION(2,6,32) )
+/* SLES11 SP1 is 2.6.32 based */
+#define SLE_VERSION_CODE SLE_VERSION(11,1,0)
+#elif ( LINUX_VERSION_CODE == KERNEL_VERSION(2,6,27) )
+/* SLES11 GA is 2.6.27 based */
+#define SLE_VERSION_CODE SLE_VERSION(11,0,0)
#endif /* LINUX_VERSION_CODE == KERNEL_VERSION(x,y,z) */
#endif /* CONFIG_SUSE_KERNEL */
#ifndef SLE_VERSION_CODE
diff --git a/lib/librte_eal/linuxapp/xen_dom0/compat.h b/lib/librte_eal/linuxapp/xen_dom0/compat.h
deleted file mode 100644
index e6eb97f2..00000000
--- a/lib/librte_eal/linuxapp/xen_dom0/compat.h
+++ /dev/null
@@ -1,15 +0,0 @@
-/*
- * Minimal wrappers to allow compiling xen_dom0 on older kernels.
- */
-
-#ifndef RHEL_RELEASE_VERSION
-#define RHEL_RELEASE_VERSION(a, b) (((a) << 8) + (b))
-#endif
-
-#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 39) && \
- (!(defined(RHEL_RELEASE_CODE) && \
- RHEL_RELEASE_CODE >= RHEL_RELEASE_VERSION(6, 4)))
-
-#define kstrtoul strict_strtoul
-
-#endif /* < 2.6.39 */
diff --git a/lib/librte_eal/linuxapp/xen_dom0/dom0_mm_dev.h b/lib/librte_eal/linuxapp/xen_dom0/dom0_mm_dev.h
deleted file mode 100644
index 9d5ffb22..00000000
--- a/lib/librte_eal/linuxapp/xen_dom0/dom0_mm_dev.h
+++ /dev/null
@@ -1,107 +0,0 @@
-/*-
- * This file is provided under a dual BSD/GPLv2 license. When using or
- * redistributing this file, you may do so under either license.
- *
- * GPL LICENSE SUMMARY
- *
- * Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of version 2 of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
- * The full GNU General Public License is included in this distribution
- * in the file called LICENSE.GPL.
- *
- * Contact Information:
- * Intel Corporation
- *
- * BSD LICENSE
- *
- * Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- *
- * * Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in
- * the documentation and/or other materials provided with the
- * distribution.
- * * Neither the name of Intel Corporation nor the names of its
- * contributors may be used to endorse or promote products derived
- * from this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- *
- */
-#ifndef _DOM0_MM_DEV_H_
-#define _DOM0_MM_DEV_H_
-
-#include <linux/wait.h>
-#include <linux/mutex.h>
-#include <linux/sched.h>
-#include <linux/spinlock.h>
-#include <exec-env/rte_dom0_common.h>
-
-#define NUM_MEM_CTX 256 /**< Maximum number of memory context*/
-#define MAX_EXCHANGE_FAIL_TIME 5 /**< Maximum times of allowing exchange fail .*/
-#define MAX_MEMBLOCK_SIZE (2 * DOM0_MEMBLOCK_SIZE)
-#define MAX_NUM_ORDER (DOM0_CONTIG_NUM_ORDER + 1)
-#define SIZE_PER_BLOCK 2 /**< Size of memory block (2MB).*/
-
-/**
- * A structure describing the private information for a dom0 device.
- */
-struct dom0_mm_dev {
- struct miscdevice miscdev;
- uint8_t fail_times;
- uint32_t used_memsize;
- uint32_t num_mem_ctx;
- uint32_t config_memsize;
- uint32_t num_bigblock;
- struct dom0_mm_data *mm_data[NUM_MEM_CTX];
- struct mutex data_lock;
-};
-
-struct dom0_mm_data{
- uint32_t refcnt;
- uint32_t num_memseg; /**< Number of memory segment. */
- uint32_t mem_size; /**< Size of requesting memory. */
-
- char name[DOM0_NAME_MAX];
-
- /** Store global memory block IDs used by an instance */
- uint32_t block_num[DOM0_NUM_MEMBLOCK];
-
- /** Store memory block information.*/
- struct memblock_info block_info[DOM0_NUM_MEMBLOCK];
-
- /** Store memory segment information.*/
- struct memseg_info seg_info[DOM0_NUM_MEMSEG];
-};
-
-#define XEN_ERR(args...) printk(KERN_DEBUG "XEN_DOM0: Error: " args)
-#define XEN_PRINT(args...) printk(KERN_DEBUG "XEN_DOM0: " args)
-#endif
diff --git a/lib/librte_eal/linuxapp/xen_dom0/dom0_mm_misc.c b/lib/librte_eal/linuxapp/xen_dom0/dom0_mm_misc.c
deleted file mode 100644
index 79630bad..00000000
--- a/lib/librte_eal/linuxapp/xen_dom0/dom0_mm_misc.c
+++ /dev/null
@@ -1,780 +0,0 @@
-/*-
- * This file is provided under a dual BSD/GPLv2 license. When using or
- * redistributing this file, you may do so under either license.
- *
- * GPL LICENSE SUMMARY
- *
- * Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of version 2 of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
- * The full GNU General Public License is included in this distribution
- * in the file called LICENSE.GPL.
- *
- * Contact Information:
- * Intel Corporation
- *
- * BSD LICENSE
- *
- * Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- *
- * * Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in
- * the documentation and/or other materials provided with the
- * distribution.
- * * Neither the name of Intel Corporation nor the names of its
- * contributors may be used to endorse or promote products derived
- * from this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- *
- */
-
-#include <linux/module.h>
-#include <linux/miscdevice.h>
-#include <linux/fs.h>
-#include <linux/device.h>
-#include <linux/errno.h>
-#include <linux/vmalloc.h>
-#include <linux/mm.h>
-#include <linux/version.h>
-
-#include <xen/xen.h>
-#include <xen/page.h>
-#include <xen/xen-ops.h>
-#include <xen/interface/memory.h>
-
-#include <exec-env/rte_dom0_common.h>
-
-#include "compat.h"
-#include "dom0_mm_dev.h"
-
-MODULE_LICENSE("Dual BSD/GPL");
-MODULE_AUTHOR("Intel Corporation");
-MODULE_DESCRIPTION("Kernel Module for supporting DPDK running on Xen Dom0");
-
-static struct dom0_mm_dev dom0_dev;
-static struct kobject *dom0_kobj = NULL;
-
-static struct memblock_info *rsv_mm_info;
-
-/* Default configuration for reserved memory size(2048 MB). */
-static uint32_t rsv_memsize = 2048;
-
-static int dom0_open(struct inode *inode, struct file *file);
-static int dom0_release(struct inode *inode, struct file *file);
-static int dom0_ioctl(struct file *file, unsigned int ioctl_num,
- unsigned long ioctl_param);
-static int dom0_mmap(struct file *file, struct vm_area_struct *vma);
-static int dom0_memory_free(uint32_t size);
-static int dom0_memory_release(struct dom0_mm_data *mm_data);
-
-static const struct file_operations data_fops = {
- .owner = THIS_MODULE,
- .open = dom0_open,
- .release = dom0_release,
- .mmap = dom0_mmap,
- .unlocked_ioctl = (void *)dom0_ioctl,
-};
-
-static ssize_t
-show_memsize_rsvd(struct device *dev, struct device_attribute *attr, char *buf)
-{
- return snprintf(buf, 10, "%u\n", dom0_dev.used_memsize);
-}
-
-static ssize_t
-show_memsize(struct device *dev, struct device_attribute *attr, char *buf)
-{
- return snprintf(buf, 10, "%u\n", dom0_dev.config_memsize);
-}
-
-static ssize_t
-store_memsize(struct device *dev, struct device_attribute *attr,
- const char *buf, size_t count)
-{
- int err = 0;
- unsigned long mem_size;
-
- if (0 != kstrtoul(buf, 0, &mem_size))
- return -EINVAL;
-
- mutex_lock(&dom0_dev.data_lock);
- if (0 == mem_size) {
- err = -EINVAL;
- goto fail;
- } else if (mem_size > (rsv_memsize - dom0_dev.used_memsize)) {
- XEN_ERR("configure memory size fail\n");
- err = -EINVAL;
- goto fail;
- } else
- dom0_dev.config_memsize = mem_size;
-
-fail:
- mutex_unlock(&dom0_dev.data_lock);
- return err ? err : count;
-}
-
-static DEVICE_ATTR(memsize, S_IRUGO | S_IWUSR, show_memsize, store_memsize);
-static DEVICE_ATTR(memsize_rsvd, S_IRUGO, show_memsize_rsvd, NULL);
-
-static struct attribute *dev_attrs[] = {
- &dev_attr_memsize.attr,
- &dev_attr_memsize_rsvd.attr,
- NULL,
-};
-
-/* the memory size unit is MB */
-static const struct attribute_group dev_attr_grp = {
- .name = "memsize-mB",
- .attrs = dev_attrs,
-};
-
-
-static void
-sort_viraddr(struct memblock_info *mb, int cnt)
-{
- int i,j;
- uint64_t tmp_pfn;
- uint64_t tmp_viraddr;
-
- /*sort virtual address and pfn */
- for(i = 0; i < cnt; i ++) {
- for(j = cnt - 1; j > i; j--) {
- if(mb[j].pfn < mb[j - 1].pfn) {
- tmp_pfn = mb[j - 1].pfn;
- mb[j - 1].pfn = mb[j].pfn;
- mb[j].pfn = tmp_pfn;
-
- tmp_viraddr = mb[j - 1].vir_addr;
- mb[j - 1].vir_addr = mb[j].vir_addr;
- mb[j].vir_addr = tmp_viraddr;
- }
- }
- }
-}
-
-static int
-dom0_find_memdata(const char * mem_name)
-{
- unsigned i;
- int idx = -1;
- for(i = 0; i< NUM_MEM_CTX; i++) {
- if(dom0_dev.mm_data[i] == NULL)
- continue;
- if (!strncmp(dom0_dev.mm_data[i]->name, mem_name,
- sizeof(char) * DOM0_NAME_MAX)) {
- idx = i;
- break;
- }
- }
-
- return idx;
-}
-
-static int
-dom0_find_mempos(void)
-{
- unsigned i;
- int idx = -1;
-
- for(i = 0; i< NUM_MEM_CTX; i++) {
- if(dom0_dev.mm_data[i] == NULL){
- idx = i;
- break;
- }
- }
-
- return idx;
-}
-
-static int
-dom0_memory_release(struct dom0_mm_data *mm_data)
-{
- int idx;
- uint32_t num_block, block_id;
-
- /* each memory block is 2M */
- num_block = mm_data->mem_size / SIZE_PER_BLOCK;
- if (num_block == 0)
- return -EINVAL;
-
- /* reset global memory data */
- idx = dom0_find_memdata(mm_data->name);
- if (idx >= 0) {
- dom0_dev.used_memsize -= mm_data->mem_size;
- dom0_dev.mm_data[idx] = NULL;
- dom0_dev.num_mem_ctx--;
- }
-
- /* reset these memory blocks status as free */
- for (idx = 0; idx < num_block; idx++) {
- block_id = mm_data->block_num[idx];
- rsv_mm_info[block_id].used = 0;
- }
-
- memset(mm_data, 0, sizeof(struct dom0_mm_data));
- vfree(mm_data);
- return 0;
-}
-
-static int
-dom0_memory_free(uint32_t rsv_size)
-{
- uint64_t vstart, vaddr;
- uint32_t i, num_block, size;
-
- if (!xen_pv_domain())
- return -1;
-
- /* each memory block is 2M */
- num_block = rsv_size / SIZE_PER_BLOCK;
- if (num_block == 0)
- return -EINVAL;
-
- /* free all memory blocks of size of 4M and destroy contiguous region */
- for (i = 0; i < dom0_dev.num_bigblock * 2; i += 2) {
- vstart = rsv_mm_info[i].vir_addr;
- if (vstart) {
- #if LINUX_VERSION_CODE < KERNEL_VERSION(3, 13, 0)
- if (rsv_mm_info[i].exchange_flag)
- xen_destroy_contiguous_region(vstart,
- DOM0_CONTIG_NUM_ORDER);
- if (rsv_mm_info[i + 1].exchange_flag)
- xen_destroy_contiguous_region(vstart +
- DOM0_MEMBLOCK_SIZE,
- DOM0_CONTIG_NUM_ORDER);
- #else
- if (rsv_mm_info[i].exchange_flag)
- xen_destroy_contiguous_region(rsv_mm_info[i].pfn
- * PAGE_SIZE,
- DOM0_CONTIG_NUM_ORDER);
- if (rsv_mm_info[i + 1].exchange_flag)
- xen_destroy_contiguous_region(rsv_mm_info[i].pfn
- * PAGE_SIZE + DOM0_MEMBLOCK_SIZE,
- DOM0_CONTIG_NUM_ORDER);
- #endif
-
- size = DOM0_MEMBLOCK_SIZE * 2;
- vaddr = vstart;
- while (size > 0) {
- ClearPageReserved(virt_to_page(vaddr));
- vaddr += PAGE_SIZE;
- size -= PAGE_SIZE;
- }
- free_pages(vstart, MAX_NUM_ORDER);
- }
- }
-
- /* free all memory blocks size of 2M and destroy contiguous region */
- for (; i < num_block; i++) {
- vstart = rsv_mm_info[i].vir_addr;
- if (vstart) {
- if (rsv_mm_info[i].exchange_flag)
- xen_destroy_contiguous_region(vstart,
- DOM0_CONTIG_NUM_ORDER);
-
- size = DOM0_MEMBLOCK_SIZE;
- vaddr = vstart;
- while (size > 0) {
- ClearPageReserved(virt_to_page(vaddr));
- vaddr += PAGE_SIZE;
- size -= PAGE_SIZE;
- }
- free_pages(vstart, DOM0_CONTIG_NUM_ORDER);
- }
- }
-
- memset(rsv_mm_info, 0, sizeof(struct memblock_info) * num_block);
- vfree(rsv_mm_info);
- rsv_mm_info = NULL;
-
- return 0;
-}
-
-static void
-find_free_memory(uint32_t count, struct dom0_mm_data *mm_data)
-{
- uint32_t i = 0;
- uint32_t j = 0;
-
- while ((i < count) && (j < rsv_memsize / SIZE_PER_BLOCK)) {
- if (rsv_mm_info[j].used == 0) {
- mm_data->block_info[i].pfn = rsv_mm_info[j].pfn;
- mm_data->block_info[i].vir_addr =
- rsv_mm_info[j].vir_addr;
- mm_data->block_info[i].mfn = rsv_mm_info[j].mfn;
- mm_data->block_info[i].exchange_flag =
- rsv_mm_info[j].exchange_flag;
- mm_data->block_num[i] = j;
- rsv_mm_info[j].used = 1;
- i++;
- }
- j++;
- }
-}
-
-/**
- * Find all memory segments in which physical addresses are contiguous.
- */
-static void
-find_memseg(int count, struct dom0_mm_data * mm_data)
-{
- int i = 0;
- int j, k, idx = 0;
- uint64_t zone_len, pfn, num_block;
-
- while(i < count) {
- if (mm_data->block_info[i].exchange_flag == 0) {
- i++;
- continue;
- }
- k = 0;
- pfn = mm_data->block_info[i].pfn;
- mm_data->seg_info[idx].pfn = pfn;
- mm_data->seg_info[idx].mfn[k] = mm_data->block_info[i].mfn;
-
- for (j = i + 1; j < count; j++) {
-
- /* ignore exchange fail memory block */
- if (mm_data->block_info[j].exchange_flag == 0)
- break;
-
- if (mm_data->block_info[j].pfn !=
- (mm_data->block_info[j - 1].pfn +
- DOM0_MEMBLOCK_SIZE / PAGE_SIZE))
- break;
- ++k;
- mm_data->seg_info[idx].mfn[k] = mm_data->block_info[j].mfn;
- }
-
- num_block = j - i;
- zone_len = num_block * DOM0_MEMBLOCK_SIZE;
- mm_data->seg_info[idx].size = zone_len;
-
- XEN_PRINT("memseg id=%d, size=0x%llx\n", idx, zone_len);
- i = i+ num_block;
- idx++;
- if (idx == DOM0_NUM_MEMSEG)
- break;
- }
- mm_data->num_memseg = idx;
-}
-
-static int
-dom0_memory_reserve(uint32_t rsv_size)
-{
- uint64_t pfn, vstart, vaddr;
- uint32_t i, num_block, size, allocated_size = 0;
-
-#if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 13, 0)
- dma_addr_t dma_handle;
-#endif
-
- /* 2M as memory block */
- num_block = rsv_size / SIZE_PER_BLOCK;
-
- rsv_mm_info = vmalloc(sizeof(struct memblock_info) * num_block);
- if (!rsv_mm_info) {
- XEN_ERR("Unable to allocate device memory information\n");
- return -ENOMEM;
- }
- memset(rsv_mm_info, 0, sizeof(struct memblock_info) * num_block);
-
- /* try alloc size of 4M once */
- for (i = 0; i < num_block; i += 2) {
- vstart = (unsigned long)
- __get_free_pages(GFP_ATOMIC, MAX_NUM_ORDER);
- if (vstart == 0)
- break;
-
- dom0_dev.num_bigblock = i / 2 + 1;
- allocated_size = SIZE_PER_BLOCK * (i + 2);
-
- /* size of 4M */
- size = DOM0_MEMBLOCK_SIZE * 2;
-
- vaddr = vstart;
- while (size > 0) {
- SetPageReserved(virt_to_page(vaddr));
- vaddr += PAGE_SIZE;
- size -= PAGE_SIZE;
- }
-
- pfn = virt_to_pfn(vstart);
- rsv_mm_info[i].pfn = pfn;
- rsv_mm_info[i].vir_addr = vstart;
- rsv_mm_info[i + 1].pfn =
- pfn + DOM0_MEMBLOCK_SIZE / PAGE_SIZE;
- rsv_mm_info[i + 1].vir_addr =
- vstart + DOM0_MEMBLOCK_SIZE;
- }
-
- /*if it failed to alloc 4M, and continue to alloc 2M once */
- for (; i < num_block; i++) {
- vstart = (unsigned long)
- __get_free_pages(GFP_ATOMIC, DOM0_CONTIG_NUM_ORDER);
- if (vstart == 0) {
- XEN_ERR("allocate memory fail.\n");
- dom0_memory_free(allocated_size);
- return -ENOMEM;
- }
-
- allocated_size += SIZE_PER_BLOCK;
-
- size = DOM0_MEMBLOCK_SIZE;
- vaddr = vstart;
- while (size > 0) {
- SetPageReserved(virt_to_page(vaddr));
- vaddr += PAGE_SIZE;
- size -= PAGE_SIZE;
- }
- pfn = virt_to_pfn(vstart);
- rsv_mm_info[i].pfn = pfn;
- rsv_mm_info[i].vir_addr = vstart;
- }
-
- sort_viraddr(rsv_mm_info, num_block);
-
- for (i = 0; i< num_block; i++) {
-
- /*
- * This API is used to exchage MFN for getting a block of
- * contiguous physical addresses, its maximum size is 2M.
- */
- #if LINUX_VERSION_CODE < KERNEL_VERSION(3, 13, 0)
- if (xen_create_contiguous_region(rsv_mm_info[i].vir_addr,
- DOM0_CONTIG_NUM_ORDER, 0) == 0) {
- #else
- if (xen_create_contiguous_region(rsv_mm_info[i].pfn * PAGE_SIZE,
- DOM0_CONTIG_NUM_ORDER, 0, &dma_handle) == 0) {
- #endif
- rsv_mm_info[i].exchange_flag = 1;
- rsv_mm_info[i].mfn =
- pfn_to_mfn(rsv_mm_info[i].pfn);
- rsv_mm_info[i].used = 0;
- } else {
- XEN_ERR("exchange memeory fail\n");
- rsv_mm_info[i].exchange_flag = 0;
- dom0_dev.fail_times++;
- if (dom0_dev.fail_times > MAX_EXCHANGE_FAIL_TIME) {
- dom0_memory_free(rsv_size);
- return -EFAULT;
- }
- }
- }
-
- return 0;
-}
-
-static int
-dom0_prepare_memsegs(struct memory_info *meminfo, struct dom0_mm_data *mm_data)
-{
- uint32_t num_block;
- int idx;
-
- /* check if there is a free name buffer */
- memcpy(mm_data->name, meminfo->name, DOM0_NAME_MAX);
- mm_data->name[DOM0_NAME_MAX - 1] = '\0';
- idx = dom0_find_mempos();
- if (idx < 0)
- return -1;
-
- num_block = meminfo->size / SIZE_PER_BLOCK;
- /* find free memory and new memory segments*/
- find_free_memory(num_block, mm_data);
- find_memseg(num_block, mm_data);
-
- /* update private memory data */
- mm_data->refcnt++;
- mm_data->mem_size = meminfo->size;
-
- /* update global memory data */
- dom0_dev.mm_data[idx] = mm_data;
- dom0_dev.num_mem_ctx++;
- dom0_dev.used_memsize += mm_data->mem_size;
-
- return 0;
-}
-
-static int
-dom0_check_memory (struct memory_info *meminfo)
-{
- int idx;
- uint64_t mem_size;
-
- /* round memory size to the next even number. */
- if (meminfo->size % 2)
- ++meminfo->size;
-
- mem_size = meminfo->size;
- if (dom0_dev.num_mem_ctx > NUM_MEM_CTX) {
- XEN_ERR("Memory data space is full in Dom0 driver\n");
- return -1;
- }
- idx = dom0_find_memdata(meminfo->name);
- if (idx >= 0) {
- XEN_ERR("Memory data name %s has already exsited in Dom0 driver.\n",
- meminfo->name);
- return -1;
- }
- if ((dom0_dev.used_memsize + mem_size) > rsv_memsize) {
- XEN_ERR("Total size can't be larger than reserved size.\n");
- return -1;
- }
-
- return 0;
-}
-
-static int __init
-dom0_init(void)
-{
- if (!xen_domain())
- return -ENODEV;
-
- if (rsv_memsize > DOM0_CONFIG_MEMSIZE) {
- XEN_ERR("The reserved memory size cannot be greater than %d\n",
- DOM0_CONFIG_MEMSIZE);
- return -EINVAL;
- }
-
- /* Setup the misc device */
- dom0_dev.miscdev.minor = MISC_DYNAMIC_MINOR;
- dom0_dev.miscdev.name = "dom0_mm";
- dom0_dev.miscdev.fops = &data_fops;
-
- /* register misc char device */
- if (misc_register(&dom0_dev.miscdev) != 0) {
- XEN_ERR("Misc device registration failed\n");
- return -EPERM;
- }
-
- mutex_init(&dom0_dev.data_lock);
- dom0_kobj = kobject_create_and_add("dom0-mm", mm_kobj);
-
- if (!dom0_kobj) {
- XEN_ERR("dom0-mm object creation failed\n");
- misc_deregister(&dom0_dev.miscdev);
- return -ENOMEM;
- }
-
- if (sysfs_create_group(dom0_kobj, &dev_attr_grp)) {
- kobject_put(dom0_kobj);
- misc_deregister(&dom0_dev.miscdev);
- return -EPERM;
- }
-
- if (dom0_memory_reserve(rsv_memsize) < 0) {
- sysfs_remove_group(dom0_kobj, &dev_attr_grp);
- kobject_put(dom0_kobj);
- misc_deregister(&dom0_dev.miscdev);
- return -ENOMEM;
- }
-
- XEN_PRINT("####### DPDK Xen Dom0 module loaded #######\n");
-
- return 0;
-}
-
-static void __exit
-dom0_exit(void)
-{
- if (rsv_mm_info != NULL)
- dom0_memory_free(rsv_memsize);
-
- sysfs_remove_group(dom0_kobj, &dev_attr_grp);
- kobject_put(dom0_kobj);
- misc_deregister(&dom0_dev.miscdev);
-
- XEN_PRINT("####### DPDK Xen Dom0 module unloaded #######\n");
-}
-
-static int
-dom0_open(struct inode *inode, struct file *file)
-{
- file->private_data = NULL;
-
- XEN_PRINT(KERN_INFO "/dev/dom0_mm opened\n");
- return 0;
-}
-
-static int
-dom0_release(struct inode *inode, struct file *file)
-{
- int ret = 0;
- struct dom0_mm_data *mm_data = file->private_data;
-
- if (mm_data == NULL)
- return ret;
-
- mutex_lock(&dom0_dev.data_lock);
- if (--mm_data->refcnt == 0)
- ret = dom0_memory_release(mm_data);
- mutex_unlock(&dom0_dev.data_lock);
-
- file->private_data = NULL;
- XEN_PRINT(KERN_INFO "/dev/dom0_mm closed\n");
- return ret;
-}
-
-static int
-dom0_mmap(struct file *file, struct vm_area_struct *vm)
-{
- int status = 0;
- uint32_t idx = vm->vm_pgoff;
- uint64_t pfn, size = vm->vm_end - vm->vm_start;
- struct dom0_mm_data *mm_data = file->private_data;
-
- if(mm_data == NULL)
- return -EINVAL;
-
- mutex_lock(&dom0_dev.data_lock);
- if (idx >= mm_data->num_memseg) {
- mutex_unlock(&dom0_dev.data_lock);
- return -EINVAL;
- }
-
- if (size > mm_data->seg_info[idx].size){
- mutex_unlock(&dom0_dev.data_lock);
- return -EINVAL;
- }
-
- XEN_PRINT("mmap memseg idx =%d,size = 0x%llx\n", idx, size);
-
- pfn = mm_data->seg_info[idx].pfn;
- mutex_unlock(&dom0_dev.data_lock);
-
- status = remap_pfn_range(vm, vm->vm_start, pfn, size, PAGE_SHARED);
-
- return status;
-}
-static int
-dom0_ioctl(struct file *file,
- unsigned int ioctl_num,
- unsigned long ioctl_param)
-{
- int idx, ret;
- char name[DOM0_NAME_MAX] = {0};
- struct memory_info meminfo;
- struct dom0_mm_data *mm_data = file->private_data;
-
- XEN_PRINT("IOCTL num=0x%0x param=0x%0lx \n", ioctl_num, ioctl_param);
-
- /**
- * Switch according to the ioctl called
- */
- switch _IOC_NR(ioctl_num) {
- case _IOC_NR(RTE_DOM0_IOCTL_PREPARE_MEMSEG):
- ret = copy_from_user(&meminfo, (void *)ioctl_param,
- sizeof(struct memory_info));
- if (ret)
- return -EFAULT;
-
- if (mm_data != NULL) {
- XEN_ERR("Cannot create memory segment for the same"
- " file descriptor\n");
- return -EINVAL;
- }
-
- /* Allocate private data */
- mm_data = vmalloc(sizeof(struct dom0_mm_data));
- if (!mm_data) {
- XEN_ERR("Unable to allocate device private data\n");
- return -ENOMEM;
- }
- memset(mm_data, 0, sizeof(struct dom0_mm_data));
-
- mutex_lock(&dom0_dev.data_lock);
- /* check if we can allocate memory*/
- if (dom0_check_memory(&meminfo) < 0) {
- mutex_unlock(&dom0_dev.data_lock);
- vfree(mm_data);
- return -EINVAL;
- }
-
- /* allocate memory and created memory segments*/
- if (dom0_prepare_memsegs(&meminfo, mm_data) < 0) {
- XEN_ERR("create memory segment fail.\n");
- mutex_unlock(&dom0_dev.data_lock);
- return -EIO;
- }
-
- file->private_data = mm_data;
- mutex_unlock(&dom0_dev.data_lock);
- break;
-
- /* support multiple process in term of memory mapping*/
- case _IOC_NR(RTE_DOM0_IOCTL_ATTACH_TO_MEMSEG):
- ret = copy_from_user(name, (void *)ioctl_param,
- sizeof(char) * DOM0_NAME_MAX);
- if (ret)
- return -EFAULT;
-
- mutex_lock(&dom0_dev.data_lock);
- idx = dom0_find_memdata(name);
- if (idx < 0) {
- mutex_unlock(&dom0_dev.data_lock);
- return -EINVAL;
- }
-
- mm_data = dom0_dev.mm_data[idx];
- mm_data->refcnt++;
- file->private_data = mm_data;
- mutex_unlock(&dom0_dev.data_lock);
- break;
-
- case _IOC_NR(RTE_DOM0_IOCTL_GET_NUM_MEMSEG):
- ret = copy_to_user((void *)ioctl_param, &mm_data->num_memseg,
- sizeof(int));
- if (ret)
- return -EFAULT;
- break;
-
- case _IOC_NR(RTE_DOM0_IOCTL_GET_MEMSEG_INFO):
- ret = copy_to_user((void *)ioctl_param,
- &mm_data->seg_info[0],
- sizeof(struct memseg_info) *
- mm_data->num_memseg);
- if (ret)
- return -EFAULT;
- break;
- default:
- XEN_PRINT("IOCTL default \n");
- break;
- }
-
- return 0;
-}
-
-module_init(dom0_init);
-module_exit(dom0_exit);
-
-module_param(rsv_memsize, uint, S_IRUGO | S_IWUSR);
-MODULE_PARM_DESC(rsv_memsize, "Xen-dom0 reserved memory size(MB).\n");
diff --git a/lib/librte_eal/linuxapp/eal/rte_eal_version.map b/lib/librte_eal/rte_eal_version.map
index 3a8f1540..f4f46c1b 100644
--- a/lib/librte_eal/linuxapp/eal/rte_eal_version.map
+++ b/lib/librte_eal/rte_eal_version.map
@@ -44,8 +44,6 @@ DPDK_2.0 {
rte_free;
rte_get_hpet_cycles;
rte_get_hpet_hz;
- rte_get_log_level;
- rte_get_log_type;
rte_get_tsc_hz;
rte_hexdump;
rte_intr_callback_register;
@@ -62,9 +60,7 @@ DPDK_2.0 {
rte_malloc_set_limit;
rte_malloc_socket;
rte_malloc_validate;
- rte_malloc_virt2phy;
rte_mem_lock_page;
- rte_mem_phy2mch;
rte_mem_virt2phy;
rte_memdump;
rte_memory_get_nchannel;
@@ -78,8 +74,6 @@ DPDK_2.0 {
rte_openlog_stream;
rte_realloc;
rte_set_application_usage_hook;
- rte_set_log_level;
- rte_set_log_type;
rte_socket_id;
rte_strerror;
rte_strsplit;
@@ -87,8 +81,6 @@ DPDK_2.0 {
rte_thread_get_affinity;
rte_thread_set_affinity;
rte_vlog;
- rte_xen_dom0_memory_attach;
- rte_xen_dom0_memory_init;
rte_zmalloc;
rte_zmalloc_socket;
@@ -118,8 +110,6 @@ DPDK_2.2 {
rte_keepalive_dispatch_pings;
rte_keepalive_mark_alive;
rte_keepalive_register_core;
- rte_xen_dom0_supported;
- rte_xen_mem_phy2mch;
} DPDK_2.1;
@@ -134,7 +124,6 @@ DPDK_16.04 {
DPDK_16.07 {
global:
- pci_get_sysfs_path;
rte_keepalive_mark_sleep;
rte_keepalive_register_relay_callback;
rte_rtm_supported;
@@ -174,25 +163,6 @@ DPDK_17.05 {
rte_log_set_global_level;
rte_log_set_level;
rte_log_set_level_regexp;
- rte_pci_detach;
- rte_pci_dump;
- rte_pci_ioport_map;
- rte_pci_ioport_read;
- rte_pci_ioport_unmap;
- rte_pci_ioport_write;
- rte_pci_map_device;
- rte_pci_probe;
- rte_pci_probe_one;
- rte_pci_read_config;
- rte_pci_register;
- rte_pci_scan;
- rte_pci_unmap_device;
- rte_pci_unregister;
- rte_pci_write_config;
- rte_vdev_init;
- rte_vdev_register;
- rte_vdev_uninit;
- rte_vdev_unregister;
vfio_get_container_fd;
vfio_get_group_fd;
vfio_get_group_no;
@@ -209,6 +179,27 @@ DPDK_17.08 {
} DPDK_17.05;
+DPDK_17.11 {
+ global:
+
+ rte_eal_create_uio_dev;
+ rte_bus_get_iommu_class;
+ rte_eal_has_pci;
+ rte_eal_iova_mode;
+ rte_eal_mbuf_default_mempool_ops;
+ rte_eal_using_phys_addrs;
+ rte_eal_vfio_intr_mode;
+ rte_lcore_has_role;
+ rte_malloc_virt2iova;
+ rte_mem_virt2iova;
+ rte_vfio_enable;
+ rte_vfio_is_enabled;
+ rte_vfio_noiommu_is_enabled;
+ rte_vfio_release_device;
+ rte_vfio_setup_device;
+
+} DPDK_17.08;
+
EXPERIMENTAL {
global:
@@ -217,28 +208,31 @@ EXPERIMENTAL {
rte_eal_devargs_remove;
rte_eal_hotplug_add;
rte_eal_hotplug_remove;
- rte_service_disable_on_lcore;
+ rte_service_component_register;
+ rte_service_component_unregister;
+ rte_service_component_runstate_set;
rte_service_dump;
- rte_service_enable_on_lcore;
rte_service_get_by_id;
rte_service_get_by_name;
rte_service_get_count;
- rte_service_get_enabled_on_lcore;
- rte_service_is_running;
+ rte_service_get_name;
rte_service_lcore_add;
rte_service_lcore_count;
+ rte_service_lcore_count_services;
rte_service_lcore_del;
rte_service_lcore_list;
rte_service_lcore_reset_all;
rte_service_lcore_start;
rte_service_lcore_stop;
+ rte_service_map_lcore_get;
+ rte_service_map_lcore_set;
rte_service_probe_capability;
- rte_service_register;
rte_service_reset;
+ rte_service_run_iter_on_app_lcore;
+ rte_service_runstate_get;
+ rte_service_runstate_set;
+ rte_service_set_runstate_mapped_check;
rte_service_set_stats_enable;
- rte_service_start;
rte_service_start_with_defaults;
- rte_service_stop;
- rte_service_unregister;
-} DPDK_17.08;
+} DPDK_17.11;
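Note: the hunks above drop long-deprecated EAL symbols (the legacy log-level setters, the Xen dom0 helpers, and the rte_pci_*/rte_vdev_* entry points, which now live in the bus drivers) and add a DPDK_17.11 node for the new IOVA, VFIO and lcore-role APIs. A minimal, hedged sketch of replacing the removed log calls with the rte_log_* equivalents that stay exported:

#include <rte_log.h>

/* Hedged sketch: rte_set_log_level()/rte_get_log_level() are gone from
 * the map above; the global level is handled by rte_log_* instead. */
static void
raise_log_verbosity(void)
{
	rte_log_set_global_level(RTE_LOG_DEBUG);
	RTE_LOG(DEBUG, USER1, "global log level is now %u\n",
		rte_log_get_global_level());
}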
diff --git a/lib/librte_efd/Makefile b/lib/librte_efd/Makefile
index b9277bc5..16e450e8 100644
--- a/lib/librte_efd/Makefile
+++ b/lib/librte_efd/Makefile
@@ -36,6 +36,7 @@ LIB = librte_efd.a
CFLAGS += -O3
CFLAGS += $(WERROR_FLAGS) -I$(SRCDIR)
+LDLIBS += -lrte_eal -lrte_ring -lrte_hash
EXPORT_MAP := rte_efd_version.map
diff --git a/lib/librte_efd/rte_efd.c b/lib/librte_efd/rte_efd.c
index 4d9a0887..8771d042 100644
--- a/lib/librte_efd/rte_efd.c
+++ b/lib/librte_efd/rte_efd.c
@@ -42,7 +42,6 @@
#include <rte_eal_memconfig.h>
#include <rte_errno.h>
#include <rte_malloc.h>
-#include <rte_memzone.h>
#include <rte_prefetch.h>
#include <rte_branch_prediction.h>
#include <rte_memcpy.h>
@@ -1278,7 +1277,7 @@ efd_lookup_internal(const struct efd_online_group_entry * const group,
switch (lookup_fn) {
-#if defined(RTE_ARCH_X86)
+#if defined(RTE_ARCH_X86) && defined(CC_SUPPORT_AVX2)
case EFD_LOOKUP_AVX2:
return efd_lookup_internal_avx2(group->hash_idx,
group->lookup_table,
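Note: the EFD change above compiles the AVX2 lookup case only when the toolchain advertises CC_SUPPORT_AVX2, so builds with older compilers keep the scalar path. A hedged, self-contained sketch of the same guard pattern, with hypothetical helper names:

#include <stdint.h>

enum lookup_fn { LOOKUP_SCALAR, LOOKUP_AVX2 };

/* Hypothetical scalar and vector lookups standing in for the EFD internals. */
static uint32_t lookup_scalar(uint32_t key) { return key * 2654435761u; }
#if defined(RTE_ARCH_X86) && defined(CC_SUPPORT_AVX2)
static uint32_t lookup_avx2(uint32_t key) { return key * 2654435761u; }
#endif

static uint32_t
dispatch_lookup(enum lookup_fn fn, uint32_t key)
{
	switch (fn) {
#if defined(RTE_ARCH_X86) && defined(CC_SUPPORT_AVX2)
	case LOOKUP_AVX2:
		return lookup_avx2(key);   /* built only when the compiler has AVX2 */
#endif
	case LOOKUP_SCALAR:
	default:
		return lookup_scalar(key); /* portable fallback */
	}
}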
diff --git a/lib/librte_ether/Makefile b/lib/librte_ether/Makefile
index db692ae4..394cc9c0 100644
--- a/lib/librte_ether/Makefile
+++ b/lib/librte_ether/Makefile
@@ -38,14 +38,18 @@ LIB = librte_ethdev.a
CFLAGS += -O3
CFLAGS += $(WERROR_FLAGS)
+LDLIBS += -lrte_net -lrte_eal -lrte_mempool -lrte_ring
+LDLIBS += -lrte_mbuf
-EXPORT_MAP := rte_ether_version.map
+EXPORT_MAP := rte_ethdev_version.map
-LIBABIVER := 6
+LIBABIVER := 8
SRCS-y += rte_ethdev.c
SRCS-y += rte_flow.c
SRCS-y += rte_tm.c
+SRCS-y += rte_mtr.c
+SRCS-y += ethdev_profile.c
#
# Export include files
@@ -59,5 +63,7 @@ SYMLINK-y-include += rte_flow.h
SYMLINK-y-include += rte_flow_driver.h
SYMLINK-y-include += rte_tm.h
SYMLINK-y-include += rte_tm_driver.h
+SYMLINK-y-include += rte_mtr.h
+SYMLINK-y-include += rte_mtr_driver.h
include $(RTE_SDK)/mk/rte.lib.mk
diff --git a/lib/librte_ether/ethdev_profile.c b/lib/librte_ether/ethdev_profile.c
new file mode 100644
index 00000000..c9cb8420
--- /dev/null
+++ b/lib/librte_ether/ethdev_profile.c
@@ -0,0 +1,164 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright(c) 2010-2017 Intel Corporation. All rights reserved.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "ethdev_profile.h"
+
+/**
+ * This conditional block enables RX queues profiling by tracking wasted
+ * iterations, i.e. iterations which yielded no RX packets. Profiling is
+ * performed using the Instrumentation and Tracing Technology (ITT) API,
+ * employed by the Intel (R) VTune (TM) Amplifier.
+ */
+#ifdef RTE_ETHDEV_PROFILE_ITT_WASTED_RX_ITERATIONS
+
+#include <ittnotify.h>
+
+#define ITT_MAX_NAME_LEN (100)
+
+/**
+ * Auxiliary ITT structure belonging to an Ethernet device, used to:
+ * - track RX queue state to determine whether it is wasting loop iterations
+ * - begin or end ITT task using task domain and task name (handle)
+ */
+struct itt_profile_rx_data {
+ /**
+ * ITT domains for each queue.
+ */
+ __itt_domain *domains[RTE_MAX_QUEUES_PER_PORT];
+ /**
+ * ITT task names for each queue.
+ */
+ __itt_string_handle *handles[RTE_MAX_QUEUES_PER_PORT];
+ /**
+ * Flags indicating the queues state. Possible values:
+ * 1 - queue is wasting iterations,
+ * 0 - otherwise.
+ */
+ uint8_t queue_state[RTE_MAX_QUEUES_PER_PORT];
+};
+
+/**
+ * The pool of *itt_profile_rx_data* structures.
+ */
+struct itt_profile_rx_data itt_rx_data[RTE_MAX_ETHPORTS];
+
+
+/**
+ * This callback function manages ITT tasks collection on given port and queue.
+ * It must be registered with rte_eth_add_rx_callback() to be called from
+ * rte_eth_rx_burst(). For further details, see the rte_rx_callback_fn
+ * type declaration.
+ */
+static uint16_t
+collect_itt_rx_burst_cb(uint16_t port_id, uint16_t queue_id,
+ __rte_unused struct rte_mbuf *pkts[], uint16_t nb_pkts,
+ __rte_unused uint16_t max_pkts, __rte_unused void *user_param)
+{
+ if (unlikely(nb_pkts == 0)) {
+ if (!itt_rx_data[port_id].queue_state[queue_id]) {
+ __itt_task_begin(
+ itt_rx_data[port_id].domains[queue_id],
+ __itt_null, __itt_null,
+ itt_rx_data[port_id].handles[queue_id]);
+ itt_rx_data[port_id].queue_state[queue_id] = 1;
+ }
+ } else {
+ if (unlikely(itt_rx_data[port_id].queue_state[queue_id])) {
+ __itt_task_end(
+ itt_rx_data[port_id].domains[queue_id]);
+ itt_rx_data[port_id].queue_state[queue_id] = 0;
+ }
+ }
+ return nb_pkts;
+}
+
+/**
+ * Initialization of itt_profile_rx_data for a given Ethernet device.
+ * This function must be invoked when the Ethernet device is being configured.
+ * Result will be stored in the global array *itt_rx_data*.
+ *
+ * @param port_id
+ * The port identifier of the Ethernet device.
+ * @param port_name
+ * The name of the Ethernet device.
+ * @param rx_queue_num
+ * The number of RX queues on specified port.
+ *
+ * @return
+ * - On success, zero.
+ * - On failure, a negative value.
+ */
+static inline int
+itt_profile_rx_init(uint16_t port_id, char *port_name, uint8_t rx_queue_num)
+{
+ uint16_t q_id;
+
+ for (q_id = 0; q_id < rx_queue_num; ++q_id) {
+ char domain_name[ITT_MAX_NAME_LEN];
+
+ snprintf(domain_name, sizeof(domain_name),
+ "RXBurst.WastedIterations.Port_%s.Queue_%d",
+ port_name, q_id);
+ itt_rx_data[port_id].domains[q_id]
+ = __itt_domain_create(domain_name);
+
+ char task_name[ITT_MAX_NAME_LEN];
+
+ snprintf(task_name, sizeof(task_name),
+ "port id: %d; queue id: %d",
+ port_id, q_id);
+ itt_rx_data[port_id].handles[q_id]
+ = __itt_string_handle_create(task_name);
+
+ itt_rx_data[port_id].queue_state[q_id] = 0;
+
+ if (!rte_eth_add_rx_callback(
+ port_id, q_id, collect_itt_rx_burst_cb, NULL)) {
+ return -rte_errno;
+ }
+ }
+
+ return 0;
+}
+#endif /* RTE_ETHDEV_PROFILE_ITT_WASTED_RX_ITERATIONS */
+
+int
+__rte_eth_profile_rx_init(__rte_unused uint16_t port_id,
+ __rte_unused struct rte_eth_dev *dev)
+{
+#ifdef RTE_ETHDEV_PROFILE_ITT_WASTED_RX_ITERATIONS
+ return itt_profile_rx_init(
+ port_id, dev->data->name, dev->data->nb_rx_queues);
+#endif
+ return 0;
+}
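Note: the profiling above hooks the RX path through the standard rte_eth_add_rx_callback() mechanism. A hedged application-level sketch using the same callback API (with 17.11's 16-bit port ids) to count empty polls; the counter array and function names are illustrative only:

#include <rte_ethdev.h>
#include <rte_errno.h>
#include <rte_mbuf.h>

/* Illustrative per-port counter of RX bursts that returned no packets. */
static uint64_t empty_polls[RTE_MAX_ETHPORTS];

static uint16_t
count_empty_polls_cb(uint16_t port_id, __rte_unused uint16_t queue_id,
	__rte_unused struct rte_mbuf *pkts[], uint16_t nb_pkts,
	__rte_unused uint16_t max_pkts, __rte_unused void *user_param)
{
	if (nb_pkts == 0)
		empty_polls[port_id]++;
	return nb_pkts;
}

/* Register the callback on queue 0 of the given port. */
static int
install_empty_poll_counter(uint16_t port_id)
{
	if (rte_eth_add_rx_callback(port_id, 0, count_empty_polls_cb, NULL) == NULL)
		return -rte_errno;
	return 0;
}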
diff --git a/lib/librte_ether/ethdev_profile.h b/lib/librte_ether/ethdev_profile.h
new file mode 100644
index 00000000..697facff
--- /dev/null
+++ b/lib/librte_ether/ethdev_profile.h
@@ -0,0 +1,56 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright(c) 2010-2017 Intel Corporation. All rights reserved.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef _RTE_ETHDEV_PROFILE_H_
+#define _RTE_ETHDEV_PROFILE_H_
+
+#include "rte_ethdev.h"
+
+/**
+ * Initialization of profiling RX queues for the Ethernet device.
+ * The implementation of this function depends on the profiling method
+ * chosen in the build configuration.
+ *
+ * @param port_id
+ * The port identifier of the Ethernet device.
+ * @param dev
+ * Pointer to struct rte_eth_dev corresponding to given port_id.
+ *
+ * @return
+ * - On success, zero.
+ * - On failure, a negative value.
+ */
+int
+__rte_eth_profile_rx_init(uint16_t port_id, struct rte_eth_dev *dev);
+
+#endif
diff --git a/lib/librte_ether/rte_ethdev.c b/lib/librte_ether/rte_ethdev.c
index 0597641e..318af286 100644
--- a/lib/librte_ether/rte_ethdev.c
+++ b/lib/librte_ether/rte_ethdev.c
@@ -47,7 +47,6 @@
#include <rte_log.h>
#include <rte_debug.h>
#include <rte_interrupts.h>
-#include <rte_pci.h>
#include <rte_memory.h>
#include <rte_memcpy.h>
#include <rte_memzone.h>
@@ -67,6 +66,7 @@
#include "rte_ether.h"
#include "rte_ethdev.h"
+#include "ethdev_profile.h"
static const char *MZ_RTE_ETH_DEV_DATA = "rte_eth_dev_data";
struct rte_eth_dev rte_eth_devices[RTE_MAX_ETHPORTS];
@@ -138,8 +138,8 @@ enum {
STAT_QMAP_RX
};
-uint8_t
-rte_eth_find_next(uint8_t port_id)
+uint16_t
+rte_eth_find_next(uint16_t port_id)
{
while (port_id < RTE_MAX_ETHPORTS &&
rte_eth_devices[port_id].state != RTE_ETH_DEV_ATTACHED)
@@ -178,16 +178,14 @@ rte_eth_dev_allocated(const char *name)
unsigned i;
for (i = 0; i < RTE_MAX_ETHPORTS; i++) {
- if (rte_eth_devices[i].state == RTE_ETH_DEV_ATTACHED &&
- rte_eth_devices[i].device) {
- if (!strcmp(rte_eth_devices[i].device->name, name))
- return &rte_eth_devices[i];
- }
+ if ((rte_eth_devices[i].state == RTE_ETH_DEV_ATTACHED) &&
+ strcmp(rte_eth_devices[i].data->name, name) == 0)
+ return &rte_eth_devices[i];
}
return NULL;
}
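Note: throughout this file the port identifier is widened from uint8_t to uint16_t, so more than 256 ports can be addressed. Application code that iterates ports should use the wider type as well; a minimal hedged sketch:

#include <stdio.h>
#include <rte_ethdev.h>

/* Hedged sketch: with 17.11, loop variables holding port ids should be
 * uint16_t to match the widened ethdev API. */
static void
list_attached_ports(void)
{
	uint16_t port_id;

	RTE_ETH_FOREACH_DEV(port_id)
		printf("port %u is attached\n", port_id);
}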
-static uint8_t
+static uint16_t
rte_eth_dev_find_free_port(void)
{
unsigned i;
@@ -200,7 +198,7 @@ rte_eth_dev_find_free_port(void)
}
static struct rte_eth_dev *
-eth_dev_get(uint8_t port_id)
+eth_dev_get(uint16_t port_id)
{
struct rte_eth_dev *eth_dev = &rte_eth_devices[port_id];
@@ -216,7 +214,7 @@ eth_dev_get(uint8_t port_id)
struct rte_eth_dev *
rte_eth_dev_allocate(const char *name)
{
- uint8_t port_id;
+ uint16_t port_id;
struct rte_eth_dev *eth_dev;
port_id = rte_eth_dev_find_free_port();
@@ -251,7 +249,7 @@ rte_eth_dev_allocate(const char *name)
struct rte_eth_dev *
rte_eth_dev_attach_secondary(const char *name)
{
- uint8_t i;
+ uint16_t i;
struct rte_eth_dev *eth_dev;
if (rte_eth_dev_data == NULL)
@@ -285,7 +283,7 @@ rte_eth_dev_release_port(struct rte_eth_dev *eth_dev)
}
int
-rte_eth_dev_is_valid_port(uint8_t port_id)
+rte_eth_dev_is_valid_port(uint16_t port_id)
{
if (port_id >= RTE_MAX_ETHPORTS ||
(rte_eth_devices[port_id].state != RTE_ETH_DEV_ATTACHED &&
@@ -296,17 +294,24 @@ rte_eth_dev_is_valid_port(uint8_t port_id)
}
int
-rte_eth_dev_socket_id(uint8_t port_id)
+rte_eth_dev_socket_id(uint16_t port_id)
{
RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -1);
return rte_eth_devices[port_id].data->numa_node;
}
-uint8_t
+void *
+rte_eth_dev_get_sec_ctx(uint8_t port_id)
+{
+ RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, NULL);
+ return rte_eth_devices[port_id].security_ctx;
+}
+
+uint16_t
rte_eth_dev_count(void)
{
- uint8_t p;
- uint8_t count;
+ uint16_t p;
+ uint16_t count;
count = 0;
@@ -317,9 +322,9 @@ rte_eth_dev_count(void)
}
int
-rte_eth_dev_get_name_by_port(uint8_t port_id, char *name)
+rte_eth_dev_get_name_by_port(uint16_t port_id, char *name)
{
- const char *tmp;
+ char *tmp;
RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -EINVAL);
@@ -330,15 +335,14 @@ rte_eth_dev_get_name_by_port(uint8_t port_id, char *name)
/* shouldn't check 'rte_eth_devices[i].data',
* because it might be overwritten by VDEV PMD */
- tmp = rte_eth_devices[port_id].device->name;
+ tmp = rte_eth_dev_data[port_id].name;
strcpy(name, tmp);
return 0;
}
int
-rte_eth_dev_get_port_by_name(const char *name, uint8_t *port_id)
+rte_eth_dev_get_port_by_name(const char *name, uint16_t *port_id)
{
- int ret;
int i;
if (name == NULL) {
@@ -347,37 +351,20 @@ rte_eth_dev_get_port_by_name(const char *name, uint8_t *port_id)
}
RTE_ETH_FOREACH_DEV(i) {
- if (!rte_eth_devices[i].device)
- continue;
+ if (!strncmp(name,
+ rte_eth_dev_data[i].name, strlen(name))) {
- ret = strncmp(name, rte_eth_devices[i].device->name,
- strlen(name));
- if (ret == 0) {
*port_id = i;
+
return 0;
}
}
return -ENODEV;
}
-static int
-rte_eth_dev_is_detachable(uint8_t port_id)
-{
- uint32_t dev_flags;
-
- RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -EINVAL);
-
- dev_flags = rte_eth_devices[port_id].data->dev_flags;
- if ((dev_flags & RTE_ETH_DEV_DETACHABLE) &&
- (!(dev_flags & RTE_ETH_DEV_BONDED_SLAVE)))
- return 0;
- else
- return 1;
-}
-
/* attach the new device, then store port_id of the device */
int
-rte_eth_dev_attach(const char *devargs, uint8_t *port_id)
+rte_eth_dev_attach(const char *devargs, uint16_t *port_id)
{
int ret = -1;
int current = rte_eth_dev_count();
@@ -423,21 +410,28 @@ err:
/* detach the device, then store the name of the device */
int
-rte_eth_dev_detach(uint8_t port_id, char *name)
+rte_eth_dev_detach(uint16_t port_id, char *name)
{
+ uint32_t dev_flags;
int ret = -1;
+ RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -EINVAL);
+
if (name == NULL) {
ret = -EINVAL;
goto err;
}
- /* FIXME: move this to eal, once device flags are relocated there */
- if (rte_eth_dev_is_detachable(port_id))
+ dev_flags = rte_eth_devices[port_id].data->dev_flags;
+ if (dev_flags & RTE_ETH_DEV_BONDED_SLAVE) {
+ RTE_LOG(ERR, EAL, "Port %" PRIu16 " is bonded, cannot detach\n",
+ port_id);
+ ret = -ENOTSUP;
goto err;
+ }
- snprintf(name, RTE_DEV_NAME_MAX_LEN, "%s",
- rte_eth_devices[port_id].device->name);
+ snprintf(name, sizeof(rte_eth_devices[port_id].data->name),
+ "%s", rte_eth_devices[port_id].data->name);
ret = rte_eal_dev_detach(rte_eth_devices[port_id].device);
if (ret < 0)
@@ -501,7 +495,7 @@ rte_eth_dev_rx_queue_config(struct rte_eth_dev *dev, uint16_t nb_queues)
}
int
-rte_eth_dev_rx_queue_start(uint8_t port_id, uint16_t rx_queue_id)
+rte_eth_dev_rx_queue_start(uint16_t port_id, uint16_t rx_queue_id)
{
struct rte_eth_dev *dev;
@@ -527,7 +521,7 @@ rte_eth_dev_rx_queue_start(uint8_t port_id, uint16_t rx_queue_id)
}
int
-rte_eth_dev_rx_queue_stop(uint8_t port_id, uint16_t rx_queue_id)
+rte_eth_dev_rx_queue_stop(uint16_t port_id, uint16_t rx_queue_id)
{
struct rte_eth_dev *dev;
@@ -553,7 +547,7 @@ rte_eth_dev_rx_queue_stop(uint8_t port_id, uint16_t rx_queue_id)
}
int
-rte_eth_dev_tx_queue_start(uint8_t port_id, uint16_t tx_queue_id)
+rte_eth_dev_tx_queue_start(uint16_t port_id, uint16_t tx_queue_id)
{
struct rte_eth_dev *dev;
@@ -579,7 +573,7 @@ rte_eth_dev_tx_queue_start(uint8_t port_id, uint16_t tx_queue_id)
}
int
-rte_eth_dev_tx_queue_stop(uint8_t port_id, uint16_t tx_queue_id)
+rte_eth_dev_tx_queue_stop(uint16_t port_id, uint16_t tx_queue_id)
{
struct rte_eth_dev *dev;
@@ -687,12 +681,102 @@ rte_eth_speed_bitflag(uint32_t speed, int duplex)
}
}
+/**
+ * A conversion function from rxmode bitfield API.
+ */
+static void
+rte_eth_convert_rx_offload_bitfield(const struct rte_eth_rxmode *rxmode,
+ uint64_t *rx_offloads)
+{
+ uint64_t offloads = 0;
+
+ if (rxmode->header_split == 1)
+ offloads |= DEV_RX_OFFLOAD_HEADER_SPLIT;
+ if (rxmode->hw_ip_checksum == 1)
+ offloads |= DEV_RX_OFFLOAD_CHECKSUM;
+ if (rxmode->hw_vlan_filter == 1)
+ offloads |= DEV_RX_OFFLOAD_VLAN_FILTER;
+ if (rxmode->hw_vlan_strip == 1)
+ offloads |= DEV_RX_OFFLOAD_VLAN_STRIP;
+ if (rxmode->hw_vlan_extend == 1)
+ offloads |= DEV_RX_OFFLOAD_VLAN_EXTEND;
+ if (rxmode->jumbo_frame == 1)
+ offloads |= DEV_RX_OFFLOAD_JUMBO_FRAME;
+ if (rxmode->hw_strip_crc == 1)
+ offloads |= DEV_RX_OFFLOAD_CRC_STRIP;
+ if (rxmode->enable_scatter == 1)
+ offloads |= DEV_RX_OFFLOAD_SCATTER;
+ if (rxmode->enable_lro == 1)
+ offloads |= DEV_RX_OFFLOAD_TCP_LRO;
+ if (rxmode->hw_timestamp == 1)
+ offloads |= DEV_RX_OFFLOAD_TIMESTAMP;
+ if (rxmode->security == 1)
+ offloads |= DEV_RX_OFFLOAD_SECURITY;
+
+ *rx_offloads = offloads;
+}
+
+/**
+ * A conversion function from rxmode offloads API.
+ */
+static void
+rte_eth_convert_rx_offloads(const uint64_t rx_offloads,
+ struct rte_eth_rxmode *rxmode)
+{
+
+ if (rx_offloads & DEV_RX_OFFLOAD_HEADER_SPLIT)
+ rxmode->header_split = 1;
+ else
+ rxmode->header_split = 0;
+ if (rx_offloads & DEV_RX_OFFLOAD_CHECKSUM)
+ rxmode->hw_ip_checksum = 1;
+ else
+ rxmode->hw_ip_checksum = 0;
+ if (rx_offloads & DEV_RX_OFFLOAD_VLAN_FILTER)
+ rxmode->hw_vlan_filter = 1;
+ else
+ rxmode->hw_vlan_filter = 0;
+ if (rx_offloads & DEV_RX_OFFLOAD_VLAN_STRIP)
+ rxmode->hw_vlan_strip = 1;
+ else
+ rxmode->hw_vlan_strip = 0;
+ if (rx_offloads & DEV_RX_OFFLOAD_VLAN_EXTEND)
+ rxmode->hw_vlan_extend = 1;
+ else
+ rxmode->hw_vlan_extend = 0;
+ if (rx_offloads & DEV_RX_OFFLOAD_JUMBO_FRAME)
+ rxmode->jumbo_frame = 1;
+ else
+ rxmode->jumbo_frame = 0;
+ if (rx_offloads & DEV_RX_OFFLOAD_CRC_STRIP)
+ rxmode->hw_strip_crc = 1;
+ else
+ rxmode->hw_strip_crc = 0;
+ if (rx_offloads & DEV_RX_OFFLOAD_SCATTER)
+ rxmode->enable_scatter = 1;
+ else
+ rxmode->enable_scatter = 0;
+ if (rx_offloads & DEV_RX_OFFLOAD_TCP_LRO)
+ rxmode->enable_lro = 1;
+ else
+ rxmode->enable_lro = 0;
+ if (rx_offloads & DEV_RX_OFFLOAD_TIMESTAMP)
+ rxmode->hw_timestamp = 1;
+ else
+ rxmode->hw_timestamp = 0;
+ if (rx_offloads & DEV_RX_OFFLOAD_SECURITY)
+ rxmode->security = 1;
+ else
+ rxmode->security = 0;
+}
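Note: the two helpers above translate between the legacy rxmode bit-fields and the per-port DEV_RX_OFFLOAD_* flags introduced in this release, so PMDs and applications can each use either variant during the transition. A hedged sketch of configuring a port purely through the new flags (queue counts and offload choices are illustrative):

#include <string.h>
#include <rte_ethdev.h>

static int
configure_with_offload_flags(uint16_t port_id)
{
	struct rte_eth_conf conf;

	memset(&conf, 0, sizeof(conf));
	/* Tell ethdev to honour rxmode.offloads instead of the old bit-fields. */
	conf.rxmode.ignore_offload_bitfield = 1;
	conf.rxmode.offloads = DEV_RX_OFFLOAD_CHECKSUM |
			       DEV_RX_OFFLOAD_VLAN_STRIP;

	return rte_eth_dev_configure(port_id, 1, 1, &conf);
}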
+
int
-rte_eth_dev_configure(uint8_t port_id, uint16_t nb_rx_q, uint16_t nb_tx_q,
+rte_eth_dev_configure(uint16_t port_id, uint16_t nb_rx_q, uint16_t nb_tx_q,
const struct rte_eth_conf *dev_conf)
{
struct rte_eth_dev *dev;
struct rte_eth_dev_info dev_info;
+ struct rte_eth_conf local_conf = *dev_conf;
int diag;
RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -EINVAL);
@@ -722,8 +806,20 @@ rte_eth_dev_configure(uint8_t port_id, uint16_t nb_rx_q, uint16_t nb_tx_q,
return -EBUSY;
}
+ /*
+ * Convert between the offloads API to enable PMDs to support
+ * only one of them.
+ */
+ if ((dev_conf->rxmode.ignore_offload_bitfield == 0)) {
+ rte_eth_convert_rx_offload_bitfield(
+ &dev_conf->rxmode, &local_conf.rxmode.offloads);
+ } else {
+ rte_eth_convert_rx_offloads(dev_conf->rxmode.offloads,
+ &local_conf.rxmode);
+ }
+
/* Copy the dev_conf parameter into the dev structure */
- memcpy(&dev->data->dev_conf, dev_conf, sizeof(dev->data->dev_conf));
+ memcpy(&dev->data->dev_conf, &local_conf, sizeof(dev->data->dev_conf));
/*
* Check that the numbers of RX and TX queues are not greater
@@ -767,7 +863,7 @@ rte_eth_dev_configure(uint8_t port_id, uint16_t nb_rx_q, uint16_t nb_tx_q,
* If jumbo frames are enabled, check that the maximum RX packet
* length is supported by the configured device.
*/
- if (dev_conf->rxmode.jumbo_frame == 1) {
+ if (local_conf.rxmode.offloads & DEV_RX_OFFLOAD_JUMBO_FRAME) {
if (dev_conf->rxmode.max_rx_pkt_len >
dev_info.max_rx_pktlen) {
RTE_PMD_DEBUG_TRACE("ethdev port_id=%d max_rx_pkt_len %u"
@@ -819,6 +915,16 @@ rte_eth_dev_configure(uint8_t port_id, uint16_t nb_rx_q, uint16_t nb_tx_q,
return diag;
}
+ /* Initialize Rx profiling if enabled at compilation time. */
+ diag = __rte_eth_profile_rx_init(port_id, dev);
+ if (diag != 0) {
+ RTE_PMD_DEBUG_TRACE("port%d __rte_eth_profile_rx_init = %d\n",
+ port_id, diag);
+ rte_eth_dev_rx_queue_config(dev, 0);
+ rte_eth_dev_tx_queue_config(dev, 0);
+ return diag;
+ }
+
return 0;
}
@@ -839,7 +945,7 @@ _rte_eth_dev_reset(struct rte_eth_dev *dev)
}
static void
-rte_eth_dev_config_restore(uint8_t port_id)
+rte_eth_dev_config_restore(uint16_t port_id)
{
struct rte_eth_dev *dev;
struct rte_eth_dev_info dev_info;
@@ -894,7 +1000,7 @@ rte_eth_dev_config_restore(uint8_t port_id)
}
int
-rte_eth_dev_start(uint8_t port_id)
+rte_eth_dev_start(uint16_t port_id)
{
struct rte_eth_dev *dev;
int diag;
@@ -906,7 +1012,7 @@ rte_eth_dev_start(uint8_t port_id)
RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->dev_start, -ENOTSUP);
if (dev->data->dev_started != 0) {
- RTE_PMD_DEBUG_TRACE("Device with port_id=%" PRIu8
+ RTE_PMD_DEBUG_TRACE("Device with port_id=%" PRIu16
" already started\n",
port_id);
return 0;
@@ -928,7 +1034,7 @@ rte_eth_dev_start(uint8_t port_id)
}
void
-rte_eth_dev_stop(uint8_t port_id)
+rte_eth_dev_stop(uint16_t port_id)
{
struct rte_eth_dev *dev;
@@ -938,7 +1044,7 @@ rte_eth_dev_stop(uint8_t port_id)
RTE_FUNC_PTR_OR_RET(*dev->dev_ops->dev_stop);
if (dev->data->dev_started == 0) {
- RTE_PMD_DEBUG_TRACE("Device with port_id=%" PRIu8
+ RTE_PMD_DEBUG_TRACE("Device with port_id=%" PRIu16
" already stopped\n",
port_id);
return;
@@ -949,7 +1055,7 @@ rte_eth_dev_stop(uint8_t port_id)
}
int
-rte_eth_dev_set_link_up(uint8_t port_id)
+rte_eth_dev_set_link_up(uint16_t port_id)
{
struct rte_eth_dev *dev;
@@ -962,7 +1068,7 @@ rte_eth_dev_set_link_up(uint8_t port_id)
}
int
-rte_eth_dev_set_link_down(uint8_t port_id)
+rte_eth_dev_set_link_down(uint16_t port_id)
{
struct rte_eth_dev *dev;
@@ -975,7 +1081,7 @@ rte_eth_dev_set_link_down(uint8_t port_id)
}
void
-rte_eth_dev_close(uint8_t port_id)
+rte_eth_dev_close(uint16_t port_id)
{
struct rte_eth_dev *dev;
@@ -995,7 +1101,24 @@ rte_eth_dev_close(uint8_t port_id)
}
int
-rte_eth_rx_queue_setup(uint8_t port_id, uint16_t rx_queue_id,
+rte_eth_dev_reset(uint16_t port_id)
+{
+ struct rte_eth_dev *dev;
+ int ret;
+
+ RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -EINVAL);
+ dev = &rte_eth_devices[port_id];
+
+ RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->dev_reset, -ENOTSUP);
+
+ rte_eth_dev_stop(port_id);
+ ret = dev->dev_ops->dev_reset(dev);
+
+ return ret;
+}
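Note: rte_eth_dev_reset() is new here; it stops the port and asks the PMD to perform a driver-level reset, after which the application is expected to reconfigure, set up its queues and restart the port. A hedged recovery sketch (single queue pair, queue setup elided):

#include <rte_ethdev.h>

static int
recover_port(uint16_t port_id, const struct rte_eth_conf *conf)
{
	int ret;

	ret = rte_eth_dev_reset(port_id);
	if (ret != 0)
		return ret;

	ret = rte_eth_dev_configure(port_id, 1, 1, conf);
	if (ret != 0)
		return ret;

	/* RX/TX queue setup would be repeated here before restarting. */
	return rte_eth_dev_start(port_id);
}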
+
+int
+rte_eth_rx_queue_setup(uint16_t port_id, uint16_t rx_queue_id,
uint16_t nb_rx_desc, unsigned int socket_id,
const struct rte_eth_rxconf *rx_conf,
struct rte_mempool *mp)
@@ -1004,6 +1127,7 @@ rte_eth_rx_queue_setup(uint8_t port_id, uint16_t rx_queue_id,
uint32_t mbp_buf_size;
struct rte_eth_dev *dev;
struct rte_eth_dev_info dev_info;
+ struct rte_eth_rxconf local_conf;
void **rxq;
RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -EINVAL);
@@ -1074,8 +1198,18 @@ rte_eth_rx_queue_setup(uint8_t port_id, uint16_t rx_queue_id,
if (rx_conf == NULL)
rx_conf = &dev_info.default_rxconf;
+ local_conf = *rx_conf;
+ if (dev->data->dev_conf.rxmode.ignore_offload_bitfield == 0) {
+ /**
+ * Reflect port offloads to queue offloads in order for
+ * offloads to not be discarded.
+ */
+ rte_eth_convert_rx_offload_bitfield(&dev->data->dev_conf.rxmode,
+ &local_conf.offloads);
+ }
+
ret = (*dev->dev_ops->rx_queue_setup)(dev, rx_queue_id, nb_rx_desc,
- socket_id, rx_conf, mp);
+ socket_id, &local_conf, mp);
if (!ret) {
if (!dev->data->min_rx_buf_size ||
dev->data->min_rx_buf_size > mbp_buf_size)
@@ -1085,13 +1219,63 @@ rte_eth_rx_queue_setup(uint8_t port_id, uint16_t rx_queue_id,
return ret;
}
+/**
+ * A conversion function from txq_flags API.
+ */
+static void
+rte_eth_convert_txq_flags(const uint32_t txq_flags, uint64_t *tx_offloads)
+{
+ uint64_t offloads = 0;
+
+ if (!(txq_flags & ETH_TXQ_FLAGS_NOMULTSEGS))
+ offloads |= DEV_TX_OFFLOAD_MULTI_SEGS;
+ if (!(txq_flags & ETH_TXQ_FLAGS_NOVLANOFFL))
+ offloads |= DEV_TX_OFFLOAD_VLAN_INSERT;
+ if (!(txq_flags & ETH_TXQ_FLAGS_NOXSUMSCTP))
+ offloads |= DEV_TX_OFFLOAD_SCTP_CKSUM;
+ if (!(txq_flags & ETH_TXQ_FLAGS_NOXSUMUDP))
+ offloads |= DEV_TX_OFFLOAD_UDP_CKSUM;
+ if (!(txq_flags & ETH_TXQ_FLAGS_NOXSUMTCP))
+ offloads |= DEV_TX_OFFLOAD_TCP_CKSUM;
+ if ((txq_flags & ETH_TXQ_FLAGS_NOREFCOUNT) &&
+ (txq_flags & ETH_TXQ_FLAGS_NOMULTMEMP))
+ offloads |= DEV_TX_OFFLOAD_MBUF_FAST_FREE;
+
+ *tx_offloads = offloads;
+}
+
+/**
+ * A conversion function from offloads API.
+ */
+static void
+rte_eth_convert_txq_offloads(const uint64_t tx_offloads, uint32_t *txq_flags)
+{
+ uint32_t flags = 0;
+
+ if (!(tx_offloads & DEV_TX_OFFLOAD_MULTI_SEGS))
+ flags |= ETH_TXQ_FLAGS_NOMULTSEGS;
+ if (!(tx_offloads & DEV_TX_OFFLOAD_VLAN_INSERT))
+ flags |= ETH_TXQ_FLAGS_NOVLANOFFL;
+ if (!(tx_offloads & DEV_TX_OFFLOAD_SCTP_CKSUM))
+ flags |= ETH_TXQ_FLAGS_NOXSUMSCTP;
+ if (!(tx_offloads & DEV_TX_OFFLOAD_UDP_CKSUM))
+ flags |= ETH_TXQ_FLAGS_NOXSUMUDP;
+ if (!(tx_offloads & DEV_TX_OFFLOAD_TCP_CKSUM))
+ flags |= ETH_TXQ_FLAGS_NOXSUMTCP;
+ if (tx_offloads & DEV_TX_OFFLOAD_MBUF_FAST_FREE)
+ flags |= (ETH_TXQ_FLAGS_NOREFCOUNT | ETH_TXQ_FLAGS_NOMULTMEMP);
+
+ *txq_flags = flags;
+}
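Note: symmetrically to the RX side, these helpers map the legacy ETH_TXQ_FLAGS_* bits to the new DEV_TX_OFFLOAD_* flags. A queue opts into the new scheme by setting ETH_TXQ_FLAGS_IGNORE in its txconf; a hedged sketch (descriptor count and offload choice are illustrative):

#include <rte_ethdev.h>

static int
setup_txq_with_offload_flags(uint16_t port_id, unsigned int socket_id)
{
	struct rte_eth_dev_info dev_info;
	struct rte_eth_txconf txconf;

	rte_eth_dev_info_get(port_id, &dev_info);
	txconf = dev_info.default_txconf;
	/* Ignore the legacy txq_flags bits and use the offload flags instead. */
	txconf.txq_flags = ETH_TXQ_FLAGS_IGNORE;
	txconf.offloads = DEV_TX_OFFLOAD_MBUF_FAST_FREE;

	return rte_eth_tx_queue_setup(port_id, 0, 512, socket_id, &txconf);
}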
+
int
-rte_eth_tx_queue_setup(uint8_t port_id, uint16_t tx_queue_id,
+rte_eth_tx_queue_setup(uint16_t port_id, uint16_t tx_queue_id,
uint16_t nb_tx_desc, unsigned int socket_id,
const struct rte_eth_txconf *tx_conf)
{
struct rte_eth_dev *dev;
struct rte_eth_dev_info dev_info;
+ struct rte_eth_txconf local_conf;
void **txq;
RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -EINVAL);
@@ -1136,8 +1320,23 @@ rte_eth_tx_queue_setup(uint8_t port_id, uint16_t tx_queue_id,
if (tx_conf == NULL)
tx_conf = &dev_info.default_txconf;
+ /*
+ * Convert between the offloads API to enable PMDs to support
+ * only one of them.
+ */
+ local_conf = *tx_conf;
+ if (tx_conf->txq_flags & ETH_TXQ_FLAGS_IGNORE) {
+ rte_eth_convert_txq_offloads(tx_conf->offloads,
+ &local_conf.txq_flags);
+ /* Keep the ignore flag. */
+ local_conf.txq_flags |= ETH_TXQ_FLAGS_IGNORE;
+ } else {
+ rte_eth_convert_txq_flags(tx_conf->txq_flags,
+ &local_conf.offloads);
+ }
+
return (*dev->dev_ops->tx_queue_setup)(dev, tx_queue_id, nb_tx_desc,
- socket_id, tx_conf);
+ socket_id, &local_conf);
}
void
@@ -1190,7 +1389,7 @@ rte_eth_tx_buffer_init(struct rte_eth_dev_tx_buffer *buffer, uint16_t size)
}
int
-rte_eth_tx_done_cleanup(uint8_t port_id, uint16_t queue_id, uint32_t free_cnt)
+rte_eth_tx_done_cleanup(uint16_t port_id, uint16_t queue_id, uint32_t free_cnt)
{
struct rte_eth_dev *dev = &rte_eth_devices[port_id];
@@ -1204,7 +1403,7 @@ rte_eth_tx_done_cleanup(uint8_t port_id, uint16_t queue_id, uint32_t free_cnt)
}
void
-rte_eth_promiscuous_enable(uint8_t port_id)
+rte_eth_promiscuous_enable(uint16_t port_id)
{
struct rte_eth_dev *dev;
@@ -1217,7 +1416,7 @@ rte_eth_promiscuous_enable(uint8_t port_id)
}
void
-rte_eth_promiscuous_disable(uint8_t port_id)
+rte_eth_promiscuous_disable(uint16_t port_id)
{
struct rte_eth_dev *dev;
@@ -1230,7 +1429,7 @@ rte_eth_promiscuous_disable(uint8_t port_id)
}
int
-rte_eth_promiscuous_get(uint8_t port_id)
+rte_eth_promiscuous_get(uint16_t port_id)
{
struct rte_eth_dev *dev;
@@ -1241,7 +1440,7 @@ rte_eth_promiscuous_get(uint8_t port_id)
}
void
-rte_eth_allmulticast_enable(uint8_t port_id)
+rte_eth_allmulticast_enable(uint16_t port_id)
{
struct rte_eth_dev *dev;
@@ -1254,7 +1453,7 @@ rte_eth_allmulticast_enable(uint8_t port_id)
}
void
-rte_eth_allmulticast_disable(uint8_t port_id)
+rte_eth_allmulticast_disable(uint16_t port_id)
{
struct rte_eth_dev *dev;
@@ -1267,7 +1466,7 @@ rte_eth_allmulticast_disable(uint8_t port_id)
}
int
-rte_eth_allmulticast_get(uint8_t port_id)
+rte_eth_allmulticast_get(uint16_t port_id)
{
struct rte_eth_dev *dev;
@@ -1292,7 +1491,7 @@ rte_eth_dev_atomic_read_link_status(struct rte_eth_dev *dev,
}
void
-rte_eth_link_get(uint8_t port_id, struct rte_eth_link *eth_link)
+rte_eth_link_get(uint16_t port_id, struct rte_eth_link *eth_link)
{
struct rte_eth_dev *dev;
@@ -1309,7 +1508,7 @@ rte_eth_link_get(uint8_t port_id, struct rte_eth_link *eth_link)
}
void
-rte_eth_link_get_nowait(uint8_t port_id, struct rte_eth_link *eth_link)
+rte_eth_link_get_nowait(uint16_t port_id, struct rte_eth_link *eth_link)
{
struct rte_eth_dev *dev;
@@ -1326,7 +1525,7 @@ rte_eth_link_get_nowait(uint8_t port_id, struct rte_eth_link *eth_link)
}
int
-rte_eth_stats_get(uint8_t port_id, struct rte_eth_stats *stats)
+rte_eth_stats_get(uint16_t port_id, struct rte_eth_stats *stats)
{
struct rte_eth_dev *dev;
@@ -1337,25 +1536,42 @@ rte_eth_stats_get(uint8_t port_id, struct rte_eth_stats *stats)
RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->stats_get, -ENOTSUP);
stats->rx_nombuf = dev->data->rx_mbuf_alloc_failed;
- (*dev->dev_ops->stats_get)(dev, stats);
- return 0;
+ return (*dev->dev_ops->stats_get)(dev, stats);
}
-void
-rte_eth_stats_reset(uint8_t port_id)
+int
+rte_eth_stats_reset(uint16_t port_id)
{
struct rte_eth_dev *dev;
- RTE_ETH_VALID_PORTID_OR_RET(port_id);
+ RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -ENODEV);
dev = &rte_eth_devices[port_id];
- RTE_FUNC_PTR_OR_RET(*dev->dev_ops->stats_reset);
+ RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->stats_reset, -ENOTSUP);
(*dev->dev_ops->stats_reset)(dev);
dev->data->rx_mbuf_alloc_failed = 0;
+
+ return 0;
+}
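Note: rte_eth_stats_get() now propagates the PMD's return value and rte_eth_stats_reset() returns an error code instead of void, so callers can detect unsupported or failing drivers. A hedged usage sketch:

#include <inttypes.h>
#include <rte_ethdev.h>
#include <rte_log.h>

static void
dump_and_clear_stats(uint16_t port_id)
{
	struct rte_eth_stats stats;

	if (rte_eth_stats_get(port_id, &stats) == 0)
		RTE_LOG(INFO, USER1, "port %u: %" PRIu64 " packets received\n",
			port_id, stats.ipackets);

	if (rte_eth_stats_reset(port_id) != 0)
		RTE_LOG(WARNING, USER1, "port %u: stats reset not supported\n",
			port_id);
}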
+
+static inline int
+get_xstats_basic_count(struct rte_eth_dev *dev)
+{
+ uint16_t nb_rxqs, nb_txqs;
+ int count;
+
+ nb_rxqs = RTE_MIN(dev->data->nb_rx_queues, RTE_ETHDEV_QUEUE_STAT_CNTRS);
+ nb_txqs = RTE_MIN(dev->data->nb_tx_queues, RTE_ETHDEV_QUEUE_STAT_CNTRS);
+
+ count = RTE_NB_STATS;
+ count += nb_rxqs * RTE_NB_RXQ_STATS;
+ count += nb_txqs * RTE_NB_TXQ_STATS;
+
+ return count;
}
static int
-get_xstats_count(uint8_t port_id)
+get_xstats_count(uint16_t port_id)
{
struct rte_eth_dev *dev;
int count;
@@ -1375,16 +1591,14 @@ get_xstats_count(uint8_t port_id)
} else
count = 0;
- count += RTE_NB_STATS;
- count += RTE_MIN(dev->data->nb_rx_queues, RTE_ETHDEV_QUEUE_STAT_CNTRS) *
- RTE_NB_RXQ_STATS;
- count += RTE_MIN(dev->data->nb_tx_queues, RTE_ETHDEV_QUEUE_STAT_CNTRS) *
- RTE_NB_TXQ_STATS;
+
+ count += get_xstats_basic_count(dev);
+
return count;
}
int
-rte_eth_xstats_get_id_by_name(uint8_t port_id, const char *xstat_name,
+rte_eth_xstats_get_id_by_name(uint16_t port_id, const char *xstat_name,
uint64_t *id)
{
int cnt_xstats, idx_xstat;
@@ -1427,125 +1641,97 @@ rte_eth_xstats_get_id_by_name(uint8_t port_id, const char *xstat_name,
return -EINVAL;
}
+/* retrieve ethdev extended statistics names */
int
-rte_eth_xstats_get_names_by_id(uint8_t port_id,
+rte_eth_xstats_get_names_by_id(uint16_t port_id,
struct rte_eth_xstat_name *xstats_names, unsigned int size,
uint64_t *ids)
{
- /* Get all xstats */
+ struct rte_eth_xstat_name *xstats_names_copy;
+ unsigned int no_basic_stat_requested = 1;
+ unsigned int expected_entries;
+ struct rte_eth_dev *dev;
+ unsigned int i;
+ int ret;
+
+ RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -ENODEV);
+ dev = &rte_eth_devices[port_id];
+
+ ret = get_xstats_count(port_id);
+ if (ret < 0)
+ return ret;
+ expected_entries = (unsigned int)ret;
+
+ /* Return max number of stats if no ids given */
if (!ids) {
- struct rte_eth_dev *dev;
- int cnt_used_entries;
- int cnt_expected_entries;
- int cnt_driver_entries;
- uint32_t idx, id_queue;
- uint16_t num_q;
-
- cnt_expected_entries = get_xstats_count(port_id);
- if (xstats_names == NULL || cnt_expected_entries < 0 ||
- (int)size < cnt_expected_entries)
- return cnt_expected_entries;
-
- /* port_id checked in get_xstats_count() */
- dev = &rte_eth_devices[port_id];
- cnt_used_entries = 0;
-
- for (idx = 0; idx < RTE_NB_STATS; idx++) {
- snprintf(xstats_names[cnt_used_entries].name,
- sizeof(xstats_names[0].name),
- "%s", rte_stats_strings[idx].name);
- cnt_used_entries++;
- }
- num_q = RTE_MIN(dev->data->nb_rx_queues,
- RTE_ETHDEV_QUEUE_STAT_CNTRS);
- for (id_queue = 0; id_queue < num_q; id_queue++) {
- for (idx = 0; idx < RTE_NB_RXQ_STATS; idx++) {
- snprintf(xstats_names[cnt_used_entries].name,
- sizeof(xstats_names[0].name),
- "rx_q%u%s",
- id_queue,
- rte_rxq_stats_strings[idx].name);
- cnt_used_entries++;
- }
+ if (!xstats_names)
+ return expected_entries;
+ else if (xstats_names && size < expected_entries)
+ return expected_entries;
+ }
- }
- num_q = RTE_MIN(dev->data->nb_tx_queues,
- RTE_ETHDEV_QUEUE_STAT_CNTRS);
- for (id_queue = 0; id_queue < num_q; id_queue++) {
- for (idx = 0; idx < RTE_NB_TXQ_STATS; idx++) {
- snprintf(xstats_names[cnt_used_entries].name,
- sizeof(xstats_names[0].name),
- "tx_q%u%s",
- id_queue,
- rte_txq_stats_strings[idx].name);
- cnt_used_entries++;
+ if (ids && !xstats_names)
+ return -EINVAL;
+
+ if (ids && dev->dev_ops->xstats_get_names_by_id != NULL && size > 0) {
+ unsigned int basic_count = get_xstats_basic_count(dev);
+ uint64_t ids_copy[size];
+
+ for (i = 0; i < size; i++) {
+ if (ids[i] < basic_count) {
+ no_basic_stat_requested = 0;
+ break;
}
- }
- if (dev->dev_ops->xstats_get_names_by_id != NULL) {
- /* If there are any driver-specific xstats, append them
- * to end of list.
+ /*
+ * Convert ids to the xstats ids that the PMD knows;
+ * the ids visible to the user cover basic + extended stats.
*/
- cnt_driver_entries =
- (*dev->dev_ops->xstats_get_names_by_id)(
- dev,
- xstats_names + cnt_used_entries,
- NULL,
- size - cnt_used_entries);
- if (cnt_driver_entries < 0)
- return cnt_driver_entries;
- cnt_used_entries += cnt_driver_entries;
-
- } else if (dev->dev_ops->xstats_get_names != NULL) {
- /* If there are any driver-specific xstats, append them
- * to end of list.
- */
- cnt_driver_entries = (*dev->dev_ops->xstats_get_names)(
- dev,
- xstats_names + cnt_used_entries,
- size - cnt_used_entries);
- if (cnt_driver_entries < 0)
- return cnt_driver_entries;
- cnt_used_entries += cnt_driver_entries;
+ ids_copy[i] = ids[i] - basic_count;
}
- return cnt_used_entries;
+ if (no_basic_stat_requested)
+ return (*dev->dev_ops->xstats_get_names_by_id)(dev,
+ xstats_names, ids_copy, size);
}
- /* Get only xstats given by IDS */
- else {
- uint16_t len, i;
- struct rte_eth_xstat_name *xstats_names_copy;
- len = rte_eth_xstats_get_names_by_id(port_id, NULL, 0, NULL);
+ /* Retrieve all stats */
+ if (!ids) {
+ int num_stats = rte_eth_xstats_get_names(port_id, xstats_names,
+ expected_entries);
+ if (num_stats < 0 || num_stats > (int)expected_entries)
+ return num_stats;
+ else
+ return expected_entries;
+ }
- xstats_names_copy =
- malloc(sizeof(struct rte_eth_xstat_name) * len);
- if (!xstats_names_copy) {
- RTE_PMD_DEBUG_TRACE(
- "ERROR: can't allocate memory for values_copy\n");
- free(xstats_names_copy);
- return -1;
- }
+ xstats_names_copy = calloc(expected_entries,
+ sizeof(struct rte_eth_xstat_name));
- rte_eth_xstats_get_names_by_id(port_id, xstats_names_copy,
- len, NULL);
+ if (!xstats_names_copy) {
+ RTE_PMD_DEBUG_TRACE("ERROR: can't allocate memory");
+ return -ENOMEM;
+ }
- for (i = 0; i < size; i++) {
- if (ids[i] >= len) {
- RTE_PMD_DEBUG_TRACE(
- "ERROR: id value isn't valid\n");
- return -1;
- }
- strcpy(xstats_names[i].name,
- xstats_names_copy[ids[i]].name);
+ /* Fill xstats_names_copy structure */
+ rte_eth_xstats_get_names(port_id, xstats_names_copy, expected_entries);
+
+ /* Filter stats */
+ for (i = 0; i < size; i++) {
+ if (ids[i] >= expected_entries) {
+ RTE_PMD_DEBUG_TRACE("ERROR: id value isn't valid\n");
+ free(xstats_names_copy);
+ return -1;
}
- free(xstats_names_copy);
- return size;
+ xstats_names[i] = xstats_names_copy[ids[i]];
}
+
+ free(xstats_names_copy);
+ return size;
}
int
-rte_eth_xstats_get_names(uint8_t port_id,
+rte_eth_xstats_get_names(uint16_t port_id,
struct rte_eth_xstat_name *xstats_names,
unsigned int size)
{
@@ -1611,133 +1797,80 @@ rte_eth_xstats_get_names(uint8_t port_id,
/* retrieve ethdev extended statistics */
int
-rte_eth_xstats_get_by_id(uint8_t port_id, const uint64_t *ids, uint64_t *values,
- unsigned int n)
+rte_eth_xstats_get_by_id(uint16_t port_id, const uint64_t *ids,
+ uint64_t *values, unsigned int size)
{
- /* If need all xstats */
- if (!ids) {
- struct rte_eth_stats eth_stats;
- struct rte_eth_dev *dev;
- unsigned int count = 0, i, q;
- signed int xcount = 0;
- uint64_t val, *stats_ptr;
- uint16_t nb_rxqs, nb_txqs;
-
- RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -ENODEV);
- dev = &rte_eth_devices[port_id];
-
- nb_rxqs = RTE_MIN(dev->data->nb_rx_queues,
- RTE_ETHDEV_QUEUE_STAT_CNTRS);
- nb_txqs = RTE_MIN(dev->data->nb_tx_queues,
- RTE_ETHDEV_QUEUE_STAT_CNTRS);
-
- /* Return generic statistics */
- count = RTE_NB_STATS + (nb_rxqs * RTE_NB_RXQ_STATS) +
- (nb_txqs * RTE_NB_TXQ_STATS);
-
-
- /* implemented by the driver */
- if (dev->dev_ops->xstats_get_by_id != NULL) {
- /* Retrieve the xstats from the driver at the end of the
- * xstats struct. Retrieve all xstats.
- */
- xcount = (*dev->dev_ops->xstats_get_by_id)(dev,
- NULL,
- values ? values + count : NULL,
- (n > count) ? n - count : 0);
-
- if (xcount < 0)
- return xcount;
- /* implemented by the driver */
- } else if (dev->dev_ops->xstats_get != NULL) {
- /* Retrieve the xstats from the driver at the end of the
- * xstats struct. Retrieve all xstats.
- * Compatibility for PMD without xstats_get_by_ids
- */
- unsigned int size = (n > count) ? n - count : 1;
- struct rte_eth_xstat xstats[size];
-
- xcount = (*dev->dev_ops->xstats_get)(dev,
- values ? xstats : NULL, size);
-
- if (xcount < 0)
- return xcount;
+ unsigned int no_basic_stat_requested = 1;
+ unsigned int num_xstats_filled;
+ uint16_t expected_entries;
+ struct rte_eth_dev *dev;
+ unsigned int i;
+ int ret;
- if (values != NULL)
- for (i = 0 ; i < (unsigned int)xcount; i++)
- values[i + count] = xstats[i].value;
- }
+ RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -ENODEV);
+ expected_entries = get_xstats_count(port_id);
+ struct rte_eth_xstat xstats[expected_entries];
+ dev = &rte_eth_devices[port_id];
- if (n < count + xcount || values == NULL)
- return count + xcount;
+ /* Return max number of stats if no ids given */
+ if (!ids) {
+ if (!values)
+ return expected_entries;
+ else if (values && size < expected_entries)
+ return expected_entries;
+ }
- /* now fill the xstats structure */
- count = 0;
- rte_eth_stats_get(port_id, &eth_stats);
+ if (ids && !values)
+ return -EINVAL;
- /* global stats */
- for (i = 0; i < RTE_NB_STATS; i++) {
- stats_ptr = RTE_PTR_ADD(&eth_stats,
- rte_stats_strings[i].offset);
- val = *stats_ptr;
- values[count++] = val;
- }
+ if (ids && dev->dev_ops->xstats_get_by_id != NULL && size) {
+ unsigned int basic_count = get_xstats_basic_count(dev);
+ uint64_t ids_copy[size];
- /* per-rxq stats */
- for (q = 0; q < nb_rxqs; q++) {
- for (i = 0; i < RTE_NB_RXQ_STATS; i++) {
- stats_ptr = RTE_PTR_ADD(&eth_stats,
- rte_rxq_stats_strings[i].offset +
- q * sizeof(uint64_t));
- val = *stats_ptr;
- values[count++] = val;
+ for (i = 0; i < size; i++) {
+ if (ids[i] < basic_count) {
+ no_basic_stat_requested = 0;
+ break;
}
- }
- /* per-txq stats */
- for (q = 0; q < nb_txqs; q++) {
- for (i = 0; i < RTE_NB_TXQ_STATS; i++) {
- stats_ptr = RTE_PTR_ADD(&eth_stats,
- rte_txq_stats_strings[i].offset +
- q * sizeof(uint64_t));
- val = *stats_ptr;
- values[count++] = val;
- }
+ /*
+ * Convert ids to the xstats ids that the PMD knows;
+ * the ids visible to the user cover basic + extended stats.
+ */
+ ids_copy[i] = ids[i] - basic_count;
}
- return count + xcount;
+ if (no_basic_stat_requested)
+ return (*dev->dev_ops->xstats_get_by_id)(dev, ids_copy,
+ values, size);
}
- /* Need only xstats given by IDS array */
- else {
- uint16_t i, size;
- uint64_t *values_copy;
-
- size = rte_eth_xstats_get_by_id(port_id, NULL, NULL, 0);
- values_copy = malloc(sizeof(*values_copy) * size);
- if (!values_copy) {
- RTE_PMD_DEBUG_TRACE(
- "ERROR: can't allocate memory for values_copy\n");
- return -1;
- }
+ /* Fill the xstats structure */
+ ret = rte_eth_xstats_get(port_id, xstats, expected_entries);
+ if (ret < 0)
+ return ret;
+ num_xstats_filled = (unsigned int)ret;
- rte_eth_xstats_get_by_id(port_id, NULL, values_copy, size);
+ /* Return all stats */
+ if (!ids) {
+ for (i = 0; i < num_xstats_filled; i++)
+ values[i] = xstats[i].value;
+ return expected_entries;
+ }
- for (i = 0; i < n; i++) {
- if (ids[i] >= size) {
- RTE_PMD_DEBUG_TRACE(
- "ERROR: id value isn't valid\n");
- return -1;
- }
- values[i] = values_copy[ids[i]];
+ /* Filter stats */
+ for (i = 0; i < size; i++) {
+ if (ids[i] >= expected_entries) {
+ RTE_PMD_DEBUG_TRACE("ERROR: id value isn't valid\n");
+ return -1;
}
- free(values_copy);
- return n;
+ values[i] = xstats[ids[i]].value;
}
+ return size;
}
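Note: the by-id xstats paths above now delegate basic statistics to rte_eth_xstats_get_names()/rte_eth_xstats_get() and only forward PMD-specific ids to the driver. A hedged sketch of reading a single extended statistic by id; "rx_good_packets" is one of the basic ethdev xstat names:

#include <rte_ethdev.h>

static int
read_one_xstat(uint16_t port_id, uint64_t *value)
{
	uint64_t id;
	int ret;

	ret = rte_eth_xstats_get_id_by_name(port_id, "rx_good_packets", &id);
	if (ret != 0)
		return ret;

	ret = rte_eth_xstats_get_by_id(port_id, &id, value, 1);
	return ret < 0 ? ret : 0;
}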
int
-rte_eth_xstats_get(uint8_t port_id, struct rte_eth_xstat *xstats,
+rte_eth_xstats_get(uint16_t port_id, struct rte_eth_xstat *xstats,
unsigned int n)
{
struct rte_eth_stats eth_stats;
@@ -1819,7 +1952,7 @@ rte_eth_xstats_get(uint8_t port_id, struct rte_eth_xstat *xstats,
/* reset ethdev extended statistics */
void
-rte_eth_xstats_reset(uint8_t port_id)
+rte_eth_xstats_reset(uint16_t port_id)
{
struct rte_eth_dev *dev;
@@ -1837,7 +1970,7 @@ rte_eth_xstats_reset(uint8_t port_id)
}
static int
-set_queue_stats_mapping(uint8_t port_id, uint16_t queue_id, uint8_t stat_idx,
+set_queue_stats_mapping(uint16_t port_id, uint16_t queue_id, uint8_t stat_idx,
uint8_t is_rx)
{
struct rte_eth_dev *dev;
@@ -1853,7 +1986,7 @@ set_queue_stats_mapping(uint8_t port_id, uint16_t queue_id, uint8_t stat_idx,
int
-rte_eth_dev_set_tx_queue_stats_mapping(uint8_t port_id, uint16_t tx_queue_id,
+rte_eth_dev_set_tx_queue_stats_mapping(uint16_t port_id, uint16_t tx_queue_id,
uint8_t stat_idx)
{
return set_queue_stats_mapping(port_id, tx_queue_id, stat_idx,
@@ -1862,7 +1995,7 @@ rte_eth_dev_set_tx_queue_stats_mapping(uint8_t port_id, uint16_t tx_queue_id,
int
-rte_eth_dev_set_rx_queue_stats_mapping(uint8_t port_id, uint16_t rx_queue_id,
+rte_eth_dev_set_rx_queue_stats_mapping(uint16_t port_id, uint16_t rx_queue_id,
uint8_t stat_idx)
{
return set_queue_stats_mapping(port_id, rx_queue_id, stat_idx,
@@ -1870,7 +2003,7 @@ rte_eth_dev_set_rx_queue_stats_mapping(uint8_t port_id, uint16_t rx_queue_id,
}
int
-rte_eth_dev_fw_version_get(uint8_t port_id, char *fw_version, size_t fw_size)
+rte_eth_dev_fw_version_get(uint16_t port_id, char *fw_version, size_t fw_size)
{
struct rte_eth_dev *dev;
@@ -1882,7 +2015,7 @@ rte_eth_dev_fw_version_get(uint8_t port_id, char *fw_version, size_t fw_size)
}
void
-rte_eth_dev_info_get(uint8_t port_id, struct rte_eth_dev_info *dev_info)
+rte_eth_dev_info_get(uint16_t port_id, struct rte_eth_dev_info *dev_info)
{
struct rte_eth_dev *dev;
const struct rte_eth_desc_lim lim = {
@@ -1906,7 +2039,7 @@ rte_eth_dev_info_get(uint8_t port_id, struct rte_eth_dev_info *dev_info)
}
int
-rte_eth_dev_get_supported_ptypes(uint8_t port_id, uint32_t ptype_mask,
+rte_eth_dev_get_supported_ptypes(uint16_t port_id, uint32_t ptype_mask,
uint32_t *ptypes, int num)
{
int i, j;
@@ -1932,7 +2065,7 @@ rte_eth_dev_get_supported_ptypes(uint8_t port_id, uint32_t ptype_mask,
}
void
-rte_eth_macaddr_get(uint8_t port_id, struct ether_addr *mac_addr)
+rte_eth_macaddr_get(uint16_t port_id, struct ether_addr *mac_addr)
{
struct rte_eth_dev *dev;
@@ -1943,7 +2076,7 @@ rte_eth_macaddr_get(uint8_t port_id, struct ether_addr *mac_addr)
int
-rte_eth_dev_get_mtu(uint8_t port_id, uint16_t *mtu)
+rte_eth_dev_get_mtu(uint16_t port_id, uint16_t *mtu)
{
struct rte_eth_dev *dev;
@@ -1955,7 +2088,7 @@ rte_eth_dev_get_mtu(uint8_t port_id, uint16_t *mtu)
}
int
-rte_eth_dev_set_mtu(uint8_t port_id, uint16_t mtu)
+rte_eth_dev_set_mtu(uint16_t port_id, uint16_t mtu)
{
int ret;
struct rte_eth_dev *dev;
@@ -1972,14 +2105,15 @@ rte_eth_dev_set_mtu(uint8_t port_id, uint16_t mtu)
}
int
-rte_eth_dev_vlan_filter(uint8_t port_id, uint16_t vlan_id, int on)
+rte_eth_dev_vlan_filter(uint16_t port_id, uint16_t vlan_id, int on)
{
struct rte_eth_dev *dev;
int ret;
RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -ENODEV);
dev = &rte_eth_devices[port_id];
- if (!(dev->data->dev_conf.rxmode.hw_vlan_filter)) {
+ if (!(dev->data->dev_conf.rxmode.offloads &
+ DEV_RX_OFFLOAD_VLAN_FILTER)) {
RTE_PMD_DEBUG_TRACE("port %d: vlan-filtering disabled\n", port_id);
return -ENOSYS;
}
@@ -2011,7 +2145,8 @@ rte_eth_dev_vlan_filter(uint8_t port_id, uint16_t vlan_id, int on)
}
int
-rte_eth_dev_set_vlan_strip_on_queue(uint8_t port_id, uint16_t rx_queue_id, int on)
+rte_eth_dev_set_vlan_strip_on_queue(uint16_t port_id, uint16_t rx_queue_id,
+ int on)
{
struct rte_eth_dev *dev;
@@ -2029,7 +2164,7 @@ rte_eth_dev_set_vlan_strip_on_queue(uint8_t port_id, uint16_t rx_queue_id, int o
}
int
-rte_eth_dev_set_vlan_ether_type(uint8_t port_id,
+rte_eth_dev_set_vlan_ether_type(uint16_t port_id,
enum rte_vlan_type vlan_type,
uint16_t tpid)
{
@@ -2043,35 +2178,57 @@ rte_eth_dev_set_vlan_ether_type(uint8_t port_id,
}
int
-rte_eth_dev_set_vlan_offload(uint8_t port_id, int offload_mask)
+rte_eth_dev_set_vlan_offload(uint16_t port_id, int offload_mask)
{
struct rte_eth_dev *dev;
int ret = 0;
int mask = 0;
int cur, org = 0;
+ uint64_t orig_offloads;
RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -ENODEV);
dev = &rte_eth_devices[port_id];
+ /* save original values in case of failure */
+ orig_offloads = dev->data->dev_conf.rxmode.offloads;
+
/*check which option changed by application*/
cur = !!(offload_mask & ETH_VLAN_STRIP_OFFLOAD);
- org = !!(dev->data->dev_conf.rxmode.hw_vlan_strip);
+ org = !!(dev->data->dev_conf.rxmode.offloads &
+ DEV_RX_OFFLOAD_VLAN_STRIP);
if (cur != org) {
- dev->data->dev_conf.rxmode.hw_vlan_strip = (uint8_t)cur;
+ if (cur)
+ dev->data->dev_conf.rxmode.offloads |=
+ DEV_RX_OFFLOAD_VLAN_STRIP;
+ else
+ dev->data->dev_conf.rxmode.offloads &=
+ ~DEV_RX_OFFLOAD_VLAN_STRIP;
mask |= ETH_VLAN_STRIP_MASK;
}
cur = !!(offload_mask & ETH_VLAN_FILTER_OFFLOAD);
- org = !!(dev->data->dev_conf.rxmode.hw_vlan_filter);
+ org = !!(dev->data->dev_conf.rxmode.offloads &
+ DEV_RX_OFFLOAD_VLAN_FILTER);
if (cur != org) {
- dev->data->dev_conf.rxmode.hw_vlan_filter = (uint8_t)cur;
+ if (cur)
+ dev->data->dev_conf.rxmode.offloads |=
+ DEV_RX_OFFLOAD_VLAN_FILTER;
+ else
+ dev->data->dev_conf.rxmode.offloads &=
+ ~DEV_RX_OFFLOAD_VLAN_FILTER;
mask |= ETH_VLAN_FILTER_MASK;
}
cur = !!(offload_mask & ETH_VLAN_EXTEND_OFFLOAD);
- org = !!(dev->data->dev_conf.rxmode.hw_vlan_extend);
+ org = !!(dev->data->dev_conf.rxmode.offloads &
+ DEV_RX_OFFLOAD_VLAN_EXTEND);
if (cur != org) {
- dev->data->dev_conf.rxmode.hw_vlan_extend = (uint8_t)cur;
+ if (cur)
+ dev->data->dev_conf.rxmode.offloads |=
+ DEV_RX_OFFLOAD_VLAN_EXTEND;
+ else
+ dev->data->dev_conf.rxmode.offloads &=
+ ~DEV_RX_OFFLOAD_VLAN_EXTEND;
mask |= ETH_VLAN_EXTEND_MASK;
}
@@ -2080,13 +2237,26 @@ rte_eth_dev_set_vlan_offload(uint8_t port_id, int offload_mask)
return ret;
RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->vlan_offload_set, -ENOTSUP);
- (*dev->dev_ops->vlan_offload_set)(dev, mask);
+
+ /*
+ * Convert to the offload bitfield API in case the underlying
+ * PMD still relies on it.
+ */
+ rte_eth_convert_rx_offloads(dev->data->dev_conf.rxmode.offloads,
+ &dev->data->dev_conf.rxmode);
+ ret = (*dev->dev_ops->vlan_offload_set)(dev, mask);
+ if (ret) {
+ /* hit an error restore original values */
+ dev->data->dev_conf.rxmode.offloads = orig_offloads;
+ rte_eth_convert_rx_offloads(dev->data->dev_conf.rxmode.offloads,
+ &dev->data->dev_conf.rxmode);
+ }
return ret;
}
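Note: setting VLAN offloads now updates the port's rxmode.offloads mask and rolls it back if the PMD rejects the change. A hedged sketch of toggling VLAN stripping at run time with the existing get/set pair:

#include <rte_ethdev.h>

static int
enable_vlan_strip(uint16_t port_id)
{
	int mask = rte_eth_dev_get_vlan_offload(port_id);

	if (mask < 0)
		return mask;

	mask |= ETH_VLAN_STRIP_OFFLOAD;
	return rte_eth_dev_set_vlan_offload(port_id, mask);
}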
int
-rte_eth_dev_get_vlan_offload(uint8_t port_id)
+rte_eth_dev_get_vlan_offload(uint16_t port_id)
{
struct rte_eth_dev *dev;
int ret = 0;
@@ -2094,20 +2264,23 @@ rte_eth_dev_get_vlan_offload(uint8_t port_id)
RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -ENODEV);
dev = &rte_eth_devices[port_id];
- if (dev->data->dev_conf.rxmode.hw_vlan_strip)
+ if (dev->data->dev_conf.rxmode.offloads &
+ DEV_RX_OFFLOAD_VLAN_STRIP)
ret |= ETH_VLAN_STRIP_OFFLOAD;
- if (dev->data->dev_conf.rxmode.hw_vlan_filter)
+ if (dev->data->dev_conf.rxmode.offloads &
+ DEV_RX_OFFLOAD_VLAN_FILTER)
ret |= ETH_VLAN_FILTER_OFFLOAD;
- if (dev->data->dev_conf.rxmode.hw_vlan_extend)
+ if (dev->data->dev_conf.rxmode.offloads &
+ DEV_RX_OFFLOAD_VLAN_EXTEND)
ret |= ETH_VLAN_EXTEND_OFFLOAD;
return ret;
}
int
-rte_eth_dev_set_vlan_pvid(uint8_t port_id, uint16_t pvid, int on)
+rte_eth_dev_set_vlan_pvid(uint16_t port_id, uint16_t pvid, int on)
{
struct rte_eth_dev *dev;
@@ -2120,7 +2293,7 @@ rte_eth_dev_set_vlan_pvid(uint8_t port_id, uint16_t pvid, int on)
}
int
-rte_eth_dev_flow_ctrl_get(uint8_t port_id, struct rte_eth_fc_conf *fc_conf)
+rte_eth_dev_flow_ctrl_get(uint16_t port_id, struct rte_eth_fc_conf *fc_conf)
{
struct rte_eth_dev *dev;
@@ -2132,7 +2305,7 @@ rte_eth_dev_flow_ctrl_get(uint8_t port_id, struct rte_eth_fc_conf *fc_conf)
}
int
-rte_eth_dev_flow_ctrl_set(uint8_t port_id, struct rte_eth_fc_conf *fc_conf)
+rte_eth_dev_flow_ctrl_set(uint16_t port_id, struct rte_eth_fc_conf *fc_conf)
{
struct rte_eth_dev *dev;
@@ -2148,7 +2321,8 @@ rte_eth_dev_flow_ctrl_set(uint8_t port_id, struct rte_eth_fc_conf *fc_conf)
}
int
-rte_eth_dev_priority_flow_ctrl_set(uint8_t port_id, struct rte_eth_pfc_conf *pfc_conf)
+rte_eth_dev_priority_flow_ctrl_set(uint16_t port_id,
+ struct rte_eth_pfc_conf *pfc_conf)
{
struct rte_eth_dev *dev;
@@ -2214,7 +2388,7 @@ rte_eth_check_reta_entry(struct rte_eth_rss_reta_entry64 *reta_conf,
}
int
-rte_eth_dev_rss_reta_update(uint8_t port_id,
+rte_eth_dev_rss_reta_update(uint16_t port_id,
struct rte_eth_rss_reta_entry64 *reta_conf,
uint16_t reta_size)
{
@@ -2240,7 +2414,7 @@ rte_eth_dev_rss_reta_update(uint8_t port_id,
}
int
-rte_eth_dev_rss_reta_query(uint8_t port_id,
+rte_eth_dev_rss_reta_query(uint16_t port_id,
struct rte_eth_rss_reta_entry64 *reta_conf,
uint16_t reta_size)
{
@@ -2260,26 +2434,19 @@ rte_eth_dev_rss_reta_query(uint8_t port_id,
}
int
-rte_eth_dev_rss_hash_update(uint8_t port_id, struct rte_eth_rss_conf *rss_conf)
+rte_eth_dev_rss_hash_update(uint16_t port_id,
+ struct rte_eth_rss_conf *rss_conf)
{
struct rte_eth_dev *dev;
- uint16_t rss_hash_protos;
RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -ENODEV);
- rss_hash_protos = rss_conf->rss_hf;
- if ((rss_hash_protos != 0) &&
- ((rss_hash_protos & ETH_RSS_PROTO_MASK) == 0)) {
- RTE_PMD_DEBUG_TRACE("Invalid rss_hash_protos=0x%x\n",
- rss_hash_protos);
- return -EINVAL;
- }
dev = &rte_eth_devices[port_id];
RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->rss_hash_update, -ENOTSUP);
return (*dev->dev_ops->rss_hash_update)(dev, rss_conf);
}
int
-rte_eth_dev_rss_hash_conf_get(uint8_t port_id,
+rte_eth_dev_rss_hash_conf_get(uint16_t port_id,
struct rte_eth_rss_conf *rss_conf)
{
struct rte_eth_dev *dev;
@@ -2291,7 +2458,7 @@ rte_eth_dev_rss_hash_conf_get(uint8_t port_id,
}
int
-rte_eth_dev_udp_tunnel_port_add(uint8_t port_id,
+rte_eth_dev_udp_tunnel_port_add(uint16_t port_id,
struct rte_eth_udp_tunnel *udp_tunnel)
{
struct rte_eth_dev *dev;
@@ -2313,7 +2480,7 @@ rte_eth_dev_udp_tunnel_port_add(uint8_t port_id,
}
int
-rte_eth_dev_udp_tunnel_port_delete(uint8_t port_id,
+rte_eth_dev_udp_tunnel_port_delete(uint16_t port_id,
struct rte_eth_udp_tunnel *udp_tunnel)
{
struct rte_eth_dev *dev;
@@ -2336,7 +2503,7 @@ rte_eth_dev_udp_tunnel_port_delete(uint8_t port_id,
}
int
-rte_eth_led_on(uint8_t port_id)
+rte_eth_led_on(uint16_t port_id)
{
struct rte_eth_dev *dev;
@@ -2347,7 +2514,7 @@ rte_eth_led_on(uint8_t port_id)
}
int
-rte_eth_led_off(uint8_t port_id)
+rte_eth_led_off(uint16_t port_id)
{
struct rte_eth_dev *dev;
@@ -2362,7 +2529,7 @@ rte_eth_led_off(uint8_t port_id)
* an empty spot.
*/
static int
-get_mac_addr_index(uint8_t port_id, const struct ether_addr *addr)
+get_mac_addr_index(uint16_t port_id, const struct ether_addr *addr)
{
struct rte_eth_dev_info dev_info;
struct rte_eth_dev *dev = &rte_eth_devices[port_id];
@@ -2381,7 +2548,7 @@ get_mac_addr_index(uint8_t port_id, const struct ether_addr *addr)
static const struct ether_addr null_mac_addr;
int
-rte_eth_dev_mac_addr_add(uint8_t port_id, struct ether_addr *addr,
+rte_eth_dev_mac_addr_add(uint16_t port_id, struct ether_addr *addr,
uint32_t pool)
{
struct rte_eth_dev *dev;
@@ -2434,7 +2601,7 @@ rte_eth_dev_mac_addr_add(uint8_t port_id, struct ether_addr *addr,
}
int
-rte_eth_dev_mac_addr_remove(uint8_t port_id, struct ether_addr *addr)
+rte_eth_dev_mac_addr_remove(uint16_t port_id, struct ether_addr *addr)
{
struct rte_eth_dev *dev;
int index;
@@ -2463,7 +2630,7 @@ rte_eth_dev_mac_addr_remove(uint8_t port_id, struct ether_addr *addr)
}
int
-rte_eth_dev_default_mac_addr_set(uint8_t port_id, struct ether_addr *addr)
+rte_eth_dev_default_mac_addr_set(uint16_t port_id, struct ether_addr *addr)
{
struct rte_eth_dev *dev;
@@ -2489,7 +2656,7 @@ rte_eth_dev_default_mac_addr_set(uint8_t port_id, struct ether_addr *addr)
* an empty spot.
*/
static int
-get_hash_mac_addr_index(uint8_t port_id, const struct ether_addr *addr)
+get_hash_mac_addr_index(uint16_t port_id, const struct ether_addr *addr)
{
struct rte_eth_dev_info dev_info;
struct rte_eth_dev *dev = &rte_eth_devices[port_id];
@@ -2508,7 +2675,7 @@ get_hash_mac_addr_index(uint8_t port_id, const struct ether_addr *addr)
}
int
-rte_eth_dev_uc_hash_table_set(uint8_t port_id, struct ether_addr *addr,
+rte_eth_dev_uc_hash_table_set(uint16_t port_id, struct ether_addr *addr,
uint8_t on)
{
int index;
@@ -2560,7 +2727,7 @@ rte_eth_dev_uc_hash_table_set(uint8_t port_id, struct ether_addr *addr,
}
int
-rte_eth_dev_uc_all_hash_table_set(uint8_t port_id, uint8_t on)
+rte_eth_dev_uc_all_hash_table_set(uint16_t port_id, uint8_t on)
{
struct rte_eth_dev *dev;
@@ -2572,7 +2739,7 @@ rte_eth_dev_uc_all_hash_table_set(uint8_t port_id, uint8_t on)
return (*dev->dev_ops->uc_all_hash_table_set)(dev, on);
}
-int rte_eth_set_queue_rate_limit(uint8_t port_id, uint16_t queue_idx,
+int rte_eth_set_queue_rate_limit(uint16_t port_id, uint16_t queue_idx,
uint16_t tx_rate)
{
struct rte_eth_dev *dev;
@@ -2603,7 +2770,7 @@ int rte_eth_set_queue_rate_limit(uint8_t port_id, uint16_t queue_idx,
}
int
-rte_eth_mirror_rule_set(uint8_t port_id,
+rte_eth_mirror_rule_set(uint16_t port_id,
struct rte_eth_mirror_conf *mirror_conf,
uint8_t rule_id, uint8_t on)
{
@@ -2641,7 +2808,7 @@ rte_eth_mirror_rule_set(uint8_t port_id,
}
int
-rte_eth_mirror_rule_reset(uint8_t port_id, uint8_t rule_id)
+rte_eth_mirror_rule_reset(uint16_t port_id, uint8_t rule_id)
{
struct rte_eth_dev *dev;
@@ -2654,7 +2821,7 @@ rte_eth_mirror_rule_reset(uint8_t port_id, uint8_t rule_id)
}
int
-rte_eth_dev_callback_register(uint8_t port_id,
+rte_eth_dev_callback_register(uint16_t port_id,
enum rte_eth_event_type event,
rte_eth_dev_cb_fn cb_fn, void *cb_arg)
{
@@ -2694,7 +2861,7 @@ rte_eth_dev_callback_register(uint8_t port_id,
}
int
-rte_eth_dev_callback_unregister(uint8_t port_id,
+rte_eth_dev_callback_unregister(uint16_t port_id,
enum rte_eth_event_type event,
rte_eth_dev_cb_fn cb_fn, void *cb_arg)
{
@@ -2766,7 +2933,7 @@ _rte_eth_dev_callback_process(struct rte_eth_dev *dev,
}
int
-rte_eth_dev_rx_intr_ctl(uint8_t port_id, int epfd, int op, void *data)
+rte_eth_dev_rx_intr_ctl(uint16_t port_id, int epfd, int op, void *data)
{
uint32_t vec;
struct rte_eth_dev *dev;
@@ -2818,16 +2985,11 @@ rte_eth_dma_zone_reserve(const struct rte_eth_dev *dev, const char *ring_name,
if (mz)
return mz;
- if (rte_xen_dom0_supported())
- return rte_memzone_reserve_bounded(z_name, size, socket_id,
- 0, align, RTE_PGSIZE_2M);
- else
- return rte_memzone_reserve_aligned(z_name, size, socket_id,
- 0, align);
+ return rte_memzone_reserve_aligned(z_name, size, socket_id, 0, align);
}
int
-rte_eth_dev_rx_intr_ctl_q(uint8_t port_id, uint16_t queue_id,
+rte_eth_dev_rx_intr_ctl_q(uint16_t port_id, uint16_t queue_id,
int epfd, int op, void *data)
{
uint32_t vec;
@@ -2867,7 +3029,7 @@ rte_eth_dev_rx_intr_ctl_q(uint8_t port_id, uint16_t queue_id,
}
int
-rte_eth_dev_rx_intr_enable(uint8_t port_id,
+rte_eth_dev_rx_intr_enable(uint16_t port_id,
uint16_t queue_id)
{
struct rte_eth_dev *dev;
@@ -2881,7 +3043,7 @@ rte_eth_dev_rx_intr_enable(uint8_t port_id,
}
int
-rte_eth_dev_rx_intr_disable(uint8_t port_id,
+rte_eth_dev_rx_intr_disable(uint16_t port_id,
uint16_t queue_id)
{
struct rte_eth_dev *dev;
@@ -2896,7 +3058,8 @@ rte_eth_dev_rx_intr_disable(uint8_t port_id,
int
-rte_eth_dev_filter_supported(uint8_t port_id, enum rte_filter_type filter_type)
+rte_eth_dev_filter_supported(uint16_t port_id,
+ enum rte_filter_type filter_type)
{
struct rte_eth_dev *dev;
@@ -2909,7 +3072,7 @@ rte_eth_dev_filter_supported(uint8_t port_id, enum rte_filter_type filter_type)
}
int
-rte_eth_dev_filter_ctrl(uint8_t port_id, enum rte_filter_type filter_type,
+rte_eth_dev_filter_ctrl(uint16_t port_id, enum rte_filter_type filter_type,
enum rte_filter_op filter_op, void *arg)
{
struct rte_eth_dev *dev;
@@ -2922,7 +3085,7 @@ rte_eth_dev_filter_ctrl(uint8_t port_id, enum rte_filter_type filter_type,
}
void *
-rte_eth_add_rx_callback(uint8_t port_id, uint16_t queue_id,
+rte_eth_add_rx_callback(uint16_t port_id, uint16_t queue_id,
rte_rx_callback_fn fn, void *user_param)
{
#ifndef RTE_ETHDEV_RXTX_CALLBACKS
@@ -2964,7 +3127,7 @@ rte_eth_add_rx_callback(uint8_t port_id, uint16_t queue_id,
}
void *
-rte_eth_add_first_rx_callback(uint8_t port_id, uint16_t queue_id,
+rte_eth_add_first_rx_callback(uint16_t port_id, uint16_t queue_id,
rte_rx_callback_fn fn, void *user_param)
{
#ifndef RTE_ETHDEV_RXTX_CALLBACKS
@@ -2999,7 +3162,7 @@ rte_eth_add_first_rx_callback(uint8_t port_id, uint16_t queue_id,
}
void *
-rte_eth_add_tx_callback(uint8_t port_id, uint16_t queue_id,
+rte_eth_add_tx_callback(uint16_t port_id, uint16_t queue_id,
rte_tx_callback_fn fn, void *user_param)
{
#ifndef RTE_ETHDEV_RXTX_CALLBACKS
@@ -3042,7 +3205,7 @@ rte_eth_add_tx_callback(uint8_t port_id, uint16_t queue_id,
}
int
-rte_eth_remove_rx_callback(uint8_t port_id, uint16_t queue_id,
+rte_eth_remove_rx_callback(uint16_t port_id, uint16_t queue_id,
struct rte_eth_rxtx_callback *user_cb)
{
#ifndef RTE_ETHDEV_RXTX_CALLBACKS
@@ -3076,7 +3239,7 @@ rte_eth_remove_rx_callback(uint8_t port_id, uint16_t queue_id,
}
int
-rte_eth_remove_tx_callback(uint8_t port_id, uint16_t queue_id,
+rte_eth_remove_tx_callback(uint16_t port_id, uint16_t queue_id,
struct rte_eth_rxtx_callback *user_cb)
{
#ifndef RTE_ETHDEV_RXTX_CALLBACKS
@@ -3110,7 +3273,7 @@ rte_eth_remove_tx_callback(uint8_t port_id, uint16_t queue_id,
}
int
-rte_eth_rx_queue_info_get(uint8_t port_id, uint16_t queue_id,
+rte_eth_rx_queue_info_get(uint16_t port_id, uint16_t queue_id,
struct rte_eth_rxq_info *qinfo)
{
struct rte_eth_dev *dev;
@@ -3134,7 +3297,7 @@ rte_eth_rx_queue_info_get(uint8_t port_id, uint16_t queue_id,
}
int
-rte_eth_tx_queue_info_get(uint8_t port_id, uint16_t queue_id,
+rte_eth_tx_queue_info_get(uint16_t port_id, uint16_t queue_id,
struct rte_eth_txq_info *qinfo)
{
struct rte_eth_dev *dev;
@@ -3158,7 +3321,7 @@ rte_eth_tx_queue_info_get(uint8_t port_id, uint16_t queue_id,
}
int
-rte_eth_dev_set_mc_addr_list(uint8_t port_id,
+rte_eth_dev_set_mc_addr_list(uint16_t port_id,
struct ether_addr *mc_addr_set,
uint32_t nb_mc_addr)
{
@@ -3172,7 +3335,7 @@ rte_eth_dev_set_mc_addr_list(uint8_t port_id,
}
int
-rte_eth_timesync_enable(uint8_t port_id)
+rte_eth_timesync_enable(uint16_t port_id)
{
struct rte_eth_dev *dev;
@@ -3184,7 +3347,7 @@ rte_eth_timesync_enable(uint8_t port_id)
}
int
-rte_eth_timesync_disable(uint8_t port_id)
+rte_eth_timesync_disable(uint16_t port_id)
{
struct rte_eth_dev *dev;
@@ -3196,7 +3359,7 @@ rte_eth_timesync_disable(uint8_t port_id)
}
int
-rte_eth_timesync_read_rx_timestamp(uint8_t port_id, struct timespec *timestamp,
+rte_eth_timesync_read_rx_timestamp(uint16_t port_id, struct timespec *timestamp,
uint32_t flags)
{
struct rte_eth_dev *dev;
@@ -3209,7 +3372,8 @@ rte_eth_timesync_read_rx_timestamp(uint8_t port_id, struct timespec *timestamp,
}
int
-rte_eth_timesync_read_tx_timestamp(uint8_t port_id, struct timespec *timestamp)
+rte_eth_timesync_read_tx_timestamp(uint16_t port_id,
+ struct timespec *timestamp)
{
struct rte_eth_dev *dev;
@@ -3221,7 +3385,7 @@ rte_eth_timesync_read_tx_timestamp(uint8_t port_id, struct timespec *timestamp)
}
int
-rte_eth_timesync_adjust_time(uint8_t port_id, int64_t delta)
+rte_eth_timesync_adjust_time(uint16_t port_id, int64_t delta)
{
struct rte_eth_dev *dev;
@@ -3233,7 +3397,7 @@ rte_eth_timesync_adjust_time(uint8_t port_id, int64_t delta)
}
int
-rte_eth_timesync_read_time(uint8_t port_id, struct timespec *timestamp)
+rte_eth_timesync_read_time(uint16_t port_id, struct timespec *timestamp)
{
struct rte_eth_dev *dev;
@@ -3245,7 +3409,7 @@ rte_eth_timesync_read_time(uint8_t port_id, struct timespec *timestamp)
}
int
-rte_eth_timesync_write_time(uint8_t port_id, const struct timespec *timestamp)
+rte_eth_timesync_write_time(uint16_t port_id, const struct timespec *timestamp)
{
struct rte_eth_dev *dev;
@@ -3257,7 +3421,7 @@ rte_eth_timesync_write_time(uint8_t port_id, const struct timespec *timestamp)
}
int
-rte_eth_dev_get_reg_info(uint8_t port_id, struct rte_dev_reg_info *info)
+rte_eth_dev_get_reg_info(uint16_t port_id, struct rte_dev_reg_info *info)
{
struct rte_eth_dev *dev;
@@ -3269,7 +3433,7 @@ rte_eth_dev_get_reg_info(uint8_t port_id, struct rte_dev_reg_info *info)
}
int
-rte_eth_dev_get_eeprom_length(uint8_t port_id)
+rte_eth_dev_get_eeprom_length(uint16_t port_id)
{
struct rte_eth_dev *dev;
@@ -3281,7 +3445,7 @@ rte_eth_dev_get_eeprom_length(uint8_t port_id)
}
int
-rte_eth_dev_get_eeprom(uint8_t port_id, struct rte_dev_eeprom_info *info)
+rte_eth_dev_get_eeprom(uint16_t port_id, struct rte_dev_eeprom_info *info)
{
struct rte_eth_dev *dev;
@@ -3293,7 +3457,7 @@ rte_eth_dev_get_eeprom(uint8_t port_id, struct rte_dev_eeprom_info *info)
}
int
-rte_eth_dev_set_eeprom(uint8_t port_id, struct rte_dev_eeprom_info *info)
+rte_eth_dev_set_eeprom(uint16_t port_id, struct rte_dev_eeprom_info *info)
{
struct rte_eth_dev *dev;
@@ -3305,7 +3469,7 @@ rte_eth_dev_set_eeprom(uint8_t port_id, struct rte_dev_eeprom_info *info)
}
int
-rte_eth_dev_get_dcb_info(uint8_t port_id,
+rte_eth_dev_get_dcb_info(uint16_t port_id,
struct rte_eth_dcb_info *dcb_info)
{
struct rte_eth_dev *dev;
@@ -3320,7 +3484,7 @@ rte_eth_dev_get_dcb_info(uint8_t port_id,
}
int
-rte_eth_dev_l2_tunnel_eth_type_conf(uint8_t port_id,
+rte_eth_dev_l2_tunnel_eth_type_conf(uint16_t port_id,
struct rte_eth_l2_tunnel_conf *l2_tunnel)
{
struct rte_eth_dev *dev;
@@ -3343,7 +3507,7 @@ rte_eth_dev_l2_tunnel_eth_type_conf(uint8_t port_id,
}
int
-rte_eth_dev_l2_tunnel_offload_set(uint8_t port_id,
+rte_eth_dev_l2_tunnel_offload_set(uint16_t port_id,
struct rte_eth_l2_tunnel_conf *l2_tunnel,
uint32_t mask,
uint8_t en)
@@ -3387,7 +3551,7 @@ rte_eth_dev_adjust_nb_desc(uint16_t *nb_desc,
}
int
-rte_eth_dev_adjust_nb_rx_tx_desc(uint8_t port_id,
+rte_eth_dev_adjust_nb_rx_tx_desc(uint16_t port_id,
uint16_t *nb_rx_desc,
uint16_t *nb_tx_desc)
{
@@ -3409,3 +3573,21 @@ rte_eth_dev_adjust_nb_rx_tx_desc(uint8_t port_id,
return 0;
}
+
+int
+rte_eth_dev_pool_ops_supported(uint16_t port_id, const char *pool)
+{
+ struct rte_eth_dev *dev;
+
+ RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -ENODEV);
+
+ if (pool == NULL)
+ return -EINVAL;
+
+ dev = &rte_eth_devices[port_id];
+
+ if (*dev->dev_ops->pool_ops_supported == NULL)
+ return 1; /* all pools are supported */
+
+ return (*dev->dev_ops->pool_ops_supported)(dev, pool);
+}
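
Illustrative sketch (not part of the patch): the new rte_eth_dev_pool_ops_supported() lets an application ask a port whether a given mempool ops implementation is usable before creating its pools. The ops name "ring_mp_mc" below is only an assumed example; the exact return-value conventions beyond "negative means error" are up to the PMD.

#include <stdio.h>
#include <rte_ethdev.h>

/* Query whether a port can work with a given mempool ops name. */
static int
check_pool_ops(uint16_t port_id)
{
	const char *ops_name = "ring_mp_mc";
	int ret = rte_eth_dev_pool_ops_supported(port_id, ops_name);

	if (ret < 0)
		return ret;     /* -ENODEV, -EINVAL or a PMD error */
	/* ret == 1 also covers PMDs that expose no preference at all */
	printf("port %u: pool ops \"%s\" usable (ret=%d)\n",
	       port_id, ops_name, ret);
	return 0;
}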
diff --git a/lib/librte_ether/rte_ethdev.h b/lib/librte_ether/rte_ethdev.h
index 0adf3274..18e474db 100644
--- a/lib/librte_ether/rte_ethdev.h
+++ b/lib/librte_ether/rte_ethdev.h
@@ -180,6 +180,8 @@ extern "C" {
#include <rte_dev.h>
#include <rte_devargs.h>
#include <rte_errno.h>
+#include <rte_common.h>
+
#include "rte_ether.h"
#include "rte_eth_ctrl.h"
#include "rte_dev_info.h"
@@ -348,7 +350,18 @@ struct rte_eth_rxmode {
enum rte_eth_rx_mq_mode mq_mode;
uint32_t max_rx_pkt_len; /**< Only used if jumbo_frame enabled. */
uint16_t split_hdr_size; /**< hdr buf size (header_split enabled).*/
+ /**
+ * Per-port Rx offloads to be set using DEV_RX_OFFLOAD_* flags.
+ * Only offloads set on rx_offload_capa field on rte_eth_dev_info
+ * structure are allowed to be set.
+ */
+ uint64_t offloads;
__extension__
+ /**
+ * The bitfield API below is obsolete. Applications should
+ * enable per-port offloads using the offloads field
+ * above.
+ */
uint16_t header_split : 1, /**< Header Split enable. */
hw_ip_checksum : 1, /**< IP/UDP/TCP checksum offload enable. */
hw_vlan_filter : 1, /**< VLAN filter enable. */
@@ -357,7 +370,19 @@ struct rte_eth_rxmode {
jumbo_frame : 1, /**< Jumbo Frame Receipt enable. */
hw_strip_crc : 1, /**< Enable CRC stripping by hardware. */
enable_scatter : 1, /**< Enable scatter packets rx handler */
- enable_lro : 1; /**< Enable LRO */
+ enable_lro : 1, /**< Enable LRO */
+ hw_timestamp : 1, /**< Enable HW timestamp */
+ security : 1, /**< Enable rte_security offloads */
+ /**
+ * When set, the offload bitfield should be ignored.
+ * Instead, per-port Rx offloads should be set on the offloads
+ * field above.
+ * Per-queue offloads should be set on the rte_eth_rxconf
+ * structure.
+ * This bit is temporary until the rxmode bitfield offloads API
+ * is deprecated.
+ */
+ ignore_offload_bitfield : 1;
};
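
Illustrative sketch (not part of the patch): a minimal way for an application to switch to the new per-port Rx offloads field while asking the PMD to ignore the legacy bitfield. The particular offloads chosen and the single-queue setup are only examples.

#include <string.h>
#include <rte_ethdev.h>

/* Configure Rx through the new offloads field and opt out of the
 * legacy bitfield. */
static int
configure_port_rx(uint16_t port_id)
{
	struct rte_eth_conf conf;

	memset(&conf, 0, sizeof(conf));
	conf.rxmode.ignore_offload_bitfield = 1;
	conf.rxmode.offloads = DEV_RX_OFFLOAD_CHECKSUM |
			       DEV_RX_OFFLOAD_VLAN_STRIP;

	/* one Rx and one Tx queue, sized elsewhere */
	return rte_eth_dev_configure(port_id, 1, 1, &conf);
}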
/**
@@ -671,6 +696,12 @@ struct rte_eth_vmdq_rx_conf {
*/
struct rte_eth_txmode {
enum rte_eth_tx_mq_mode mq_mode; /**< TX multi-queues mode. */
+ /**
+ * Per-port Tx offloads to be set using DEV_TX_OFFLOAD_* flags.
+ * Only offloads set on tx_offload_capa field on rte_eth_dev_info
+ * structure are allowed to be set.
+ */
+ uint64_t offloads;
/* For i40e specifically */
uint16_t pvid;
@@ -691,6 +722,12 @@ struct rte_eth_rxconf {
uint16_t rx_free_thresh; /**< Drives the freeing of RX descriptors. */
uint8_t rx_drop_en; /**< Drop packets if no descriptors are available. */
uint8_t rx_deferred_start; /**< Do not start queue with rte_eth_dev_start(). */
+ /**
+ * Per-queue Rx offloads to be set using DEV_RX_OFFLOAD_* flags.
+ * Only offloads set on rx_queue_offload_capa or rx_offload_capa
+ * fields on rte_eth_dev_info structure are allowed to be set.
+ */
+ uint64_t offloads;
};
#define ETH_TXQ_FLAGS_NOMULTSEGS 0x0001 /**< nb_segs=1 for all mbufs */
@@ -707,6 +744,15 @@ struct rte_eth_rxconf {
(ETH_TXQ_FLAGS_NOXSUMSCTP | ETH_TXQ_FLAGS_NOXSUMUDP | \
ETH_TXQ_FLAGS_NOXSUMTCP)
/**
+ * When set, the txq_flags should be ignored;
+ * instead, per-queue Tx offloads will be set on the offloads field
+ * of the rte_eth_txconf struct.
+ * This flag is temporary until the rte_eth_txconf.txq_flags
+ * API is deprecated.
+ */
+#define ETH_TXQ_FLAGS_IGNORE 0x8000
+
+/**
* A structure used to configure a TX ring of an Ethernet port.
*/
struct rte_eth_txconf {
@@ -717,6 +763,12 @@ struct rte_eth_txconf {
uint32_t txq_flags; /**< Set flags for the Tx queue */
uint8_t tx_deferred_start; /**< Do not start queue with rte_eth_dev_start(). */
+ /**
+ * Per-queue Tx offloads to be set using DEV_TX_OFFLOAD_* flags.
+ * Only offloads set on tx_queue_offload_capa or tx_offload_capa
+ * fields on rte_eth_dev_info structure are allowed to be set.
+ */
+ uint64_t offloads;
};
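
Illustrative sketch (not part of the patch): one way to adopt ETH_TXQ_FLAGS_IGNORE together with the new per-queue Tx offloads field, starting from the device's default Tx configuration. The chosen offload is only an example and assumes the port reports it as supported.

#include <rte_ethdev.h>

/* Set up a Tx queue using the per-queue offloads path. */
static int
setup_txq(uint16_t port_id, uint16_t queue_id, uint16_t nb_desc,
	  unsigned int socket_id)
{
	struct rte_eth_dev_info dev_info;
	struct rte_eth_txconf txconf;

	rte_eth_dev_info_get(port_id, &dev_info);
	txconf = dev_info.default_txconf;
	txconf.txq_flags = ETH_TXQ_FLAGS_IGNORE; /* use offloads instead */
	txconf.offloads = DEV_TX_OFFLOAD_MULTI_SEGS;

	return rte_eth_tx_queue_setup(port_id, queue_id, nb_desc,
				      socket_id, &txconf);
}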
/**
@@ -874,7 +926,7 @@ struct rte_eth_conf {
/**< Port dcb RX configuration. */
struct rte_eth_vmdq_rx_conf vmdq_rx_conf;
/**< Port vmdq RX configuration. */
- } rx_adv_conf; /**< Port RX filtering configuration (union). */
+ } rx_adv_conf; /**< Port RX filtering configuration. */
union {
struct rte_eth_vmdq_dcb_tx_conf vmdq_dcb_tx_conf;
/**< Port vmdq+dcb TX configuration. */
@@ -907,6 +959,20 @@ struct rte_eth_conf {
#define DEV_RX_OFFLOAD_QINQ_STRIP 0x00000020
#define DEV_RX_OFFLOAD_OUTER_IPV4_CKSUM 0x00000040
#define DEV_RX_OFFLOAD_MACSEC_STRIP 0x00000080
+#define DEV_RX_OFFLOAD_HEADER_SPLIT 0x00000100
+#define DEV_RX_OFFLOAD_VLAN_FILTER 0x00000200
+#define DEV_RX_OFFLOAD_VLAN_EXTEND 0x00000400
+#define DEV_RX_OFFLOAD_JUMBO_FRAME 0x00000800
+#define DEV_RX_OFFLOAD_CRC_STRIP 0x00001000
+#define DEV_RX_OFFLOAD_SCATTER 0x00002000
+#define DEV_RX_OFFLOAD_TIMESTAMP 0x00004000
+#define DEV_RX_OFFLOAD_SECURITY 0x00008000
+#define DEV_RX_OFFLOAD_CHECKSUM (DEV_RX_OFFLOAD_IPV4_CKSUM | \
+ DEV_RX_OFFLOAD_UDP_CKSUM | \
+ DEV_RX_OFFLOAD_TCP_CKSUM)
+#define DEV_RX_OFFLOAD_VLAN (DEV_RX_OFFLOAD_VLAN_STRIP | \
+ DEV_RX_OFFLOAD_VLAN_FILTER | \
+ DEV_RX_OFFLOAD_VLAN_EXTEND)
/**
* TX offload capabilities of a device.
@@ -929,6 +995,14 @@ struct rte_eth_conf {
/**< Multiple threads can invoke rte_eth_tx_burst() concurrently on the same
* tx queue without SW lock.
*/
+#define DEV_TX_OFFLOAD_MULTI_SEGS 0x00008000
+/**< Device supports multi segment send. */
+#define DEV_TX_OFFLOAD_MBUF_FAST_FREE 0x00010000
+/**< Device supports optimization for fast release of mbufs.
+ * When set, the application must guarantee that, per queue, all mbufs come
+ * from the same mempool and have refcnt = 1.
+ */
+#define DEV_TX_OFFLOAD_SECURITY 0x00020000
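
Illustrative sketch (not part of the patch): because DEV_TX_OFFLOAD_MBUF_FAST_FREE shifts a guarantee onto the application, a conservative caller would gate it on the reported capability before requesting it.

#include <rte_ethdev.h>

/* Request MBUF_FAST_FREE only when the port advertises it, since the
 * application must then keep per-queue mbufs in one mempool with
 * refcnt = 1. */
static uint64_t
tx_offloads_for_port(uint16_t port_id)
{
	struct rte_eth_dev_info dev_info;
	uint64_t offloads = 0;

	rte_eth_dev_info_get(port_id, &dev_info);
	if (dev_info.tx_offload_capa & DEV_TX_OFFLOAD_MBUF_FAST_FREE)
		offloads |= DEV_TX_OFFLOAD_MBUF_FAST_FREE;
	return offloads;
}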
struct rte_pci_device;
@@ -949,8 +1023,14 @@ struct rte_eth_dev_info {
/** Maximum number of hash MAC addresses for MTA and UTA. */
uint16_t max_vfs; /**< Maximum number of VFs. */
uint16_t max_vmdq_pools; /**< Maximum number of VMDq pools. */
- uint32_t rx_offload_capa; /**< Device RX offload capabilities. */
- uint32_t tx_offload_capa; /**< Device TX offload capabilities. */
+ uint64_t rx_offload_capa;
+ /**< Device per port RX offload capabilities. */
+ uint64_t tx_offload_capa;
+ /**< Device per port TX offload capabilities. */
+ uint64_t rx_queue_offload_capa;
+ /**< Device per queue RX offload capabilities. */
+ uint64_t tx_queue_offload_capa;
+ /**< Device per queue TX offload capabilities. */
uint16_t reta_size;
/**< Device redirection table size, the total number of entries. */
uint8_t hash_key_size; /**< Hash key size in bytes */
@@ -1076,8 +1156,6 @@ TAILQ_HEAD(rte_eth_dev_cb_list, rte_eth_dev_callback);
} \
} while (0)
-#define RTE_ETH_DEV_TO_PCI(eth_dev) RTE_DEV_TO_PCI((eth_dev)->device)
-
/**
* l2 tunnel configuration.
*/
@@ -1115,6 +1193,9 @@ typedef int (*eth_dev_set_link_down_t)(struct rte_eth_dev *dev);
typedef void (*eth_dev_close_t)(struct rte_eth_dev *dev);
/**< @internal Function used to close a configured Ethernet device. */
+typedef int (*eth_dev_reset_t)(struct rte_eth_dev *dev);
+/**< @internal Function used to reset a configured Ethernet device. */
+
typedef void (*eth_promiscuous_enable_t)(struct rte_eth_dev *dev);
/**< @internal Function used to enable the RX promiscuous mode of an Ethernet device. */
@@ -1131,7 +1212,7 @@ typedef int (*eth_link_update_t)(struct rte_eth_dev *dev,
int wait_to_complete);
/**< @internal Get link speed, duplex mode and state (up/down) of an Ethernet device. */
-typedef void (*eth_stats_get_t)(struct rte_eth_dev *dev,
+typedef int (*eth_stats_get_t)(struct rte_eth_dev *dev,
struct rte_eth_stats *igb_stats);
/**< @internal Get global I/O statistics of an Ethernet device. */
@@ -1245,7 +1326,7 @@ typedef int (*vlan_tpid_set_t)(struct rte_eth_dev *dev,
enum rte_vlan_type type, uint16_t tpid);
/**< @internal set the outer/inner VLAN-TPID by an Ethernet device. */
-typedef void (*vlan_offload_set_t)(struct rte_eth_dev *dev, int mask);
+typedef int (*vlan_offload_set_t)(struct rte_eth_dev *dev, int mask);
/**< @internal set VLAN offload function by an Ethernet device. */
typedef int (*vlan_pvid_set_t)(struct rte_eth_dev *dev,
@@ -1421,10 +1502,17 @@ typedef int (*eth_filter_ctrl_t)(struct rte_eth_dev *dev,
typedef int (*eth_tm_ops_get_t)(struct rte_eth_dev *dev, void *ops);
/**< @internal Get Traffic Management (TM) operations on an Ethernet device */
+typedef int (*eth_mtr_ops_get_t)(struct rte_eth_dev *dev, void *ops);
+/**< @internal Get Traffic Metering and Policing (MTR) operations */
+
typedef int (*eth_get_dcb_info)(struct rte_eth_dev *dev,
struct rte_eth_dcb_info *dcb_info);
/**< @internal Get dcb information on an Ethernet device */
+typedef int (*eth_pool_ops_supported_t)(struct rte_eth_dev *dev,
+ const char *pool);
+/**< @internal Test if a port supports specific mempool ops */
+
/**
* @internal A structure containing the functions exported by an Ethernet driver.
*/
@@ -1435,6 +1523,7 @@ struct eth_dev_ops {
eth_dev_set_link_up_t dev_set_link_up; /**< Device link up. */
eth_dev_set_link_down_t dev_set_link_down; /**< Device link down. */
eth_dev_close_t dev_close; /**< Close device. */
+ eth_dev_reset_t dev_reset; /**< Reset device. */
eth_link_update_t link_update; /**< Get device link state. */
eth_promiscuous_enable_t promiscuous_enable; /**< Promiscuous ON. */
@@ -1544,6 +1633,12 @@ struct eth_dev_ops {
eth_tm_ops_get_t tm_ops_get;
/**< Get Traffic Management (TM) operations. */
+
+ eth_mtr_ops_get_t mtr_ops_get;
+ /**< Get Traffic Metering and Policing (MTR) operations. */
+
+ eth_pool_ops_supported_t pool_ops_supported;
+ /**< Test if a port supports specific mempool ops */
};
/**
@@ -1568,7 +1663,7 @@ struct eth_dev_ops {
* @return
* The number of packets returned to the user.
*/
-typedef uint16_t (*rte_rx_callback_fn)(uint8_t port, uint16_t queue,
+typedef uint16_t (*rte_rx_callback_fn)(uint16_t port, uint16_t queue,
struct rte_mbuf *pkts[], uint16_t nb_pkts, uint16_t max_pkts,
void *user_param);
@@ -1592,7 +1687,7 @@ typedef uint16_t (*rte_rx_callback_fn)(uint8_t port, uint16_t queue,
* @return
* The number of packets to be written to the NIC.
*/
-typedef uint16_t (*rte_tx_callback_fn)(uint8_t port, uint16_t queue,
+typedef uint16_t (*rte_tx_callback_fn)(uint16_t port, uint16_t queue,
struct rte_mbuf *pkts[], uint16_t nb_pkts, void *user_param);
/**
@@ -1649,8 +1744,12 @@ struct rte_eth_dev {
*/
struct rte_eth_rxtx_callback *pre_tx_burst_cbs[RTE_MAX_QUEUES_PER_PORT];
enum rte_eth_dev_state state; /**< Flag indicating the port state */
+ void *security_ctx; /**< Context for security ops */
} __rte_cache_aligned;
+void *
+rte_eth_dev_get_sec_ctx(uint8_t port_id);
+
struct rte_eth_dev_sriov {
uint8_t active; /**< SRIOV is active with 16, 32 or 64 pools */
uint8_t nb_q_per_pool; /**< rx queue number per pool */
@@ -1695,7 +1794,7 @@ struct rte_eth_dev_data {
/** bitmap array of associating Ethernet MAC addresses to pools */
struct ether_addr* hash_mac_addrs;
/** Device Ethernet MAC addresses of hash filtering. */
- uint8_t port_id; /**< Device [external] port identifier. */
+ uint16_t port_id; /**< Device [external] port identifier. */
__extension__
uint8_t promiscuous : 1, /**< RX promiscuous mode ON(1) / OFF(0). */
scattered_rx : 1, /**< RX of scattered packets is ON(1) / OFF(0) */
@@ -1713,8 +1812,6 @@ struct rte_eth_dev_data {
/**< VLAN filter configuration. */
};
-/** Device supports hotplug detach */
-#define RTE_ETH_DEV_DETACHABLE 0x0001
/** Device supports link state interrupt */
#define RTE_ETH_DEV_INTR_LSC 0x0002
/** Device is a bonded slave */
@@ -1737,7 +1834,7 @@ extern struct rte_eth_dev rte_eth_devices[];
* @return
* Next valid port id, RTE_MAX_ETHPORTS if there is none.
*/
-uint8_t rte_eth_find_next(uint8_t port_id);
+uint16_t rte_eth_find_next(uint16_t port_id);
/**
* Macro to iterate over all enabled ethdev ports.
@@ -1760,7 +1857,7 @@ uint8_t rte_eth_find_next(uint8_t port_id);
* @return
* - The total number of usable Ethernet devices.
*/
-uint8_t rte_eth_dev_count(void);
+uint16_t rte_eth_dev_count(void);
/**
* @internal
@@ -1821,7 +1918,7 @@ int rte_eth_dev_release_port(struct rte_eth_dev *eth_dev);
* @return
* 0 on success and port_id is filled, negative on error
*/
-int rte_eth_dev_attach(const char *devargs, uint8_t *port_id);
+int rte_eth_dev_attach(const char *devargs, uint16_t *port_id);
/**
* Detach an Ethernet device specified by port identifier.
@@ -1836,7 +1933,7 @@ int rte_eth_dev_attach(const char *devargs, uint8_t *port_id);
* @return
* 0 on success and devname is filled, negative on error
*/
-int rte_eth_dev_detach(uint8_t port_id, char *devname);
+int rte_eth_dev_detach(uint16_t port_id, char *devname);
/**
* Convert a numerical speed in Mbps to a bitmap flag that can be used in
@@ -1870,6 +1967,9 @@ uint32_t rte_eth_speed_bitflag(uint32_t speed, int duplex);
* each statically configurable offload hardware feature provided by
* Ethernet devices, such as IP checksum or VLAN tag stripping for
* example.
+ * The Rx offload bitfield API is obsolete and will be deprecated.
+ * Applications should set the ignore_offload_bitfield bit in the *rxmode*
+ * structure and use the offloads field to set per-port offloads instead.
* - the Receive Side Scaling (RSS) configuration when using multiple RX
* queues per port.
*
@@ -1880,7 +1980,7 @@ uint32_t rte_eth_speed_bitflag(uint32_t speed, int duplex);
* - 0: Success, device configured.
* - <0: Error code returned by the driver configuration function.
*/
-int rte_eth_dev_configure(uint8_t port_id, uint16_t nb_rx_queue,
+int rte_eth_dev_configure(uint16_t port_id, uint16_t nb_rx_queue,
uint16_t nb_tx_queue, const struct rte_eth_conf *eth_conf);
/**
@@ -1923,6 +2023,8 @@ void _rte_eth_dev_reset(struct rte_eth_dev *dev);
* The *rx_conf* structure contains an *rx_thresh* structure with the values
* of the Prefetch, Host, and Write-Back threshold registers of the receive
* ring.
+ * In addition, it contains the hardware offload features to activate using
+ * the DEV_RX_OFFLOAD_* flags.
* @param mb_pool
* The pointer to the memory pool from which to allocate *rte_mbuf* network
* memory buffers to populate each descriptor of the receive ring.
@@ -1935,7 +2037,7 @@ void _rte_eth_dev_reset(struct rte_eth_dev *dev);
* allocate network memory buffers from the memory pool when
* initializing receive descriptors.
*/
-int rte_eth_rx_queue_setup(uint8_t port_id, uint16_t rx_queue_id,
+int rte_eth_rx_queue_setup(uint16_t port_id, uint16_t rx_queue_id,
uint16_t nb_rx_desc, unsigned int socket_id,
const struct rte_eth_rxconf *rx_conf,
struct rte_mempool *mb_pool);
@@ -1976,6 +2078,11 @@ int rte_eth_rx_queue_setup(uint8_t port_id, uint16_t rx_queue_id,
* - The *txq_flags* member contains flags to pass to the TX queue setup
* function to configure the behavior of the TX queue. This should be set
* to 0 if no special configuration is required.
+ * This API is obsolete and will be deprecated. Applications
+ * should set it to ETH_TXQ_FLAGS_IGNORE and use
+ * the offloads field below.
+ * - The *offloads* member contains Tx offloads to be enabled.
+ * Offloads which are not set cannot be used on the datapath.
*
* Note that setting *tx_free_thresh* or *tx_rs_thresh* value to 0 forces
* the transmit function to use default values.
@@ -1983,7 +2090,7 @@ int rte_eth_rx_queue_setup(uint8_t port_id, uint16_t rx_queue_id,
* - 0: Success, the transmit queue is correctly set up.
* - -ENOMEM: Unable to allocate the transmit ring descriptors.
*/
-int rte_eth_tx_queue_setup(uint8_t port_id, uint16_t tx_queue_id,
+int rte_eth_tx_queue_setup(uint16_t port_id, uint16_t tx_queue_id,
uint16_t nb_tx_desc, unsigned int socket_id,
const struct rte_eth_txconf *tx_conf);
@@ -1997,7 +2104,7 @@ int rte_eth_tx_queue_setup(uint8_t port_id, uint16_t tx_queue_id,
* a default of zero if the socket could not be determined.
* -1 is returned if the port_id value is out of range.
*/
-int rte_eth_dev_socket_id(uint8_t port_id);
+int rte_eth_dev_socket_id(uint16_t port_id);
/**
* Check if port_id of device is attached
@@ -2008,7 +2115,7 @@ int rte_eth_dev_socket_id(uint8_t port_id);
* - 0 if port is out of range or not attached
* - 1 if device is attached
*/
-int rte_eth_dev_is_valid_port(uint8_t port_id);
+int rte_eth_dev_is_valid_port(uint16_t port_id);
/**
* Start specified RX queue of a port. It is used when rx_deferred_start
@@ -2025,7 +2132,7 @@ int rte_eth_dev_is_valid_port(uint8_t port_id);
* - -EINVAL: The port_id or the queue_id out of range.
* - -ENOTSUP: The function not supported in PMD driver.
*/
-int rte_eth_dev_rx_queue_start(uint8_t port_id, uint16_t rx_queue_id);
+int rte_eth_dev_rx_queue_start(uint16_t port_id, uint16_t rx_queue_id);
/**
* Stop specified RX queue of a port
@@ -2041,7 +2148,7 @@ int rte_eth_dev_rx_queue_start(uint8_t port_id, uint16_t rx_queue_id);
* - -EINVAL: The port_id or the queue_id out of range.
* - -ENOTSUP: The function not supported in PMD driver.
*/
-int rte_eth_dev_rx_queue_stop(uint8_t port_id, uint16_t rx_queue_id);
+int rte_eth_dev_rx_queue_stop(uint16_t port_id, uint16_t rx_queue_id);
/**
* Start TX for specified queue of a port. It is used when tx_deferred_start
@@ -2058,7 +2165,7 @@ int rte_eth_dev_rx_queue_stop(uint8_t port_id, uint16_t rx_queue_id);
* - -EINVAL: The port_id or the queue_id out of range.
* - -ENOTSUP: The function not supported in PMD driver.
*/
-int rte_eth_dev_tx_queue_start(uint8_t port_id, uint16_t tx_queue_id);
+int rte_eth_dev_tx_queue_start(uint16_t port_id, uint16_t tx_queue_id);
/**
* Stop specified TX queue of a port
@@ -2074,7 +2181,7 @@ int rte_eth_dev_tx_queue_start(uint8_t port_id, uint16_t tx_queue_id);
* - -EINVAL: The port_id or the queue_id out of range.
* - -ENOTSUP: The function not supported in PMD driver.
*/
-int rte_eth_dev_tx_queue_stop(uint8_t port_id, uint16_t tx_queue_id);
+int rte_eth_dev_tx_queue_stop(uint16_t port_id, uint16_t tx_queue_id);
@@ -2093,7 +2200,7 @@ int rte_eth_dev_tx_queue_stop(uint8_t port_id, uint16_t tx_queue_id);
* - 0: Success, Ethernet device started.
* - <0: Error code of the driver device start function.
*/
-int rte_eth_dev_start(uint8_t port_id);
+int rte_eth_dev_start(uint16_t port_id);
/**
* Stop an Ethernet device. The device can be restarted with a call to
@@ -2102,7 +2209,7 @@ int rte_eth_dev_start(uint8_t port_id);
* @param port_id
* The port identifier of the Ethernet device.
*/
-void rte_eth_dev_stop(uint8_t port_id);
+void rte_eth_dev_stop(uint16_t port_id);
/**
@@ -2117,7 +2224,7 @@ void rte_eth_dev_stop(uint8_t port_id);
* - 0: Success, Ethernet device linked up.
* - <0: Error code of the driver device link up function.
*/
-int rte_eth_dev_set_link_up(uint8_t port_id);
+int rte_eth_dev_set_link_up(uint16_t port_id);
/**
* Link down an Ethernet device.
@@ -2128,7 +2235,7 @@ int rte_eth_dev_set_link_up(uint8_t port_id);
* @param port_id
* The port identifier of the Ethernet device.
*/
-int rte_eth_dev_set_link_down(uint8_t port_id);
+int rte_eth_dev_set_link_down(uint16_t port_id);
/**
* Close a stopped Ethernet device. The device cannot be restarted!
@@ -2138,7 +2245,46 @@ int rte_eth_dev_set_link_down(uint8_t port_id);
* @param port_id
* The port identifier of the Ethernet device.
*/
-void rte_eth_dev_close(uint8_t port_id);
+void rte_eth_dev_close(uint16_t port_id);
+
+/**
+ * Reset an Ethernet device and keep its port id.
+ *
+ * When a port has to be reset passively, the DPDK application can invoke
+ * this function. For example when a PF is reset, all its VFs should also
+ * be reset. Normally a DPDK application can invoke this function when
+ * RTE_ETH_EVENT_INTR_RESET event is detected, but can also use it to start
+ * a port reset in other circumstances.
+ *
+ * When this function is called, it first stops the port and then calls the
+ * PMD specific dev_uninit( ) and dev_init( ) to return the port to initial
+ * state, in which no Tx and Rx queues are setup, as if the port has been
+ * reset and not started. The port keeps the port id it had before the
+ * function call.
+ *
+ * After calling rte_eth_dev_reset( ), the application should use
+ * rte_eth_dev_configure( ), rte_eth_rx_queue_setup( ),
+ * rte_eth_tx_queue_setup( ), and rte_eth_dev_start( )
+ * to reconfigure the device as appropriate.
+ *
+ * Note: To avoid unexpected behavior, the application should stop calling
+ * Tx and Rx functions before calling rte_eth_dev_reset( ). For thread
+ * safety, all these controlling functions should be called from the same
+ * thread.
+ *
+ * @param port_id
+ * The port identifier of the Ethernet device.
+ *
+ * @return
+ * - (0) if successful.
+ * - (-EINVAL) if port identifier is invalid.
+ * - (-ENOTSUP) if hardware doesn't support this function.
+ * - (-EPERM) if not run from the primary process.
+ * - (-EIO) if re-initialisation failed.
+ * - (-ENOMEM) if the reset failed due to OOM.
+ * - (-EAGAIN) if the reset temporarily failed and should be retried later.
+ */
+int rte_eth_dev_reset(uint16_t port_id);
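
Illustrative sketch (not part of the patch) of the passive reset flow described above: the RTE_ETH_EVENT_INTR_RESET callback only flags the port, and a control thread later resets and rebuilds it. reconfigure_port() is a hypothetical application helper that re-runs configure, queue setup and start.

#include <rte_ethdev.h>

void reconfigure_port(uint16_t port_id); /* hypothetical app helper */

static volatile int port_needs_reset[RTE_MAX_ETHPORTS];

/* Event callback: only record the request; the reset itself runs
 * later from a control thread. */
static int
reset_event_cb(uint16_t port_id, enum rte_eth_event_type event,
	       void *cb_arg, void *ret_param)
{
	(void)cb_arg;
	(void)ret_param;
	if (event == RTE_ETH_EVENT_INTR_RESET)
		port_needs_reset[port_id] = 1;
	return 0;
}

static void
handle_pending_resets(void)
{
	uint16_t port_id;

	for (port_id = 0; port_id < RTE_MAX_ETHPORTS; port_id++) {
		if (!rte_eth_dev_is_valid_port(port_id) ||
		    !port_needs_reset[port_id])
			continue;
		port_needs_reset[port_id] = 0;
		if (rte_eth_dev_reset(port_id) == 0)
			reconfigure_port(port_id);
	}
}

The callback would be installed with rte_eth_dev_callback_register(port_id, RTE_ETH_EVENT_INTR_RESET, reset_event_cb, NULL) from the same control thread that drives the reset.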
/**
* Enable receipt in promiscuous mode for an Ethernet device.
@@ -2146,7 +2292,7 @@ void rte_eth_dev_close(uint8_t port_id);
* @param port_id
* The port identifier of the Ethernet device.
*/
-void rte_eth_promiscuous_enable(uint8_t port_id);
+void rte_eth_promiscuous_enable(uint16_t port_id);
/**
* Disable receipt in promiscuous mode for an Ethernet device.
@@ -2154,7 +2300,7 @@ void rte_eth_promiscuous_enable(uint8_t port_id);
* @param port_id
* The port identifier of the Ethernet device.
*/
-void rte_eth_promiscuous_disable(uint8_t port_id);
+void rte_eth_promiscuous_disable(uint16_t port_id);
/**
* Return the value of promiscuous mode for an Ethernet device.
@@ -2166,7 +2312,7 @@ void rte_eth_promiscuous_disable(uint8_t port_id);
* - (0) if promiscuous is disabled.
* - (-1) on error
*/
-int rte_eth_promiscuous_get(uint8_t port_id);
+int rte_eth_promiscuous_get(uint16_t port_id);
/**
* Enable the receipt of any multicast frame by an Ethernet device.
@@ -2174,7 +2320,7 @@ int rte_eth_promiscuous_get(uint8_t port_id);
* @param port_id
* The port identifier of the Ethernet device.
*/
-void rte_eth_allmulticast_enable(uint8_t port_id);
+void rte_eth_allmulticast_enable(uint16_t port_id);
/**
* Disable the receipt of all multicast frames by an Ethernet device.
@@ -2182,7 +2328,7 @@ void rte_eth_allmulticast_enable(uint8_t port_id);
* @param port_id
* The port identifier of the Ethernet device.
*/
-void rte_eth_allmulticast_disable(uint8_t port_id);
+void rte_eth_allmulticast_disable(uint16_t port_id);
/**
* Return the value of allmulticast mode for an Ethernet device.
@@ -2194,7 +2340,7 @@ void rte_eth_allmulticast_disable(uint8_t port_id);
* - (0) if allmulticast is disabled.
* - (-1) on error
*/
-int rte_eth_allmulticast_get(uint8_t port_id);
+int rte_eth_allmulticast_get(uint16_t port_id);
/**
* Retrieve the status (ON/OFF), the speed (in Mbps) and the mode (HALF-DUPLEX
@@ -2207,7 +2353,7 @@ int rte_eth_allmulticast_get(uint8_t port_id);
* A pointer to an *rte_eth_link* structure to be filled with
* the status, the speed and the mode of the Ethernet device link.
*/
-void rte_eth_link_get(uint8_t port_id, struct rte_eth_link *link);
+void rte_eth_link_get(uint16_t port_id, struct rte_eth_link *link);
/**
* Retrieve the status (ON/OFF), the speed (in Mbps) and the mode (HALF-DUPLEX
@@ -2220,7 +2366,7 @@ void rte_eth_link_get(uint8_t port_id, struct rte_eth_link *link);
* A pointer to an *rte_eth_link* structure to be filled with
* the status, the speed and the mode of the Ethernet device link.
*/
-void rte_eth_link_get_nowait(uint8_t port_id, struct rte_eth_link *link);
+void rte_eth_link_get_nowait(uint16_t port_id, struct rte_eth_link *link);
/**
* Retrieve the general I/O statistics of an Ethernet device.
@@ -2239,15 +2385,19 @@ void rte_eth_link_get_nowait(uint8_t port_id, struct rte_eth_link *link);
* @return
* Zero if successful. Non-zero otherwise.
*/
-int rte_eth_stats_get(uint8_t port_id, struct rte_eth_stats *stats);
+int rte_eth_stats_get(uint16_t port_id, struct rte_eth_stats *stats);
/**
* Reset the general I/O statistics of an Ethernet device.
*
* @param port_id
* The port identifier of the Ethernet device.
+ * @return
+ * - (0) if device notified to reset stats.
+ * - (-ENOTSUP) if hardware doesn't support.
+ * - (-ENODEV) if *port_id* invalid.
*/
-void rte_eth_stats_reset(uint8_t port_id);
+int rte_eth_stats_reset(uint16_t port_id);
/**
* Retrieve names of extended statistics of an Ethernet device.
@@ -2269,7 +2419,7 @@ void rte_eth_stats_reset(uint8_t port_id);
* shall not be used by the caller.
* - A negative value on error (invalid port id).
*/
-int rte_eth_xstats_get_names(uint8_t port_id,
+int rte_eth_xstats_get_names(uint16_t port_id,
struct rte_eth_xstat_name *xstats_names,
unsigned int size);
@@ -2295,7 +2445,7 @@ int rte_eth_xstats_get_names(uint8_t port_id,
* shall not be used by the caller.
* - A negative value on error (invalid port id).
*/
-int rte_eth_xstats_get(uint8_t port_id, struct rte_eth_xstat *xstats,
+int rte_eth_xstats_get(uint16_t port_id, struct rte_eth_xstat *xstats,
unsigned int n);
/**
@@ -2321,7 +2471,7 @@ int rte_eth_xstats_get(uint8_t port_id, struct rte_eth_xstat *xstats,
* - A negative value on error (invalid port id).
*/
int
-rte_eth_xstats_get_names_by_id(uint8_t port_id,
+rte_eth_xstats_get_names_by_id(uint16_t port_id,
struct rte_eth_xstat_name *xstats_names, unsigned int size,
uint64_t *ids);
@@ -2333,23 +2483,23 @@ rte_eth_xstats_get_names_by_id(uint8_t port_id,
* @param ids
* A pointer to an ids array passed by application. This tells which
* statistics values function should retrieve. This parameter
- * can be set to NULL if n is 0. In this case function will retrieve
+ * can be set to NULL if size is 0. In this case the function will retrieve
* all available statistics.
* @param values
* A pointer to a table to be filled with device statistics values.
- * @param n
+ * @param size
* The size of the ids array (number of elements).
* @return
- * - A positive value lower or equal to n: success. The return value
+ * - A positive value lower or equal to size: success. The return value
* is the number of entries filled in the stats table.
- * - A positive value higher than n: error, the given statistics table
+ * - A positive value higher than size: error, the given statistics table
* is too small. The return value corresponds to the size that should
* be given to succeed. The entries in the table are not valid and
* shall not be used by the caller.
* - A negative value on error (invalid port id).
*/
-int rte_eth_xstats_get_by_id(uint8_t port_id, const uint64_t *ids,
- uint64_t *values, unsigned int n);
+int rte_eth_xstats_get_by_id(uint16_t port_id, const uint64_t *ids,
+ uint64_t *values, unsigned int size);
/**
* Gets the ID of a statistic from its name.
@@ -2368,7 +2518,7 @@ int rte_eth_xstats_get_by_id(uint8_t port_id, const uint64_t *ids,
* -ENODEV for invalid port_id,
* -EINVAL if the xstat_name doesn't exist in port_id
*/
-int rte_eth_xstats_get_id_by_name(uint8_t port_id, const char *xstat_name,
+int rte_eth_xstats_get_id_by_name(uint16_t port_id, const char *xstat_name,
uint64_t *id);
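
Illustrative sketch (not part of the patch): the two id-based calls above can be combined to read a single counter without fetching the whole xstats table. "rx_good_packets" is only an assumed example name; availability depends on the PMD.

#include <stdio.h>
#include <inttypes.h>
#include <rte_ethdev.h>

/* Read one extended statistic through the id-based API. */
static int
read_one_xstat(uint16_t port_id)
{
	uint64_t id, value;

	if (rte_eth_xstats_get_id_by_name(port_id, "rx_good_packets", &id) < 0)
		return -1;
	if (rte_eth_xstats_get_by_id(port_id, &id, &value, 1) != 1)
		return -1;
	printf("port %u: rx_good_packets=%" PRIu64 "\n", port_id, value);
	return 0;
}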
/**
@@ -2377,7 +2527,7 @@ int rte_eth_xstats_get_id_by_name(uint8_t port_id, const char *xstat_name,
* @param port_id
* The port identifier of the Ethernet device.
*/
-void rte_eth_xstats_reset(uint8_t port_id);
+void rte_eth_xstats_reset(uint16_t port_id);
/**
* Set a mapping for the specified transmit queue to the specified per-queue
@@ -2396,7 +2546,7 @@ void rte_eth_xstats_reset(uint8_t port_id);
* @return
* Zero if successful. Non-zero otherwise.
*/
-int rte_eth_dev_set_tx_queue_stats_mapping(uint8_t port_id,
+int rte_eth_dev_set_tx_queue_stats_mapping(uint16_t port_id,
uint16_t tx_queue_id, uint8_t stat_idx);
/**
@@ -2416,7 +2566,7 @@ int rte_eth_dev_set_tx_queue_stats_mapping(uint8_t port_id,
* @return
* Zero if successful. Non-zero otherwise.
*/
-int rte_eth_dev_set_rx_queue_stats_mapping(uint8_t port_id,
+int rte_eth_dev_set_rx_queue_stats_mapping(uint16_t port_id,
uint16_t rx_queue_id,
uint8_t stat_idx);
@@ -2429,7 +2579,7 @@ int rte_eth_dev_set_rx_queue_stats_mapping(uint8_t port_id,
* A pointer to a structure of type *ether_addr* to be filled with
* the Ethernet address of the Ethernet device.
*/
-void rte_eth_macaddr_get(uint8_t port_id, struct ether_addr *mac_addr);
+void rte_eth_macaddr_get(uint16_t port_id, struct ether_addr *mac_addr);
/**
* Retrieve the contextual information of an Ethernet device.
@@ -2440,7 +2590,7 @@ void rte_eth_macaddr_get(uint8_t port_id, struct ether_addr *mac_addr);
* A pointer to a structure of type *rte_eth_dev_info* to be filled with
* the contextual information of the Ethernet device.
*/
-void rte_eth_dev_info_get(uint8_t port_id, struct rte_eth_dev_info *dev_info);
+void rte_eth_dev_info_get(uint16_t port_id, struct rte_eth_dev_info *dev_info);
/**
* Retrieve the firmware version of a device.
@@ -2460,7 +2610,7 @@ void rte_eth_dev_info_get(uint8_t port_id, struct rte_eth_dev_info *dev_info);
* - (>0) if *fw_size* is not enough to store firmware version, return
* the size of the non truncated string.
*/
-int rte_eth_dev_fw_version_get(uint8_t port_id,
+int rte_eth_dev_fw_version_get(uint16_t port_id,
char *fw_version, size_t fw_size);
/**
@@ -2501,7 +2651,7 @@ int rte_eth_dev_fw_version_get(uint8_t port_id,
* count of supported ptypes will be returned.
* - (-ENODEV) if *port_id* invalid.
*/
-int rte_eth_dev_get_supported_ptypes(uint8_t port_id, uint32_t ptype_mask,
+int rte_eth_dev_get_supported_ptypes(uint16_t port_id, uint32_t ptype_mask,
uint32_t *ptypes, int num);
/**
@@ -2515,7 +2665,7 @@ int rte_eth_dev_get_supported_ptypes(uint8_t port_id, uint32_t ptype_mask,
* - (0) if successful.
* - (-ENODEV) if *port_id* invalid.
*/
-int rte_eth_dev_get_mtu(uint8_t port_id, uint16_t *mtu);
+int rte_eth_dev_get_mtu(uint16_t port_id, uint16_t *mtu);
/**
* Change the MTU of an Ethernet device.
@@ -2531,7 +2681,7 @@ int rte_eth_dev_get_mtu(uint8_t port_id, uint16_t *mtu);
* - (-EINVAL) if *mtu* invalid.
* - (-EBUSY) if operation is not allowed when the port is running
*/
-int rte_eth_dev_set_mtu(uint8_t port_id, uint16_t mtu);
+int rte_eth_dev_set_mtu(uint16_t port_id, uint16_t mtu);
/**
* Enable/Disable hardware filtering by an Ethernet device of received
@@ -2551,7 +2701,7 @@ int rte_eth_dev_set_mtu(uint8_t port_id, uint16_t mtu);
* - (-ENOSYS) if VLAN filtering on *port_id* disabled.
* - (-EINVAL) if *vlan_id* > 4095.
*/
-int rte_eth_dev_vlan_filter(uint8_t port_id, uint16_t vlan_id, int on);
+int rte_eth_dev_vlan_filter(uint16_t port_id, uint16_t vlan_id, int on);
/**
* Enable/Disable hardware VLAN Strip by a rx queue of an Ethernet device.
@@ -2572,7 +2722,7 @@ int rte_eth_dev_vlan_filter(uint8_t port_id, uint16_t vlan_id, int on);
* - (-ENODEV) if *port_id* invalid.
* - (-EINVAL) if *rx_queue_id* invalid.
*/
-int rte_eth_dev_set_vlan_strip_on_queue(uint8_t port_id, uint16_t rx_queue_id,
+int rte_eth_dev_set_vlan_strip_on_queue(uint16_t port_id, uint16_t rx_queue_id,
int on);
/**
@@ -2591,7 +2741,7 @@ int rte_eth_dev_set_vlan_strip_on_queue(uint8_t port_id, uint16_t rx_queue_id,
* - (-ENOSUP) if hardware-assisted VLAN TPID setup is not supported.
* - (-ENODEV) if *port_id* invalid.
*/
-int rte_eth_dev_set_vlan_ether_type(uint8_t port_id,
+int rte_eth_dev_set_vlan_ether_type(uint16_t port_id,
enum rte_vlan_type vlan_type,
uint16_t tag_type);
@@ -2615,7 +2765,7 @@ int rte_eth_dev_set_vlan_ether_type(uint8_t port_id,
* - (-ENOSUP) if hardware-assisted VLAN filtering not configured.
* - (-ENODEV) if *port_id* invalid.
*/
-int rte_eth_dev_set_vlan_offload(uint8_t port_id, int offload_mask);
+int rte_eth_dev_set_vlan_offload(uint16_t port_id, int offload_mask);
/**
* Read VLAN Offload configuration from an Ethernet device
@@ -2629,7 +2779,7 @@ int rte_eth_dev_set_vlan_offload(uint8_t port_id, int offload_mask);
* ETH_VLAN_EXTEND_OFFLOAD
* - (-ENODEV) if *port_id* invalid.
*/
-int rte_eth_dev_get_vlan_offload(uint8_t port_id);
+int rte_eth_dev_get_vlan_offload(uint16_t port_id);
/**
* Set port based TX VLAN insertion on or off.
@@ -2645,7 +2795,7 @@ int rte_eth_dev_get_vlan_offload(uint8_t port_id);
* - (0) if successful.
* - negative if failed.
*/
-int rte_eth_dev_set_vlan_pvid(uint8_t port_id, uint16_t pvid, int on);
+int rte_eth_dev_set_vlan_pvid(uint16_t port_id, uint16_t pvid, int on);
/**
*
@@ -2730,7 +2880,7 @@ int rte_eth_dev_set_vlan_pvid(uint8_t port_id, uint16_t pvid, int on);
* *rx_pkts* array.
*/
static inline uint16_t
-rte_eth_rx_burst(uint8_t port_id, uint16_t queue_id,
+rte_eth_rx_burst(uint16_t port_id, uint16_t queue_id,
struct rte_mbuf **rx_pkts, const uint16_t nb_pkts)
{
struct rte_eth_dev *dev = &rte_eth_devices[port_id];
@@ -2775,7 +2925,7 @@ rte_eth_rx_burst(uint8_t port_id, uint16_t queue_id,
* (-ENOTSUP) if the device does not support this function
*/
static inline int
-rte_eth_rx_queue_count(uint8_t port_id, uint16_t queue_id)
+rte_eth_rx_queue_count(uint16_t port_id, uint16_t queue_id)
{
struct rte_eth_dev *dev;
@@ -2804,7 +2954,7 @@ rte_eth_rx_queue_count(uint8_t port_id, uint16_t queue_id)
* - (-ENOTSUP) if the device does not support this function
*/
static inline int
-rte_eth_rx_descriptor_done(uint8_t port_id, uint16_t queue_id, uint16_t offset)
+rte_eth_rx_descriptor_done(uint16_t port_id, uint16_t queue_id, uint16_t offset)
{
struct rte_eth_dev *dev = &rte_eth_devices[port_id];
RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -ENODEV);
@@ -2851,7 +3001,7 @@ rte_eth_rx_descriptor_done(uint8_t port_id, uint16_t queue_id, uint16_t offset)
* - (-ENODEV) bad port or queue (only if compiled with debug).
*/
static inline int
-rte_eth_rx_descriptor_status(uint8_t port_id, uint16_t queue_id,
+rte_eth_rx_descriptor_status(uint16_t port_id, uint16_t queue_id,
uint16_t offset)
{
struct rte_eth_dev *dev;
@@ -2908,7 +3058,7 @@ rte_eth_rx_descriptor_status(uint8_t port_id, uint16_t queue_id,
* - (-ENOTSUP) if the device does not support this function.
* - (-ENODEV) bad port or queue (only if compiled with debug).
*/
-static inline int rte_eth_tx_descriptor_status(uint8_t port_id,
+static inline int rte_eth_tx_descriptor_status(uint16_t port_id,
uint16_t queue_id, uint16_t offset)
{
struct rte_eth_dev *dev;
@@ -2992,7 +3142,7 @@ static inline int rte_eth_tx_descriptor_status(uint8_t port_id,
* *tx_pkts* parameter when the transmit ring is full or has been filled up.
*/
static inline uint16_t
-rte_eth_tx_burst(uint8_t port_id, uint16_t queue_id,
+rte_eth_tx_burst(uint16_t port_id, uint16_t queue_id,
struct rte_mbuf **tx_pkts, uint16_t nb_pkts)
{
struct rte_eth_dev *dev = &rte_eth_devices[port_id];
@@ -3081,7 +3231,7 @@ rte_eth_tx_burst(uint8_t port_id, uint16_t queue_id,
#ifndef RTE_ETHDEV_TX_PREPARE_NOOP
static inline uint16_t
-rte_eth_tx_prepare(uint8_t port_id, uint16_t queue_id,
+rte_eth_tx_prepare(uint16_t port_id, uint16_t queue_id,
struct rte_mbuf **tx_pkts, uint16_t nb_pkts)
{
struct rte_eth_dev *dev;
@@ -3123,7 +3273,8 @@ rte_eth_tx_prepare(uint8_t port_id, uint16_t queue_id,
*/
static inline uint16_t
-rte_eth_tx_prepare(__rte_unused uint8_t port_id, __rte_unused uint16_t queue_id,
+rte_eth_tx_prepare(__rte_unused uint16_t port_id,
+ __rte_unused uint16_t queue_id,
__rte_unused struct rte_mbuf **tx_pkts, uint16_t nb_pkts)
{
return nb_pkts;
@@ -3192,7 +3343,7 @@ rte_eth_tx_buffer_init(struct rte_eth_dev_tx_buffer *buffer, uint16_t size);
* callback is called for any packets which could not be sent.
*/
static inline uint16_t
-rte_eth_tx_buffer_flush(uint8_t port_id, uint16_t queue_id,
+rte_eth_tx_buffer_flush(uint16_t port_id, uint16_t queue_id,
struct rte_eth_dev_tx_buffer *buffer)
{
uint16_t sent;
@@ -3244,7 +3395,7 @@ rte_eth_tx_buffer_flush(uint8_t port_id, uint16_t queue_id,
* the rest.
*/
static __rte_always_inline uint16_t
-rte_eth_tx_buffer(uint8_t port_id, uint16_t queue_id,
+rte_eth_tx_buffer(uint16_t port_id, uint16_t queue_id,
struct rte_eth_dev_tx_buffer *buffer, struct rte_mbuf *tx_pkt)
{
buffer->pkts[buffer->length++] = tx_pkt;
@@ -3360,7 +3511,7 @@ rte_eth_tx_buffer_count_callback(struct rte_mbuf **pkts, uint16_t unsent,
* are in use.
*/
int
-rte_eth_tx_done_cleanup(uint8_t port_id, uint16_t queue_id, uint32_t free_cnt);
+rte_eth_tx_done_cleanup(uint16_t port_id, uint16_t queue_id, uint32_t free_cnt);
/**
* The eth device event type for interrupt, and maybe others in the future.
@@ -3378,7 +3529,7 @@ enum rte_eth_event_type {
RTE_ETH_EVENT_MAX /**< max value of this enum */
};
-typedef int (*rte_eth_dev_cb_fn)(uint8_t port_id,
+typedef int (*rte_eth_dev_cb_fn)(uint16_t port_id,
enum rte_eth_event_type event, void *cb_arg, void *ret_param);
/**< user application callback to be registered for interrupts */
@@ -3400,7 +3551,7 @@ typedef int (*rte_eth_dev_cb_fn)(uint8_t port_id,
* - On success, zero.
* - On failure, a negative value.
*/
-int rte_eth_dev_callback_register(uint8_t port_id,
+int rte_eth_dev_callback_register(uint16_t port_id,
enum rte_eth_event_type event,
rte_eth_dev_cb_fn cb_fn, void *cb_arg);
@@ -3421,7 +3572,7 @@ int rte_eth_dev_callback_register(uint8_t port_id,
* - On success, zero.
* - On failure, a negative value.
*/
-int rte_eth_dev_callback_unregister(uint8_t port_id,
+int rte_eth_dev_callback_unregister(uint16_t port_id,
enum rte_eth_event_type event,
rte_eth_dev_cb_fn cb_fn, void *cb_arg);
@@ -3467,7 +3618,7 @@ int _rte_eth_dev_callback_process(struct rte_eth_dev *dev,
* that operation.
* - (-ENODEV) if *port_id* invalid.
*/
-int rte_eth_dev_rx_intr_enable(uint8_t port_id, uint16_t queue_id);
+int rte_eth_dev_rx_intr_enable(uint16_t port_id, uint16_t queue_id);
/**
* When lcore wakes up from rx interrupt indicating packet coming, disable rx
@@ -3488,7 +3639,7 @@ int rte_eth_dev_rx_intr_enable(uint8_t port_id, uint16_t queue_id);
* that operation.
* - (-ENODEV) if *port_id* invalid.
*/
-int rte_eth_dev_rx_intr_disable(uint8_t port_id, uint16_t queue_id);
+int rte_eth_dev_rx_intr_disable(uint16_t port_id, uint16_t queue_id);
/**
* RX Interrupt control per port.
@@ -3507,7 +3658,7 @@ int rte_eth_dev_rx_intr_disable(uint8_t port_id, uint16_t queue_id);
* - On success, zero.
* - On failure, a negative value.
*/
-int rte_eth_dev_rx_intr_ctl(uint8_t port_id, int epfd, int op, void *data);
+int rte_eth_dev_rx_intr_ctl(uint16_t port_id, int epfd, int op, void *data);
/**
* RX Interrupt control per queue.
@@ -3530,7 +3681,7 @@ int rte_eth_dev_rx_intr_ctl(uint8_t port_id, int epfd, int op, void *data);
* - On success, zero.
* - On failure, a negative value.
*/
-int rte_eth_dev_rx_intr_ctl_q(uint8_t port_id, uint16_t queue_id,
+int rte_eth_dev_rx_intr_ctl_q(uint16_t port_id, uint16_t queue_id,
int epfd, int op, void *data);
/**
@@ -3545,7 +3696,7 @@ int rte_eth_dev_rx_intr_ctl_q(uint8_t port_id, uint16_t queue_id,
* that operation.
* - (-ENODEV) if *port_id* invalid.
*/
-int rte_eth_led_on(uint8_t port_id);
+int rte_eth_led_on(uint16_t port_id);
/**
* Turn off the LED on the Ethernet device.
@@ -3559,7 +3710,7 @@ int rte_eth_led_on(uint8_t port_id);
* that operation.
* - (-ENODEV) if *port_id* invalid.
*/
-int rte_eth_led_off(uint8_t port_id);
+int rte_eth_led_off(uint16_t port_id);
/**
* Get current status of the Ethernet link flow control for Ethernet device
@@ -3573,7 +3724,7 @@ int rte_eth_led_off(uint8_t port_id);
* - (-ENOTSUP) if hardware doesn't support flow control.
* - (-ENODEV) if *port_id* invalid.
*/
-int rte_eth_dev_flow_ctrl_get(uint8_t port_id,
+int rte_eth_dev_flow_ctrl_get(uint16_t port_id,
struct rte_eth_fc_conf *fc_conf);
/**
@@ -3590,7 +3741,7 @@ int rte_eth_dev_flow_ctrl_get(uint8_t port_id,
* - (-EINVAL) if bad parameter
* - (-EIO) if flow control setup failure
*/
-int rte_eth_dev_flow_ctrl_set(uint8_t port_id,
+int rte_eth_dev_flow_ctrl_set(uint16_t port_id,
struct rte_eth_fc_conf *fc_conf);
/**
@@ -3608,7 +3759,7 @@ int rte_eth_dev_flow_ctrl_set(uint8_t port_id,
* - (-EINVAL) if bad parameter
* - (-EIO) if flow control setup failure
*/
-int rte_eth_dev_priority_flow_ctrl_set(uint8_t port_id,
+int rte_eth_dev_priority_flow_ctrl_set(uint16_t port_id,
struct rte_eth_pfc_conf *pfc_conf);
/**
@@ -3629,7 +3780,7 @@ int rte_eth_dev_priority_flow_ctrl_set(uint8_t port_id,
* - (-ENOSPC) if no more MAC addresses can be added.
* - (-EINVAL) if MAC address is invalid.
*/
-int rte_eth_dev_mac_addr_add(uint8_t port, struct ether_addr *mac_addr,
+int rte_eth_dev_mac_addr_add(uint16_t port, struct ether_addr *mac_addr,
uint32_t pool);
/**
@@ -3645,7 +3796,7 @@ int rte_eth_dev_mac_addr_add(uint8_t port, struct ether_addr *mac_addr,
* - (-ENODEV) if *port* invalid.
* - (-EADDRINUSE) if attempting to remove the default MAC address
*/
-int rte_eth_dev_mac_addr_remove(uint8_t port, struct ether_addr *mac_addr);
+int rte_eth_dev_mac_addr_remove(uint16_t port, struct ether_addr *mac_addr);
/**
* Set the default MAC address.
@@ -3660,8 +3811,8 @@ int rte_eth_dev_mac_addr_remove(uint8_t port, struct ether_addr *mac_addr);
* - (-ENODEV) if *port* invalid.
* - (-EINVAL) if MAC address is invalid.
*/
-int rte_eth_dev_default_mac_addr_set(uint8_t port, struct ether_addr *mac_addr);
-
+int rte_eth_dev_default_mac_addr_set(uint16_t port,
+ struct ether_addr *mac_addr);
/**
* Update Redirection Table(RETA) of Receive Side Scaling of Ethernet device.
@@ -3678,7 +3829,7 @@ int rte_eth_dev_default_mac_addr_set(uint8_t port, struct ether_addr *mac_addr);
* - (-ENOTSUP) if hardware doesn't support.
* - (-EINVAL) if bad parameter.
*/
-int rte_eth_dev_rss_reta_update(uint8_t port,
+int rte_eth_dev_rss_reta_update(uint16_t port,
struct rte_eth_rss_reta_entry64 *reta_conf,
uint16_t reta_size);
@@ -3697,7 +3848,7 @@ int rte_eth_dev_rss_reta_update(uint8_t port,
* - (-ENOTSUP) if hardware doesn't support.
* - (-EINVAL) if bad parameter.
*/
-int rte_eth_dev_rss_reta_query(uint8_t port,
+int rte_eth_dev_rss_reta_query(uint16_t port,
struct rte_eth_rss_reta_entry64 *reta_conf,
uint16_t reta_size);
@@ -3719,8 +3870,8 @@ int rte_eth_dev_rss_reta_query(uint8_t port,
* - (-ENODEV) if *port_id* invalid.
* - (-EINVAL) if bad parameter.
*/
-int rte_eth_dev_uc_hash_table_set(uint8_t port,struct ether_addr *addr,
- uint8_t on);
+int rte_eth_dev_uc_hash_table_set(uint16_t port, struct ether_addr *addr,
+ uint8_t on);
/**
* Updates all unicast hash bitmaps for receiving packet with any Unicast
@@ -3739,7 +3890,7 @@ int rte_eth_dev_uc_hash_table_set(uint8_t port,struct ether_addr *addr,
* - (-ENODEV) if *port_id* invalid.
* - (-EINVAL) if bad parameter.
*/
-int rte_eth_dev_uc_all_hash_table_set(uint8_t port,uint8_t on);
+int rte_eth_dev_uc_all_hash_table_set(uint16_t port, uint8_t on);
/**
* Set a traffic mirroring rule on an Ethernet device
@@ -3762,7 +3913,7 @@ int rte_eth_dev_uc_all_hash_table_set(uint8_t port,uint8_t on);
* - (-ENODEV) if *port_id* invalid.
* - (-EINVAL) if the mr_conf information is not correct.
*/
-int rte_eth_mirror_rule_set(uint8_t port_id,
+int rte_eth_mirror_rule_set(uint16_t port_id,
struct rte_eth_mirror_conf *mirror_conf,
uint8_t rule_id,
uint8_t on);
@@ -3780,7 +3931,7 @@ int rte_eth_mirror_rule_set(uint8_t port_id,
* - (-ENODEV) if *port_id* invalid.
* - (-EINVAL) if bad parameter.
*/
-int rte_eth_mirror_rule_reset(uint8_t port_id,
+int rte_eth_mirror_rule_reset(uint16_t port_id,
uint8_t rule_id);
/**
@@ -3798,7 +3949,7 @@ int rte_eth_mirror_rule_reset(uint8_t port_id,
* - (-ENODEV) if *port_id* invalid.
* - (-EINVAL) if bad parameter.
*/
-int rte_eth_set_queue_rate_limit(uint8_t port_id, uint16_t queue_idx,
+int rte_eth_set_queue_rate_limit(uint16_t port_id, uint16_t queue_idx,
uint16_t tx_rate);
/**
@@ -3814,7 +3965,7 @@ int rte_eth_set_queue_rate_limit(uint8_t port_id, uint16_t queue_idx,
* - (-ENOTSUP) if hardware doesn't support.
* - (-EINVAL) if bad parameter.
*/
-int rte_eth_dev_rss_hash_update(uint8_t port_id,
+int rte_eth_dev_rss_hash_update(uint16_t port_id,
struct rte_eth_rss_conf *rss_conf);
/**
@@ -3831,7 +3982,7 @@ int rte_eth_dev_rss_hash_update(uint8_t port_id,
* - (-ENOTSUP) if hardware doesn't support RSS.
*/
int
-rte_eth_dev_rss_hash_conf_get(uint8_t port_id,
+rte_eth_dev_rss_hash_conf_get(uint16_t port_id,
struct rte_eth_rss_conf *rss_conf);
/**
@@ -3852,7 +4003,7 @@ rte_eth_dev_rss_hash_conf_get(uint8_t port_id,
* - (-ENOTSUP) if hardware doesn't support tunnel type.
*/
int
-rte_eth_dev_udp_tunnel_port_add(uint8_t port_id,
+rte_eth_dev_udp_tunnel_port_add(uint16_t port_id,
struct rte_eth_udp_tunnel *tunnel_udp);
/**
@@ -3874,7 +4025,7 @@ rte_eth_dev_udp_tunnel_port_add(uint8_t port_id,
* - (-ENOTSUP) if hardware doesn't support tunnel type.
*/
int
-rte_eth_dev_udp_tunnel_port_delete(uint8_t port_id,
+rte_eth_dev_udp_tunnel_port_delete(uint16_t port_id,
struct rte_eth_udp_tunnel *tunnel_udp);
/**
@@ -3890,7 +4041,8 @@ rte_eth_dev_udp_tunnel_port_delete(uint8_t port_id,
* - (-ENOTSUP) if hardware doesn't support this filter type.
* - (-ENODEV) if *port_id* invalid.
*/
-int rte_eth_dev_filter_supported(uint8_t port_id, enum rte_filter_type filter_type);
+int rte_eth_dev_filter_supported(uint16_t port_id,
+ enum rte_filter_type filter_type);
/**
* Take operations to assigned filter type on an Ethernet device.
@@ -3910,7 +4062,7 @@ int rte_eth_dev_filter_supported(uint8_t port_id, enum rte_filter_type filter_ty
* - (-ENODEV) if *port_id* invalid.
* - others depends on the specific operations implementation.
*/
-int rte_eth_dev_filter_ctrl(uint8_t port_id, enum rte_filter_type filter_type,
+int rte_eth_dev_filter_ctrl(uint16_t port_id, enum rte_filter_type filter_type,
enum rte_filter_op filter_op, void *arg);
/**
@@ -3925,7 +4077,7 @@ int rte_eth_dev_filter_ctrl(uint8_t port_id, enum rte_filter_type filter_type,
* - (-ENODEV) if port identifier is invalid.
* - (-ENOTSUP) if hardware doesn't support.
*/
-int rte_eth_dev_get_dcb_info(uint8_t port_id,
+int rte_eth_dev_get_dcb_info(uint16_t port_id,
struct rte_eth_dcb_info *dcb_info);
/**
@@ -3952,7 +4104,7 @@ int rte_eth_dev_get_dcb_info(uint8_t port_id,
* NULL on error.
* On success, a pointer value which can later be used to remove the callback.
*/
-void *rte_eth_add_rx_callback(uint8_t port_id, uint16_t queue_id,
+void *rte_eth_add_rx_callback(uint16_t port_id, uint16_t queue_id,
rte_rx_callback_fn fn, void *user_param);
/**
@@ -3980,7 +4132,7 @@ void *rte_eth_add_rx_callback(uint8_t port_id, uint16_t queue_id,
* NULL on error.
* On success, a pointer value which can later be used to remove the callback.
*/
-void *rte_eth_add_first_rx_callback(uint8_t port_id, uint16_t queue_id,
+void *rte_eth_add_first_rx_callback(uint16_t port_id, uint16_t queue_id,
rte_rx_callback_fn fn, void *user_param);
/**
@@ -4007,7 +4159,7 @@ void *rte_eth_add_first_rx_callback(uint8_t port_id, uint16_t queue_id,
* NULL on error.
* On success, a pointer value which can later be used to remove the callback.
*/
-void *rte_eth_add_tx_callback(uint8_t port_id, uint16_t queue_id,
+void *rte_eth_add_tx_callback(uint16_t port_id, uint16_t queue_id,
rte_tx_callback_fn fn, void *user_param);
/**
@@ -4040,7 +4192,7 @@ void *rte_eth_add_tx_callback(uint8_t port_id, uint16_t queue_id,
* - -EINVAL: The port_id or the queue_id is out of range, or the callback
* is NULL or not found for the port/queue.
*/
-int rte_eth_remove_rx_callback(uint8_t port_id, uint16_t queue_id,
+int rte_eth_remove_rx_callback(uint16_t port_id, uint16_t queue_id,
struct rte_eth_rxtx_callback *user_cb);
/**
@@ -4073,7 +4225,7 @@ int rte_eth_remove_rx_callback(uint8_t port_id, uint16_t queue_id,
* - -EINVAL: The port_id or the queue_id is out of range, or the callback
* is NULL or not found for the port/queue.
*/
-int rte_eth_remove_tx_callback(uint8_t port_id, uint16_t queue_id,
+int rte_eth_remove_tx_callback(uint16_t port_id, uint16_t queue_id,
struct rte_eth_rxtx_callback *user_cb);
/**
@@ -4093,7 +4245,7 @@ int rte_eth_remove_tx_callback(uint8_t port_id, uint16_t queue_id,
* - -ENOTSUP: routine is not supported by the device PMD.
* - -EINVAL: The port_id or the queue_id is out of range.
*/
-int rte_eth_rx_queue_info_get(uint8_t port_id, uint16_t queue_id,
+int rte_eth_rx_queue_info_get(uint16_t port_id, uint16_t queue_id,
struct rte_eth_rxq_info *qinfo);
/**
@@ -4113,7 +4265,7 @@ int rte_eth_rx_queue_info_get(uint8_t port_id, uint16_t queue_id,
* - -ENOTSUP: routine is not supported by the device PMD.
* - -EINVAL: The port_id or the queue_id is out of range.
*/
-int rte_eth_tx_queue_info_get(uint8_t port_id, uint16_t queue_id,
+int rte_eth_tx_queue_info_get(uint16_t port_id, uint16_t queue_id,
struct rte_eth_txq_info *qinfo);
/**
@@ -4132,7 +4284,7 @@ int rte_eth_tx_queue_info_get(uint8_t port_id, uint16_t queue_id,
* - (-ENODEV) if *port_id* invalid.
* - others depends on the specific operations implementation.
*/
-int rte_eth_dev_get_reg_info(uint8_t port_id, struct rte_dev_reg_info *info);
+int rte_eth_dev_get_reg_info(uint16_t port_id, struct rte_dev_reg_info *info);
/**
* Retrieve size of device EEPROM
@@ -4145,7 +4297,7 @@ int rte_eth_dev_get_reg_info(uint8_t port_id, struct rte_dev_reg_info *info);
* - (-ENODEV) if *port_id* invalid.
* - others depends on the specific operations implementation.
*/
-int rte_eth_dev_get_eeprom_length(uint8_t port_id);
+int rte_eth_dev_get_eeprom_length(uint16_t port_id);
/**
* Retrieve EEPROM and EEPROM attribute
@@ -4161,7 +4313,7 @@ int rte_eth_dev_get_eeprom_length(uint8_t port_id);
* - (-ENODEV) if *port_id* invalid.
* - others depends on the specific operations implementation.
*/
-int rte_eth_dev_get_eeprom(uint8_t port_id, struct rte_dev_eeprom_info *info);
+int rte_eth_dev_get_eeprom(uint16_t port_id, struct rte_dev_eeprom_info *info);
/**
* Program EEPROM with provided data
@@ -4177,7 +4329,7 @@ int rte_eth_dev_get_eeprom(uint8_t port_id, struct rte_dev_eeprom_info *info);
* - (-ENODEV) if *port_id* invalid.
* - others depends on the specific operations implementation.
*/
-int rte_eth_dev_set_eeprom(uint8_t port_id, struct rte_dev_eeprom_info *info);
+int rte_eth_dev_set_eeprom(uint16_t port_id, struct rte_dev_eeprom_info *info);
/**
* Set the list of multicast addresses to filter on an Ethernet device.
@@ -4196,7 +4348,7 @@ int rte_eth_dev_set_eeprom(uint8_t port_id, struct rte_dev_eeprom_info *info);
* - (-ENOTSUP) if PMD of *port_id* doesn't support multicast filtering.
* - (-ENOSPC) if *port_id* has not enough multicast filtering resources.
*/
-int rte_eth_dev_set_mc_addr_list(uint8_t port_id,
+int rte_eth_dev_set_mc_addr_list(uint16_t port_id,
struct ether_addr *mc_addr_set,
uint32_t nb_mc_addr);
@@ -4211,7 +4363,7 @@ int rte_eth_dev_set_mc_addr_list(uint8_t port_id,
* - -ENODEV: The port ID is invalid.
* - -ENOTSUP: The function is not supported by the Ethernet driver.
*/
-int rte_eth_timesync_enable(uint8_t port_id);
+int rte_eth_timesync_enable(uint16_t port_id);
/**
* Disable IEEE1588/802.1AS timestamping for an Ethernet device.
@@ -4224,7 +4376,7 @@ int rte_eth_timesync_enable(uint8_t port_id);
* - -ENODEV: The port ID is invalid.
* - -ENOTSUP: The function is not supported by the Ethernet driver.
*/
-int rte_eth_timesync_disable(uint8_t port_id);
+int rte_eth_timesync_disable(uint16_t port_id);
/**
* Read an IEEE1588/802.1AS RX timestamp from an Ethernet device.
@@ -4243,7 +4395,7 @@ int rte_eth_timesync_disable(uint8_t port_id);
* - -ENODEV: The port ID is invalid.
* - -ENOTSUP: The function is not supported by the Ethernet driver.
*/
-int rte_eth_timesync_read_rx_timestamp(uint8_t port_id,
+int rte_eth_timesync_read_rx_timestamp(uint16_t port_id,
struct timespec *timestamp, uint32_t flags);
/**
@@ -4260,7 +4412,7 @@ int rte_eth_timesync_read_rx_timestamp(uint8_t port_id,
* - -ENODEV: The port ID is invalid.
* - -ENOTSUP: The function is not supported by the Ethernet driver.
*/
-int rte_eth_timesync_read_tx_timestamp(uint8_t port_id,
+int rte_eth_timesync_read_tx_timestamp(uint16_t port_id,
struct timespec *timestamp);
/**
@@ -4279,7 +4431,7 @@ int rte_eth_timesync_read_tx_timestamp(uint8_t port_id,
* - -ENODEV: The port ID is invalid.
* - -ENOTSUP: The function is not supported by the Ethernet driver.
*/
-int rte_eth_timesync_adjust_time(uint8_t port_id, int64_t delta);
+int rte_eth_timesync_adjust_time(uint16_t port_id, int64_t delta);
/**
* Read the time from the timesync clock on an Ethernet device.
@@ -4295,7 +4447,7 @@ int rte_eth_timesync_adjust_time(uint8_t port_id, int64_t delta);
* @return
* - 0: Success.
*/
-int rte_eth_timesync_read_time(uint8_t port_id, struct timespec *time);
+int rte_eth_timesync_read_time(uint16_t port_id, struct timespec *time);
/**
* Set the time of the timesync clock on an Ethernet device.
@@ -4314,7 +4466,7 @@ int rte_eth_timesync_read_time(uint8_t port_id, struct timespec *time);
* - -ENODEV: The port ID is invalid.
* - -ENOTSUP: The function is not supported by the Ethernet driver.
*/
-int rte_eth_timesync_write_time(uint8_t port_id, const struct timespec *time);
+int rte_eth_timesync_write_time(uint16_t port_id, const struct timespec *time);
/**
* Create memzone for HW rings.
@@ -4355,7 +4507,7 @@ rte_eth_dma_zone_reserve(const struct rte_eth_dev *eth_dev, const char *name,
* - (-ENOTSUP) if hardware doesn't support tunnel type.
*/
int
-rte_eth_dev_l2_tunnel_eth_type_conf(uint8_t port_id,
+rte_eth_dev_l2_tunnel_eth_type_conf(uint16_t port_id,
struct rte_eth_l2_tunnel_conf *l2_tunnel);
/**
@@ -4382,7 +4534,7 @@ rte_eth_dev_l2_tunnel_eth_type_conf(uint8_t port_id,
* - (-ENOTSUP) if hardware doesn't support tunnel type.
*/
int
-rte_eth_dev_l2_tunnel_offload_set(uint8_t port_id,
+rte_eth_dev_l2_tunnel_offload_set(uint16_t port_id,
struct rte_eth_l2_tunnel_conf *l2_tunnel,
uint32_t mask,
uint8_t en);
@@ -4400,7 +4552,7 @@ rte_eth_dev_l2_tunnel_offload_set(uint8_t port_id,
* - (-ENODEV or -EINVAL) on failure.
*/
int
-rte_eth_dev_get_port_by_name(const char *name, uint8_t *port_id);
+rte_eth_dev_get_port_by_name(const char *name, uint16_t *port_id);
/**
* Get the device name from port id
@@ -4414,7 +4566,7 @@ rte_eth_dev_get_port_by_name(const char *name, uint8_t *port_id);
* - (-EINVAL) on failure.
*/
int
-rte_eth_dev_get_name_by_port(uint8_t port_id, char *name);
+rte_eth_dev_get_name_by_port(uint16_t port_id, char *name);
/**
* Check that numbers of Rx and Tx descriptors satisfy descriptors limits from
@@ -4432,10 +4584,28 @@ rte_eth_dev_get_name_by_port(uint8_t port_id, char *name);
* - (0) if successful.
* - (-ENOTSUP, -ENODEV or -EINVAL) on failure.
*/
-int rte_eth_dev_adjust_nb_rx_tx_desc(uint8_t port_id,
+int rte_eth_dev_adjust_nb_rx_tx_desc(uint16_t port_id,
uint16_t *nb_rx_desc,
uint16_t *nb_tx_desc);
+
+/**
+ * Test if a port supports specific mempool ops.
+ *
+ * @param port_id
+ * Port identifier of the Ethernet device.
+ * @param [in] pool
+ * The name of the pool operations to test.
+ * @return
+ * - 0: best mempool ops choice for this port.
+ * - 1: mempool ops are supported for this port.
+ * - -ENOTSUP: mempool ops not supported for this port.
+ * - -ENODEV: Invalid port Identifier.
+ * - -EINVAL: Pool param is null.
+ */
+int
+rte_eth_dev_pool_ops_supported(uint16_t port_id, const char *pool);
+
#ifdef __cplusplus
}
#endif
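
For illustration, a minimal sketch (not part of this patch) of how an application could interpret the return convention of the new rte_eth_dev_pool_ops_supported() API added above; port number and the "stack" ops name are assumptions made for the example only.

    #include <stdio.h>
    #include <rte_ethdev.h>

    static void
    check_pool_ops(uint16_t port_id, const char *ops_name)
    {
            int ret = rte_eth_dev_pool_ops_supported(port_id, ops_name);

            if (ret == 0)
                    printf("%s is the best mempool ops for port %u\n", ops_name, port_id);
            else if (ret == 1)
                    printf("%s is supported (but not preferred) on port %u\n", ops_name, port_id);
            else
                    printf("%s cannot be used on port %u (error %d)\n", ops_name, port_id, ret);
    }

A call such as check_pool_ops(0, "stack") would then let the application pick the driver-preferred mempool ops before creating its packet pools.
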
diff --git a/lib/librte_ether/rte_ethdev_pci.h b/lib/librte_ether/rte_ethdev_pci.h
index 56b10721..722075e0 100644
--- a/lib/librte_ether/rte_ethdev_pci.h
+++ b/lib/librte_ether/rte_ethdev_pci.h
@@ -36,6 +36,7 @@
#include <rte_malloc.h>
#include <rte_pci.h>
+#include <rte_bus_pci.h>
#include <rte_ethdev.h>
/**
diff --git a/lib/librte_ether/rte_ethdev_vdev.h b/lib/librte_ether/rte_ethdev_vdev.h
index 4d2c3e2b..ff92e6ed 100644
--- a/lib/librte_ether/rte_ethdev_vdev.h
+++ b/lib/librte_ether/rte_ethdev_vdev.h
@@ -35,7 +35,7 @@
#define _RTE_ETHDEV_VDEV_H_
#include <rte_malloc.h>
-#include <rte_vdev.h>
+#include <rte_bus_vdev.h>
#include <rte_ethdev.h>
/**
diff --git a/lib/librte_ether/rte_ether_version.map b/lib/librte_ether/rte_ethdev_version.map
index 42837285..e9681ac8 100644
--- a/lib/librte_ether/rte_ether_version.map
+++ b/lib/librte_ether/rte_ethdev_version.map
@@ -187,3 +187,31 @@ DPDK_17.08 {
rte_tm_wred_profile_delete;
} DPDK_17.05;
+
+DPDK_17.11 {
+ global:
+
+ rte_eth_dev_get_sec_ctx;
+ rte_eth_dev_pool_ops_supported;
+ rte_eth_dev_reset;
+ rte_flow_error_set;
+
+} DPDK_17.08;
+
+EXPERIMENTAL {
+ global:
+
+ rte_mtr_capabilities_get;
+ rte_mtr_create;
+ rte_mtr_destroy;
+ rte_mtr_meter_disable;
+ rte_mtr_meter_dscp_table_update;
+ rte_mtr_meter_enable;
+ rte_mtr_meter_profile_add;
+ rte_mtr_meter_profile_delete;
+ rte_mtr_meter_profile_update;
+ rte_mtr_policer_actions_update;
+ rte_mtr_stats_read;
+ rte_mtr_stats_update;
+
+} DPDK_17.11;
diff --git a/lib/librte_ether/rte_flow.c b/lib/librte_ether/rte_flow.c
index 2001fbbf..66590630 100644
--- a/lib/librte_ether/rte_flow.c
+++ b/lib/librte_ether/rte_flow.c
@@ -108,7 +108,7 @@ static const struct rte_flow_desc_data rte_flow_desc_action[] = {
/* Get generic flow operations structure from a port. */
const struct rte_flow_ops *
-rte_flow_ops_get(uint8_t port_id, struct rte_flow_error *error)
+rte_flow_ops_get(uint16_t port_id, struct rte_flow_error *error)
{
struct rte_eth_dev *dev = &rte_eth_devices[port_id];
const struct rte_flow_ops *ops;
@@ -132,7 +132,7 @@ rte_flow_ops_get(uint8_t port_id, struct rte_flow_error *error)
/* Check whether a flow rule can be created on a given port. */
int
-rte_flow_validate(uint8_t port_id,
+rte_flow_validate(uint16_t port_id,
const struct rte_flow_attr *attr,
const struct rte_flow_item pattern[],
const struct rte_flow_action actions[],
@@ -145,14 +145,14 @@ rte_flow_validate(uint8_t port_id,
return -rte_errno;
if (likely(!!ops->validate))
return ops->validate(dev, attr, pattern, actions, error);
- return -rte_flow_error_set(error, ENOSYS,
- RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
- NULL, rte_strerror(ENOSYS));
+ return rte_flow_error_set(error, ENOSYS,
+ RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
+ NULL, rte_strerror(ENOSYS));
}
/* Create a flow rule on a given port. */
struct rte_flow *
-rte_flow_create(uint8_t port_id,
+rte_flow_create(uint16_t port_id,
const struct rte_flow_attr *attr,
const struct rte_flow_item pattern[],
const struct rte_flow_action actions[],
@@ -172,7 +172,7 @@ rte_flow_create(uint8_t port_id,
/* Destroy a flow rule on a given port. */
int
-rte_flow_destroy(uint8_t port_id,
+rte_flow_destroy(uint16_t port_id,
struct rte_flow *flow,
struct rte_flow_error *error)
{
@@ -183,14 +183,14 @@ rte_flow_destroy(uint8_t port_id,
return -rte_errno;
if (likely(!!ops->destroy))
return ops->destroy(dev, flow, error);
- return -rte_flow_error_set(error, ENOSYS,
- RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
- NULL, rte_strerror(ENOSYS));
+ return rte_flow_error_set(error, ENOSYS,
+ RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
+ NULL, rte_strerror(ENOSYS));
}
/* Destroy all flow rules associated with a port. */
int
-rte_flow_flush(uint8_t port_id,
+rte_flow_flush(uint16_t port_id,
struct rte_flow_error *error)
{
struct rte_eth_dev *dev = &rte_eth_devices[port_id];
@@ -200,14 +200,14 @@ rte_flow_flush(uint8_t port_id,
return -rte_errno;
if (likely(!!ops->flush))
return ops->flush(dev, error);
- return -rte_flow_error_set(error, ENOSYS,
- RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
- NULL, rte_strerror(ENOSYS));
+ return rte_flow_error_set(error, ENOSYS,
+ RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
+ NULL, rte_strerror(ENOSYS));
}
/* Query an existing flow rule. */
int
-rte_flow_query(uint8_t port_id,
+rte_flow_query(uint16_t port_id,
struct rte_flow *flow,
enum rte_flow_action_type action,
void *data,
@@ -220,14 +220,14 @@ rte_flow_query(uint8_t port_id,
return -rte_errno;
if (likely(!!ops->query))
return ops->query(dev, flow, action, data, error);
- return -rte_flow_error_set(error, ENOSYS,
- RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
- NULL, rte_strerror(ENOSYS));
+ return rte_flow_error_set(error, ENOSYS,
+ RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
+ NULL, rte_strerror(ENOSYS));
}
/* Restrict ingress traffic to the defined flow rules. */
int
-rte_flow_isolate(uint8_t port_id,
+rte_flow_isolate(uint16_t port_id,
int set,
struct rte_flow_error *error)
{
@@ -238,9 +238,28 @@ rte_flow_isolate(uint8_t port_id,
return -rte_errno;
if (likely(!!ops->isolate))
return ops->isolate(dev, set, error);
- return -rte_flow_error_set(error, ENOSYS,
- RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
- NULL, rte_strerror(ENOSYS));
+ return rte_flow_error_set(error, ENOSYS,
+ RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
+ NULL, rte_strerror(ENOSYS));
+}
+
+/* Initialize flow error structure. */
+int
+rte_flow_error_set(struct rte_flow_error *error,
+ int code,
+ enum rte_flow_error_type type,
+ const void *cause,
+ const char *message)
+{
+ if (error) {
+ *error = (struct rte_flow_error){
+ .type = type,
+ .cause = cause,
+ .message = message,
+ };
+ }
+ rte_errno = code;
+ return -code;
}
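
Because rte_flow_error_set() now returns the negated error code itself, the librte_ether callers above drop their leading minus sign and driver callbacks can return it directly. A hedged sketch of a PMD-side validate callback using the new convention (the callback name and the rejected attribute are purely illustrative):

    #include <errno.h>
    #include <rte_flow_driver.h>

    static int
    example_flow_validate(struct rte_eth_dev *dev,
                    const struct rte_flow_attr *attr,
                    const struct rte_flow_item pattern[],
                    const struct rte_flow_action actions[],
                    struct rte_flow_error *error)
    {
            (void)dev;
            (void)pattern;
            (void)actions;
            /* Reject any rule placed outside group 0 with a verbose error. */
            if (attr->group != 0)
                    return rte_flow_error_set(error, ENOTSUP,
                                    RTE_FLOW_ERROR_TYPE_ATTR_GROUP,
                                    attr, "flow groups are not supported");
            return 0; /* rule accepted */
    }
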
/** Compute storage space needed by item specification. */
diff --git a/lib/librte_ether/rte_flow.h b/lib/librte_ether/rte_flow.h
index bba6169f..47c88ea5 100644
--- a/lib/librte_ether/rte_flow.h
+++ b/lib/librte_ether/rte_flow.h
@@ -50,6 +50,7 @@
#include <rte_tcp.h>
#include <rte_udp.h>
#include <rte_byteorder.h>
+#include <rte_esp.h>
#ifdef __cplusplus
extern "C" {
@@ -309,6 +310,40 @@ enum rte_flow_item_type {
* See struct rte_flow_item_fuzzy.
*/
RTE_FLOW_ITEM_TYPE_FUZZY,
+
+ /**
+ * Matches a GTP header.
+ *
+ * Configure flow for GTP packets.
+ *
+ * See struct rte_flow_item_gtp.
+ */
+ RTE_FLOW_ITEM_TYPE_GTP,
+
+ /**
+ * Matches a GTP header.
+ *
+ * Configure flow for GTP-C packets.
+ *
+ * See struct rte_flow_item_gtp.
+ */
+ RTE_FLOW_ITEM_TYPE_GTPC,
+
+ /**
+ * Matches a GTP header.
+ *
+ * Configure flow for GTP-U packets.
+ *
+ * See struct rte_flow_item_gtp.
+ */
+ RTE_FLOW_ITEM_TYPE_GTPU,
+
+ /**
+ * Matches an ESP header.
+ *
+ * See struct rte_flow_item_esp.
+ */
+ RTE_FLOW_ITEM_TYPE_ESP,
};
/**
@@ -735,6 +770,49 @@ static const struct rte_flow_item_fuzzy rte_flow_item_fuzzy_mask = {
#endif
/**
+ * RTE_FLOW_ITEM_TYPE_GTP.
+ *
+ * Matches a GTPv1 header.
+ */
+struct rte_flow_item_gtp {
+ /**
+ * Version (3b), protocol type (1b), reserved (1b),
+ * Extension header flag (1b),
+ * Sequence number flag (1b),
+ * N-PDU number flag (1b).
+ */
+ uint8_t v_pt_rsv_flags;
+ uint8_t msg_type; /**< Message type. */
+ rte_be16_t msg_len; /**< Message length. */
+ rte_be32_t teid; /**< Tunnel endpoint identifier. */
+};
+
+/** Default mask for RTE_FLOW_ITEM_TYPE_GTP. */
+#ifndef __cplusplus
+static const struct rte_flow_item_gtp rte_flow_item_gtp_mask = {
+ .teid = RTE_BE32(0xffffffff),
+};
+#endif
+
+/**
+ * RTE_FLOW_ITEM_TYPE_ESP
+ *
+ * Matches an ESP header.
+ */
+struct rte_flow_item_esp {
+ struct esp_hdr hdr; /**< ESP header definition. */
+};
+
+/** Default mask for RTE_FLOW_ITEM_TYPE_ESP. */
+#ifndef __cplusplus
+static const struct rte_flow_item_esp rte_flow_item_esp_mask = {
+ .hdr = {
+ .spi = 0xffffffff,
+ },
+};
+#endif
+
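
A hedged sketch of how the new GTP item could be used in a pattern, matching GTP-U traffic by TEID only, as the default mask above does; the TEID value is illustrative:

    struct rte_flow_item_gtp gtp_spec = {
            .teid = RTE_BE32(0x1234), /* tunnel endpoint identifier to match */
    };
    struct rte_flow_item pattern[] = {
            { .type = RTE_FLOW_ITEM_TYPE_ETH },
            { .type = RTE_FLOW_ITEM_TYPE_IPV4 },
            { .type = RTE_FLOW_ITEM_TYPE_UDP },
            { .type = RTE_FLOW_ITEM_TYPE_GTPU,
              .spec = &gtp_spec,
              .mask = &rte_flow_item_gtp_mask },
            { .type = RTE_FLOW_ITEM_TYPE_END },
    };
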
+/**
* Matching pattern item definition.
*
* A pattern is formed by stacking items starting from the lowest protocol
@@ -915,6 +993,22 @@ enum rte_flow_action_type {
* See struct rte_flow_action_vf.
*/
RTE_FLOW_ACTION_TYPE_VF,
+
+ /**
+ * Traffic metering and policing (MTR).
+ *
+ * See struct rte_flow_action_meter.
+ * See file rte_mtr.h for MTR object configuration.
+ */
+ RTE_FLOW_ACTION_TYPE_METER,
+
+ /**
+ * Redirects packets to security engine of current device for security
+ * processing as specified by security session.
+ *
+ * See struct rte_flow_action_security.
+ */
+ RTE_FLOW_ACTION_TYPE_SECURITY
};
/**
@@ -1008,6 +1102,51 @@ struct rte_flow_action_vf {
};
/**
+ * RTE_FLOW_ACTION_TYPE_METER
+ *
+ * Traffic metering and policing (MTR).
+ *
+ * Packets matched by items of this type can be either dropped or passed to the
+ * next item with their color set by the MTR object.
+ *
+ * Non-terminating by default.
+ */
+struct rte_flow_action_meter {
+ uint32_t mtr_id; /**< MTR object ID created with rte_mtr_create(). */
+};
+
+/**
+ * RTE_FLOW_ACTION_TYPE_SECURITY
+ *
+ * Perform the security action on flows matched by the pattern items
+ * according to the configuration of the security session.
+ *
+ * This action modifies the payload of matched flows. For INLINE_CRYPTO, the
+ * security protocol headers and IV are fully provided by the application as
+ * specified in the flow pattern. The payload of matching packets is
+ * encrypted on egress, and decrypted and authenticated on ingress.
+ * For INLINE_PROTOCOL, the security protocol is fully offloaded to HW,
+ * providing full encapsulation and decapsulation of packets in security
+ * protocols. The flow pattern specifies both the outer security header fields
+ * and the inner packet fields. The security session specified in the action
+ * must match the pattern parameters.
+ *
+ * The security session specified in the action must be created on the same
+ * port as the flow action that is being specified.
+ *
+ * The ingress/egress flow attribute should match that specified in the
+ * security session if the security session supports the definition of the
+ * direction.
+ *
+ * Multiple flows can be configured to use the same security session.
+ *
+ * Non-terminating by default.
+ */
+struct rte_flow_action_security {
+ void *security_session; /**< Pointer to security session structure. */
+};
+
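
A hedged sketch of a flow action list using the new METER action together with a queue action; MTR object 1 is assumed to have been created beforehand with rte_mtr_create(), and queue 0 is illustrative:

    struct rte_flow_action_meter meter_conf = { .mtr_id = 1 };
    struct rte_flow_action_queue queue_conf = { .index = 0 };
    struct rte_flow_action actions[] = {
            { .type = RTE_FLOW_ACTION_TYPE_METER, .conf = &meter_conf },
            { .type = RTE_FLOW_ACTION_TYPE_QUEUE, .conf = &queue_conf },
            { .type = RTE_FLOW_ACTION_TYPE_END },
    };
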
+/**
* Definition of a single action.
*
* A list of actions is terminated by a END action.
@@ -1116,7 +1255,7 @@ struct rte_flow_error {
* state (see rte_eth_dev_rx_queue_stop() and rte_eth_dev_stop()).
*/
int
-rte_flow_validate(uint8_t port_id,
+rte_flow_validate(uint16_t port_id,
const struct rte_flow_attr *attr,
const struct rte_flow_item pattern[],
const struct rte_flow_action actions[],
@@ -1143,7 +1282,7 @@ rte_flow_validate(uint8_t port_id,
* rte_flow_validate().
*/
struct rte_flow *
-rte_flow_create(uint8_t port_id,
+rte_flow_create(uint16_t port_id,
const struct rte_flow_attr *attr,
const struct rte_flow_item pattern[],
const struct rte_flow_action actions[],
@@ -1170,7 +1309,7 @@ rte_flow_create(uint8_t port_id,
* 0 on success, a negative errno value otherwise and rte_errno is set.
*/
int
-rte_flow_destroy(uint8_t port_id,
+rte_flow_destroy(uint16_t port_id,
struct rte_flow *flow,
struct rte_flow_error *error);
@@ -1191,7 +1330,7 @@ rte_flow_destroy(uint8_t port_id,
* 0 on success, a negative errno value otherwise and rte_errno is set.
*/
int
-rte_flow_flush(uint8_t port_id,
+rte_flow_flush(uint16_t port_id,
struct rte_flow_error *error);
/**
@@ -1219,7 +1358,7 @@ rte_flow_flush(uint8_t port_id,
* 0 on success, a negative errno value otherwise and rte_errno is set.
*/
int
-rte_flow_query(uint8_t port_id,
+rte_flow_query(uint16_t port_id,
struct rte_flow *flow,
enum rte_flow_action_type action,
void *data,
@@ -1267,7 +1406,31 @@ rte_flow_query(uint8_t port_id,
* 0 on success, a negative errno value otherwise and rte_errno is set.
*/
int
-rte_flow_isolate(uint8_t port_id, int set, struct rte_flow_error *error);
+rte_flow_isolate(uint16_t port_id, int set, struct rte_flow_error *error);
+
+/**
+ * Initialize flow error structure.
+ *
+ * @param[out] error
+ * Pointer to flow error structure (may be NULL).
+ * @param code
+ * Related error code (rte_errno).
+ * @param type
+ * Cause field and error types.
+ * @param cause
+ * Object responsible for the error.
+ * @param message
+ * Human-readable error message.
+ *
+ * @return
+ * Negative error code (errno value) and rte_errno is set.
+ */
+int
+rte_flow_error_set(struct rte_flow_error *error,
+ int code,
+ enum rte_flow_error_type type,
+ const void *cause,
+ const char *message);
/**
* Generic flow representation.
diff --git a/lib/librte_ether/rte_flow_driver.h b/lib/librte_ether/rte_flow_driver.h
index 4d95391d..254d1cb2 100644
--- a/lib/librte_ether/rte_flow_driver.h
+++ b/lib/librte_ether/rte_flow_driver.h
@@ -45,7 +45,6 @@
#include <stdint.h>
-#include <rte_errno.h>
#include "rte_ethdev.h"
#include "rte_flow.h"
@@ -128,43 +127,6 @@ struct rte_flow_ops {
};
/**
- * Initialize generic flow error structure.
- *
- * This function also sets rte_errno to a given value.
- *
- * @param[out] error
- * Pointer to flow error structure (may be NULL).
- * @param code
- * Related error code (rte_errno).
- * @param type
- * Cause field and error types.
- * @param cause
- * Object responsible for the error.
- * @param message
- * Human-readable error message.
- *
- * @return
- * Error code.
- */
-static inline int
-rte_flow_error_set(struct rte_flow_error *error,
- int code,
- enum rte_flow_error_type type,
- const void *cause,
- const char *message)
-{
- if (error) {
- *error = (struct rte_flow_error){
- .type = type,
- .cause = cause,
- .message = message,
- };
- }
- rte_errno = code;
- return code;
-}
-
-/**
* Get generic flow operations structure from a port.
*
* @param port_id
@@ -178,7 +140,7 @@ rte_flow_error_set(struct rte_flow_error *error,
* additional details.
*/
const struct rte_flow_ops *
-rte_flow_ops_get(uint8_t port_id, struct rte_flow_error *error);
+rte_flow_ops_get(uint16_t port_id, struct rte_flow_error *error);
#ifdef __cplusplus
}
diff --git a/lib/librte_ether/rte_mtr.c b/lib/librte_ether/rte_mtr.c
new file mode 100644
index 00000000..4f56f871
--- /dev/null
+++ b/lib/librte_ether/rte_mtr.c
@@ -0,0 +1,229 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright(c) 2017 Intel Corporation. All rights reserved.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <stdint.h>
+
+#include <rte_errno.h>
+#include "rte_ethdev.h"
+#include "rte_mtr_driver.h"
+#include "rte_mtr.h"
+
+/* Get generic traffic metering & policing operations structure from a port. */
+const struct rte_mtr_ops *
+rte_mtr_ops_get(uint16_t port_id, struct rte_mtr_error *error)
+{
+ struct rte_eth_dev *dev = &rte_eth_devices[port_id];
+ const struct rte_mtr_ops *ops;
+
+ if (!rte_eth_dev_is_valid_port(port_id)) {
+ rte_mtr_error_set(error,
+ ENODEV,
+ RTE_MTR_ERROR_TYPE_UNSPECIFIED,
+ NULL,
+ rte_strerror(ENODEV));
+ return NULL;
+ }
+
+ if ((dev->dev_ops->mtr_ops_get == NULL) ||
+ (dev->dev_ops->mtr_ops_get(dev, &ops) != 0) ||
+ (ops == NULL)) {
+ rte_mtr_error_set(error,
+ ENOSYS,
+ RTE_MTR_ERROR_TYPE_UNSPECIFIED,
+ NULL,
+ rte_strerror(ENOSYS));
+ return NULL;
+ }
+
+ return ops;
+}
+
+#define RTE_MTR_FUNC(port_id, func) \
+({ \
+ const struct rte_mtr_ops *ops = \
+ rte_mtr_ops_get(port_id, error); \
+ if (ops == NULL) \
+ return -rte_errno; \
+ \
+ if (ops->func == NULL) \
+ return -rte_mtr_error_set(error, \
+ ENOSYS, \
+ RTE_MTR_ERROR_TYPE_UNSPECIFIED, \
+ NULL, \
+ rte_strerror(ENOSYS)); \
+ \
+ ops->func; \
+})
+
+/* MTR capabilities get */
+int
+rte_mtr_capabilities_get(uint16_t port_id,
+ struct rte_mtr_capabilities *cap,
+ struct rte_mtr_error *error)
+{
+ struct rte_eth_dev *dev = &rte_eth_devices[port_id];
+ return RTE_MTR_FUNC(port_id, capabilities_get)(dev,
+ cap, error);
+}
+
+/* MTR meter profile add */
+int
+rte_mtr_meter_profile_add(uint16_t port_id,
+ uint32_t meter_profile_id,
+ struct rte_mtr_meter_profile *profile,
+ struct rte_mtr_error *error)
+{
+ struct rte_eth_dev *dev = &rte_eth_devices[port_id];
+ return RTE_MTR_FUNC(port_id, meter_profile_add)(dev,
+ meter_profile_id, profile, error);
+}
+
+/** MTR meter profile delete */
+int
+rte_mtr_meter_profile_delete(uint16_t port_id,
+ uint32_t meter_profile_id,
+ struct rte_mtr_error *error)
+{
+ struct rte_eth_dev *dev = &rte_eth_devices[port_id];
+ return RTE_MTR_FUNC(port_id, meter_profile_delete)(dev,
+ meter_profile_id, error);
+}
+
+/** MTR object create */
+int
+rte_mtr_create(uint16_t port_id,
+ uint32_t mtr_id,
+ struct rte_mtr_params *params,
+ int shared,
+ struct rte_mtr_error *error)
+{
+ struct rte_eth_dev *dev = &rte_eth_devices[port_id];
+ return RTE_MTR_FUNC(port_id, create)(dev,
+ mtr_id, params, shared, error);
+}
+
+/** MTR object destroy */
+int
+rte_mtr_destroy(uint16_t port_id,
+ uint32_t mtr_id,
+ struct rte_mtr_error *error)
+{
+ struct rte_eth_dev *dev = &rte_eth_devices[port_id];
+ return RTE_MTR_FUNC(port_id, destroy)(dev,
+ mtr_id, error);
+}
+
+/** MTR object meter enable */
+int
+rte_mtr_meter_enable(uint16_t port_id,
+ uint32_t mtr_id,
+ struct rte_mtr_error *error)
+{
+ struct rte_eth_dev *dev = &rte_eth_devices[port_id];
+ return RTE_MTR_FUNC(port_id, meter_enable)(dev,
+ mtr_id, error);
+}
+
+/** MTR object meter disable */
+int
+rte_mtr_meter_disable(uint16_t port_id,
+ uint32_t mtr_id,
+ struct rte_mtr_error *error)
+{
+ struct rte_eth_dev *dev = &rte_eth_devices[port_id];
+ return RTE_MTR_FUNC(port_id, meter_disable)(dev,
+ mtr_id, error);
+}
+
+/** MTR object meter profile update */
+int
+rte_mtr_meter_profile_update(uint16_t port_id,
+ uint32_t mtr_id,
+ uint32_t meter_profile_id,
+ struct rte_mtr_error *error)
+{
+ struct rte_eth_dev *dev = &rte_eth_devices[port_id];
+ return RTE_MTR_FUNC(port_id, meter_profile_update)(dev,
+ mtr_id, meter_profile_id, error);
+}
+
+/** MTR object meter DSCP table update */
+int
+rte_mtr_meter_dscp_table_update(uint16_t port_id,
+ uint32_t mtr_id,
+ enum rte_mtr_color *dscp_table,
+ struct rte_mtr_error *error)
+{
+ struct rte_eth_dev *dev = &rte_eth_devices[port_id];
+ return RTE_MTR_FUNC(port_id, meter_dscp_table_update)(dev,
+ mtr_id, dscp_table, error);
+}
+
+/** MTR object policer action update */
+int
+rte_mtr_policer_actions_update(uint16_t port_id,
+ uint32_t mtr_id,
+ uint32_t action_mask,
+ enum rte_mtr_policer_action *actions,
+ struct rte_mtr_error *error)
+{
+ struct rte_eth_dev *dev = &rte_eth_devices[port_id];
+ return RTE_MTR_FUNC(port_id, policer_actions_update)(dev,
+ mtr_id, action_mask, actions, error);
+}
+
+/** MTR object enabled stats update */
+int
+rte_mtr_stats_update(uint16_t port_id,
+ uint32_t mtr_id,
+ uint64_t stats_mask,
+ struct rte_mtr_error *error)
+{
+ struct rte_eth_dev *dev = &rte_eth_devices[port_id];
+ return RTE_MTR_FUNC(port_id, stats_update)(dev,
+ mtr_id, stats_mask, error);
+}
+
+/** MTR object stats read */
+int
+rte_mtr_stats_read(uint16_t port_id,
+ uint32_t mtr_id,
+ struct rte_mtr_stats *stats,
+ uint64_t *stats_mask,
+ int clear,
+ struct rte_mtr_error *error)
+{
+ struct rte_eth_dev *dev = &rte_eth_devices[port_id];
+ return RTE_MTR_FUNC(port_id, stats_read)(dev,
+ mtr_id, stats, stats_mask, clear, error);
+}
diff --git a/lib/librte_ether/rte_mtr.h b/lib/librte_ether/rte_mtr.h
new file mode 100644
index 00000000..f6b6ef3b
--- /dev/null
+++ b/lib/librte_ether/rte_mtr.h
@@ -0,0 +1,730 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright 2017 Intel Corporation
+ * Copyright 2017 NXP
+ * Copyright 2017 Cavium
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef __INCLUDE_RTE_MTR_H__
+#define __INCLUDE_RTE_MTR_H__
+
+/**
+ * @file
+ * RTE Generic Traffic Metering and Policing API
+ *
+ * This interface provides the ability to configure the traffic metering and
+ * policing (MTR) in a generic way.
+ *
+ * The processing done for each input packet hitting a MTR object is:
+ * A) Traffic metering: The packet is assigned a color (the meter output
+ * color), based on the previous history of the flow reflected in the
+ * current state of the MTR object, according to the specific traffic
+ * metering algorithm. The traffic metering algorithm can typically work
+ * in color aware mode, in which case the input packet already has an
+ * initial color (the input color), or in color blind mode, which is
+ * equivalent to considering all input packets initially colored as green.
+ * B) Policing: There is a separate policer action configured for each meter
+ * output color, which can:
+ * a) Drop the packet.
+ * b) Keep the same packet color: the policer output color matches the
+ * meter output color (essentially a no-op action).
+ * c) Recolor the packet: the policer output color is different than
+ * the meter output color.
+ * The policer output color is the output color of the packet, which is
+ * set in the packet meta-data (i.e. struct rte_mbuf::sched::color).
+ * C) Statistics: The set of counters maintained for each MTR object is
+ * configurable and subject to the implementation support. This set
+ * includes the number of packets and bytes dropped or passed for each
+ * output color.
+ *
+ * Once successfully created, an MTR object is linked to one or several flows
+ * through the meter action of the flow API.
+ * A) Whether an MTR object is private to a flow or potentially shared by
+ * several flows has to be specified at creation time.
+ * B) Several meter actions can be potentially registered for the same flow.
+ *
+ * @warning
+ * @b EXPERIMENTAL: this API may change without prior notice
+ */
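
A minimal sketch (not part of this file) of the configuration sequence described above, using declarations introduced later in this header; the port, profile and object identifiers as well as the rate values are assumptions made for illustration only:

    struct rte_mtr_error err;
    struct rte_mtr_meter_profile profile = {
            .alg = RTE_MTR_SRTCM_RFC2697,
            .srtcm_rfc2697 = {
                    .cir = 1250000, /* 10 Mbps expressed in bytes/second */
                    .cbs = 2048,
                    .ebs = 4096,
            },
    };
    struct rte_mtr_params params = {
            .meter_profile_id = 5,
            .meter_enable = 1,
            .action = {
                    [RTE_MTR_GREEN] = MTR_POLICER_ACTION_COLOR_GREEN,
                    [RTE_MTR_YELLOW] = MTR_POLICER_ACTION_COLOR_YELLOW,
                    [RTE_MTR_RED] = MTR_POLICER_ACTION_DROP,
            },
            .stats_mask = RTE_MTR_STATS_N_PKTS_DROPPED,
    };

    /* Add the profile, then create a shared MTR object that uses it. */
    if (rte_mtr_meter_profile_add(0, 5, &profile, &err) == 0 &&
        rte_mtr_create(0, 1, &params, 1 /* shared */, &err) == 0) {
            /* MTR object 1 can now be referenced by RTE_FLOW_ACTION_TYPE_METER. */
    }
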
+#include <stdint.h>
+
+#include <rte_common.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/**
+ * Color
+ */
+enum rte_mtr_color {
+ RTE_MTR_GREEN = 0, /**< Green */
+ RTE_MTR_YELLOW, /**< Yellow */
+ RTE_MTR_RED, /**< Red */
+ RTE_MTR_COLORS /**< Number of colors. */
+};
+
+/**
+ * Statistics counter type
+ */
+enum rte_mtr_stats_type {
+ /** Number of packets passed as green by the policer. */
+ RTE_MTR_STATS_N_PKTS_GREEN = 1 << 0,
+
+ /** Number of packets passed as yellow by the policer. */
+ RTE_MTR_STATS_N_PKTS_YELLOW = 1 << 1,
+
+ /** Number of packets passed as red by the policer. */
+ RTE_MTR_STATS_N_PKTS_RED = 1 << 2,
+
+ /** Number of packets dropped by the policer. */
+ RTE_MTR_STATS_N_PKTS_DROPPED = 1 << 3,
+
+ /** Number of bytes passed as green by the policer. */
+ RTE_MTR_STATS_N_BYTES_GREEN = 1 << 4,
+
+ /** Number of bytes passed as yellow by the policer. */
+ RTE_MTR_STATS_N_BYTES_YELLOW = 1 << 5,
+
+ /** Number of bytes passed as red by the policer. */
+ RTE_MTR_STATS_N_BYTES_RED = 1 << 6,
+
+ /** Number of bytes dropped by the policer. */
+ RTE_MTR_STATS_N_BYTES_DROPPED = 1 << 7,
+};
+
+/**
+ * Statistics counters
+ */
+struct rte_mtr_stats {
+ /** Number of packets passed by the policer (per color). */
+ uint64_t n_pkts[RTE_MTR_COLORS];
+
+ /** Number of bytes passed by the policer (per color). */
+ uint64_t n_bytes[RTE_MTR_COLORS];
+
+ /** Number of packets dropped by the policer. */
+ uint64_t n_pkts_dropped;
+
+ /** Number of bytes passed by the policer. */
+ uint64_t n_bytes_dropped;
+};
+
+/**
+ * Traffic metering algorithms
+ */
+enum rte_mtr_algorithm {
+ /** No traffic metering performed, the output color is the same as the
+ * input color for every input packet. The meter of the MTR object is
+ * working in pass-through mode, having the same effect as meter disable.
+ * @see rte_mtr_meter_disable()
+ */
+ RTE_MTR_NONE = 0,
+
+ /** Single Rate Three Color Marker (srTCM) - IETF RFC 2697. */
+ RTE_MTR_SRTCM_RFC2697,
+
+ /** Two Rate Three Color Marker (trTCM) - IETF RFC 2698. */
+ RTE_MTR_TRTCM_RFC2698,
+
+ /** Two Rate Three Color Marker (trTCM) - IETF RFC 4115. */
+ RTE_MTR_TRTCM_RFC4115,
+};
+
+/**
+ * Meter profile
+ */
+struct rte_mtr_meter_profile {
+ /** Traffic metering algorithm. */
+ enum rte_mtr_algorithm alg;
+
+ RTE_STD_C11
+ union {
+ /** Items only valid when *alg* is set to srTCM - RFC 2697. */
+ struct {
+ /** Committed Information Rate (CIR) (bytes/second). */
+ uint64_t cir;
+
+ /** Committed Burst Size (CBS) (bytes). */
+ uint64_t cbs;
+
+ /** Excess Burst Size (EBS) (bytes). */
+ uint64_t ebs;
+ } srtcm_rfc2697;
+
+ /** Items only valid when *alg* is set to trTCM - RFC 2698. */
+ struct {
+ /** Committed Information Rate (CIR) (bytes/second). */
+ uint64_t cir;
+
+ /** Peak Information Rate (PIR) (bytes/second). */
+ uint64_t pir;
+
+ /** Committed Burst Size (CBS) (bytes). */
+ uint64_t cbs;
+
+ /** Peak Burst Size (PBS) (bytes). */
+ uint64_t pbs;
+ } trtcm_rfc2698;
+
+ /** Items only valid when *alg* is set to trTCM - RFC 4115. */
+ struct {
+ /** Committed Information Rate (CIR) (bytes/second). */
+ uint64_t cir;
+
+ /** Excess Information Rate (EIR) (bytes/second). */
+ uint64_t eir;
+
+ /** Committed Burst Size (CBS) (bytes). */
+ uint64_t cbs;
+
+ /** Excess Burst Size (EBS) (bytes). */
+ uint64_t ebs;
+ } trtcm_rfc4115;
+ };
+};
+
+/**
+ * Policer actions
+ */
+enum rte_mtr_policer_action {
+ /** Recolor the packet as green. */
+ MTR_POLICER_ACTION_COLOR_GREEN = 0,
+
+ /** Recolor the packet as yellow. */
+ MTR_POLICER_ACTION_COLOR_YELLOW,
+
+ /** Recolor the packet as red. */
+ MTR_POLICER_ACTION_COLOR_RED,
+
+ /** Drop the packet. */
+ MTR_POLICER_ACTION_DROP,
+};
+
+/**
+ * Parameters for each traffic metering & policing object
+ *
+ * @see enum rte_mtr_stats_type
+ */
+struct rte_mtr_params {
+ /** Meter profile ID. */
+ uint32_t meter_profile_id;
+
+ /** Meter input color in case of MTR object chaining. When non-zero: if
+ * a previous MTR object is enabled in the same flow, then the color
+ * determined by the latest MTR object in the same flow is used as the
+ * input color by the current MTR object, otherwise the current MTR
+ * object uses the *dscp_table* to determine the input color. When zero:
+ * the color determined by any previous MTR object in same flow is
+ * ignored by the current MTR object, which uses the *dscp_table* to
+ * determine the input color.
+ */
+ int use_prev_mtr_color;
+
+ /** Meter input color. When non-NULL: it points to a pre-allocated and
+ * pre-populated table with exactly 64 elements providing the input
+ * color for each value of the IPv4/IPv6 Differentiated Services Code
+ * Point (DSCP) input packet field. When NULL: it is equivalent to
+ * setting this parameter to an all-green populated table (i.e. table
+ * with all the 64 elements set to green color). The color blind mode
+ * is configured by setting *use_prev_mtr_color* to 0 and *dscp_table*
+ * to either NULL or to an all-green populated table. When
+ * *use_prev_mtr_color* is non-zero or when *dscp_table* contains
+ * at least one yellow or red color element, then the color aware mode
+ * is configured.
+ */
+ enum rte_mtr_color *dscp_table;
+
+ /** Non-zero to enable the meter, zero to disable the meter at the time
+ * of MTR object creation. Ignored when the meter profile indicated by
+ * *meter_profile_id* is set to NONE.
+ * @see rte_mtr_meter_disable()
+ */
+ int meter_enable;
+
+ /** Policer actions (per meter output color). */
+ enum rte_mtr_policer_action action[RTE_MTR_COLORS];
+
+ /** Set of stats counters to be enabled.
+ * @see enum rte_mtr_stats_type
+ */
+ uint64_t stats_mask;
+};
+
+/**
+ * MTR capabilities
+ */
+struct rte_mtr_capabilities {
+ /** Maximum number of MTR objects. */
+ uint32_t n_max;
+
+ /** Maximum number of MTR objects that can be shared by multiple flows.
+ * The value of zero indicates that shared MTR objects are not
+ * supported. The maximum value is *n_max*.
+ */
+ uint32_t n_shared_max;
+
+ /** When non-zero, this flag indicates that all the MTR objects that
+ * cannot be shared by multiple flows have identical capability set.
+ */
+ int identical;
+
+ /** When non-zero, this flag indicates that all the MTR objects that
+ * can be shared by multiple flows have identical capability set.
+ */
+ int shared_identical;
+
+ /** Maximum number of flows that can share the same MTR object. The
+ * value of zero is invalid. The value of 1 means that shared MTR
+ * objects are not supported.
+ */
+ uint32_t shared_n_flows_per_mtr_max;
+
+ /** Maximum number of MTR objects that can be part of the same flow. The
+ * value of zero is invalid. The value of 1 indicates that MTR object
+ * chaining is not supported. The maximum value is *n_max*.
+ */
+ uint32_t chaining_n_mtrs_per_flow_max;
+
+ /**
+ * When non-zero, it indicates that the packet color identified by one
+ * MTR object can be used as the packet input color by any subsequent
+ * MTR object from the same flow. When zero, it indicates that the color
+ * determined by one MTR object is always ignored by any subsequent MTR
+ * object from the same flow. Only valid when MTR chaining is supported,
+ * i.e. *chaining_n_mtrs_per_flow_max* is greater than 1. When non-zero,
+ * it also means that the color aware mode is supported by at least one
+ * metering algorithm.
+ */
+ int chaining_use_prev_mtr_color_supported;
+
+ /**
+ * When non-zero, it indicates that the packet color identified by one
+ * MTR object is always used as the packet input color by any subsequent
+ * MTR object that is part of the same flow. When zero, it indicates
+ * that whether the color determined by one MTR object is either ignored
+ * or used as the packet input color by any subsequent MTR object from
+ * the same flow is individually configurable for each MTR object. Only
+ * valid when *chaining_use_prev_mtr_color_supported* is non-zero.
+ */
+ int chaining_use_prev_mtr_color_enforced;
+
+ /** Maximum number of MTR objects that can have their meter configured
+ * to run the srTCM RFC 2697 algorithm. The value of 0 indicates this
+ * metering algorithm is not supported. The maximum value is *n_max*.
+ */
+ uint32_t meter_srtcm_rfc2697_n_max;
+
+ /** Maximum number of MTR objects that can have their meter configured
+ * to run the trTCM RFC 2698 algorithm. The value of 0 indicates this
+ * metering algorithm is not supported. The maximum value is *n_max*.
+ */
+ uint32_t meter_trtcm_rfc2698_n_max;
+
+ /** Maximum number of MTR objects that can have their meter configured
+ * to run the trTCM RFC 4115 algorithm. The value of 0 indicates this
+ * metering algorithm is not supported. The maximum value is *n_max*.
+ */
+ uint32_t meter_trtcm_rfc4115_n_max;
+
+ /** Maximum traffic rate that can be metered by a single MTR object. For
+ * srTCM RFC 2697, this is the maximum CIR rate. For trTCM RFC 2698,
+ * this is the maximum PIR rate. For trTCM RFC 4115, this is the maximum
+ * value for the sum of PIR and EIR rates.
+ */
+ uint64_t meter_rate_max;
+
+ /**
+ * When non-zero, it indicates that color aware mode is supported for
+ * the srTCM RFC 2697 metering algorithm.
+ */
+ int color_aware_srtcm_rfc2697_supported;
+
+ /**
+ * When non-zero, it indicates that color aware mode is supported for
+ * the trTCM RFC 2698 metering algorithm.
+ */
+ int color_aware_trtcm_rfc2698_supported;
+
+ /**
+ * When non-zero, it indicates that color aware mode is supported for
+ * the trTCM RFC 4115 metering algorithm.
+ */
+ int color_aware_trtcm_rfc4115_supported;
+
+ /** When non-zero, it indicates that the policer packet recolor actions
+ * are supported.
+ * @see enum rte_mtr_policer_action
+ */
+ int policer_action_recolor_supported;
+
+ /** When non-zero, it indicates that the policer packet drop action is
+ * supported.
+ * @see enum rte_mtr_policer_action
+ */
+ int policer_action_drop_supported;
+
+ /** Set of supported statistics counter types.
+ * @see enum rte_mtr_stats_type
+ */
+ uint64_t stats_mask;
+};
+
+/**
+ * Verbose error types.
+ *
+ * Most of them provide the type of the object referenced by struct
+ * rte_mtr_error::cause.
+ */
+enum rte_mtr_error_type {
+ RTE_MTR_ERROR_TYPE_NONE, /**< No error. */
+ RTE_MTR_ERROR_TYPE_UNSPECIFIED, /**< Cause unspecified. */
+ RTE_MTR_ERROR_TYPE_METER_PROFILE_ID,
+ RTE_MTR_ERROR_TYPE_METER_PROFILE,
+ RTE_MTR_ERROR_TYPE_MTR_ID,
+ RTE_MTR_ERROR_TYPE_MTR_PARAMS,
+ RTE_MTR_ERROR_TYPE_POLICER_ACTION_GREEN,
+ RTE_MTR_ERROR_TYPE_POLICER_ACTION_YELLOW,
+ RTE_MTR_ERROR_TYPE_POLICER_ACTION_RED,
+ RTE_MTR_ERROR_TYPE_STATS_MASK,
+ RTE_MTR_ERROR_TYPE_STATS,
+ RTE_MTR_ERROR_TYPE_SHARED,
+};
+
+/**
+ * Verbose error structure definition.
+ *
+ * This object is normally allocated by applications and set by PMDs; the
+ * message points to a constant string which does not need to be freed by
+ * the application. However, its pointer can be considered valid only as long
+ * as its associated DPDK port remains configured. Closing the underlying
+ * device or unloading the PMD invalidates it.
+ *
+ * Both cause and message may be NULL regardless of the error type.
+ */
+struct rte_mtr_error {
+ enum rte_mtr_error_type type; /**< Cause field and error type. */
+ const void *cause; /**< Object responsible for the error. */
+ const char *message; /**< Human-readable error message. */
+};
+
+/**
+ * MTR capabilities get
+ *
+ * @param[in] port_id
+ * The port identifier of the Ethernet device.
+ * @param[out] cap
+ * MTR capabilities. Needs to be pre-allocated and valid.
+ * @param[out] error
+ * Error details. Filled in only on error, when not NULL.
+ * @return
+ * 0 on success, non-zero error code otherwise.
+ */
+int
+rte_mtr_capabilities_get(uint16_t port_id,
+ struct rte_mtr_capabilities *cap,
+ struct rte_mtr_error *error);
+
+/**
+ * Meter profile add
+ *
+ * Create a new meter profile with ID set to *meter_profile_id*. The new profile
+ * is used to create one or several MTR objects.
+ *
+ * @param[in] port_id
+ * The port identifier of the Ethernet device.
+ * @param[in] meter_profile_id
+ * ID for the new meter profile. Needs to be unused by any of the existing
+ * meter profiles added for the current port.
+ * @param[in] profile
+ * Meter profile parameters. Needs to be pre-allocated and valid.
+ * @param[out] error
+ * Error details. Filled in only on error, when not NULL.
+ * @return
+ * 0 on success, non-zero error code otherwise.
+ */
+int
+rte_mtr_meter_profile_add(uint16_t port_id,
+ uint32_t meter_profile_id,
+ struct rte_mtr_meter_profile *profile,
+ struct rte_mtr_error *error);
+
+/**
+ * Meter profile delete
+ *
+ * Delete an existing meter profile. This operation fails when there is
+ * currently at least one user (i.e. MTR object) of this profile.
+ *
+ * @param[in] port_id
+ * The port identifier of the Ethernet device.
+ * @param[in] meter_profile_id
+ * Meter profile ID. Needs to be valid.
+ * @param[out] error
+ * Error details. Filled in only on error, when not NULL.
+ * @return
+ * 0 on success, non-zero error code otherwise.
+ */
+int
+rte_mtr_meter_profile_delete(uint16_t port_id,
+ uint32_t meter_profile_id,
+ struct rte_mtr_error *error);
+
+/**
+ * MTR object create
+ *
+ * Create a new MTR object for the current port. This object is run as part of
+ * the associated flow action for traffic metering and policing.
+ *
+ * @param[in] port_id
+ * The port identifier of the Ethernet device.
+ * @param[in] mtr_id
+ * MTR object ID. Needs to be unused by any of the existing MTR objects
+ * created for the current port.
+ * @param[in] params
+ * MTR object params. Needs to be pre-allocated and valid.
+ * @param[in] shared
+ * Non-zero when this MTR object can be shared by multiple flows, zero when
+ * this MTR object can be used by a single flow.
+ * @param[out] error
+ * Error details. Filled in only on error, when not NULL.
+ * @return
+ * 0 on success, non-zero error code otherwise.
+ *
+ * @see enum rte_flow_action_type::RTE_FLOW_ACTION_TYPE_METER
+ */
+int
+rte_mtr_create(uint16_t port_id,
+ uint32_t mtr_id,
+ struct rte_mtr_params *params,
+ int shared,
+ struct rte_mtr_error *error);
+
+/**
+ * MTR object destroy
+ *
+ * Delete an existing MTR object. This operation fails when there is currently
+ * at least one user (i.e. flow) of this MTR object.
+ *
+ * @param[in] port_id
+ * The port identifier of the Ethernet device.
+ * @param[in] mtr_id
+ * MTR object ID. Needs to be valid and created for the current port.
+ * @param[out] error
+ * Error details. Filled in only on error, when not NULL.
+ * @return
+ * 0 on success, non-zero error code otherwise.
+ */
+int
+rte_mtr_destroy(uint16_t port_id,
+ uint32_t mtr_id,
+ struct rte_mtr_error *error);
+
+/**
+ * MTR object meter disable
+ *
+ * Disable the meter of an existing MTR object. In disabled state, the meter of
+ * the current MTR object works in pass-through mode, meaning that for each
+ * input packet the meter output color is always the same as the input color. In
+ * particular, when the meter of the current MTR object is configured in color
+ * blind mode, the input color is always green, so the meter output color is
+ * also always green. Note that the policer and the statistics of the current
+ * MTR object are working as usual while the meter is disabled. No action is
+ * taken and this function returns successfully when the meter of the current
+ * MTR object is already disabled.
+ *
+ * @param[in] port_id
+ * The port identifier of the Ethernet device.
+ * @param[in] mtr_id
+ * MTR object ID.
+ * @param[out] error
+ * Error details. Filled in only on error, when not NULL.
+ * @return
+ * 0 on success, non-zero error code otherwise.
+ */
+int
+rte_mtr_meter_disable(uint16_t port_id,
+ uint32_t mtr_id,
+ struct rte_mtr_error *error);
+
+/**
+ * MTR object meter enable
+ *
+ * Enable the meter of an existing MTR object. If the MTR object has its meter
+ * already enabled, then no action is taken and this function returns
+ * successfully.
+ *
+ * @param[in] port_id
+ * The port identifier of the Ethernet device.
+ * @param[in] mtr_id
+ * MTR object ID.
+ * @param[out] error
+ * Error details. Filled in only on error, when not NULL.
+ * @return
+ * 0 on success, non-zero error code otherwise.
+ */
+int
+rte_mtr_meter_enable(uint16_t port_id,
+ uint32_t mtr_id,
+ struct rte_mtr_error *error);
+
+/**
+ * MTR object meter profile update
+ *
+ * @param[in] port_id
+ * The port identifier of the Ethernet device.
+ * @param[in] mtr_id
+ * MTR object ID. Needs to be valid.
+ * @param[in] meter_profile_id
+ * Meter profile ID for the current MTR object. Needs to be valid.
+ * @param[out] error
+ * Error details. Filled in only on error, when not NULL.
+ * @return
+ * 0 on success, non-zero error code otherwise.
+ */
+int
+rte_mtr_meter_profile_update(uint16_t port_id,
+ uint32_t mtr_id,
+ uint32_t meter_profile_id,
+ struct rte_mtr_error *error);
+
+/**
+ * MTR object DSCP table update
+ *
+ * @param[in] port_id
+ * The port identifier of the Ethernet device.
+ * @param[in] mtr_id
+ * MTR object ID. Needs to be valid.
+ * @param[in] dscp_table
+ * When non-NULL: it points to a pre-allocated and pre-populated table with
+ * exactly 64 elements providing the input color for each value of the
+ * IPv4/IPv6 Differentiated Services Code Point (DSCP) input packet field.
+ * When NULL: it is equivalent to setting this parameter to an “all-green”
+ * populated table (i.e. table with all the 64 elements set to green color).
+ * @param[out] error
+ * Error details. Filled in only on error, when not NULL.
+ * @return
+ * 0 on success, non-zero error code otherwise.
+ */
+int
+rte_mtr_meter_dscp_table_update(uint16_t port_id,
+ uint32_t mtr_id,
+ enum rte_mtr_color *dscp_table,
+ struct rte_mtr_error *error);
+
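
A hedged sketch of building the 64-entry table described above, marking DSCP 46 (EF) as yellow and every other code point as green; port 0 and MTR object 1 are assumed to exist already:

    enum rte_mtr_color dscp_table[64];
    struct rte_mtr_error err;
    int i;

    for (i = 0; i < 64; i++)
            dscp_table[i] = RTE_MTR_GREEN;
    dscp_table[46] = RTE_MTR_YELLOW; /* treat EF traffic as pre-colored yellow */

    rte_mtr_meter_dscp_table_update(0, 1, dscp_table, &err);
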
+/**
+ * MTR object policer actions update
+ *
+ * @param[in] port_id
+ * The port identifier of the Ethernet device.
+ * @param[in] mtr_id
+ * MTR object ID. Needs to be valid.
+ * @param[in] action_mask
+ * Bit mask indicating which policer actions need to be updated. One or more
+ * policer actions can be updated in a single function invocation. To update
+ * the policer action associated with color C, bit (1 << C) needs to be set in
+ * *action_mask* and element at position C in the *actions* array needs to be
+ * valid.
+ * @param[in] actions
+ * Pre-allocated and pre-populated array of policer actions.
+ * @param[out] error
+ * Error details. Filled in only on error, when not NULL.
+ * @return
+ * 0 on success, non-zero error code otherwise.
+ */
+int
+rte_mtr_policer_actions_update(uint16_t port_id,
+ uint32_t mtr_id,
+ uint32_t action_mask,
+ enum rte_mtr_policer_action *actions,
+ struct rte_mtr_error *error);
+
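
A hedged sketch of the *action_mask* convention described above, updating only the red policer action while leaving the green and yellow actions untouched; port 0 and MTR object 1 are assumed to exist already:

    enum rte_mtr_policer_action actions[RTE_MTR_COLORS];
    struct rte_mtr_error err;
    uint32_t action_mask = 1 << RTE_MTR_RED; /* bit position is the color index */

    actions[RTE_MTR_RED] = MTR_POLICER_ACTION_DROP;
    rte_mtr_policer_actions_update(0, 1, action_mask, actions, &err);
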
+/**
+ * MTR object enabled statistics counters update
+ *
+ * @param[in] port_id
+ * The port identifier of the Ethernet device.
+ * @param[in] mtr_id
+ * MTR object ID. Needs to be valid.
+ * @param[in] stats_mask
+ * Mask of statistics counter types to be enabled for the current MTR object.
+ * Any statistics counter type not included in this set is to be disabled for
+ * the current MTR object.
+ * @param[out] error
+ * Error details. Filled in only on error, when not NULL.
+ * @return
+ * 0 on success, non-zero error code otherwise.
+ *
+ * @see enum rte_mtr_stats_type
+ */
+int
+rte_mtr_stats_update(uint16_t port_id,
+ uint32_t mtr_id,
+ uint64_t stats_mask,
+ struct rte_mtr_error *error);
+
+/**
+ * MTR object statistics counters read
+ *
+ * @param[in] port_id
+ * The port identifier of the Ethernet device.
+ * @param[in] mtr_id
+ * MTR object ID. Needs to be valid.
+ * @param[out] stats
+ * When non-NULL, it contains the current value for the statistics counters
+ * enabled for the current MTR object.
+ * @param[out] stats_mask
+ * When non-NULL, it contains the mask of statistics counter types that are
+ * currently enabled for this MTR object, indicating which of the counters
+ * retrieved with the *stats* structure are valid.
+ * @param[in] clear
+ * When this parameter has a non-zero value, the statistics counters are
+ * cleared (i.e. set to zero) immediately after they have been read,
+ * otherwise the statistics counters are left untouched.
+ * @param[out] error
+ * Error details. Filled in only on error, when not NULL.
+ * @return
+ * 0 on success, non-zero error code otherwise.
+ *
+ * @see enum rte_mtr_stats_type
+ */
+int
+rte_mtr_stats_read(uint16_t port_id,
+ uint32_t mtr_id,
+ struct rte_mtr_stats *stats,
+ uint64_t *stats_mask,
+ int clear,
+ struct rte_mtr_error *error);
+
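
A hedged sketch of a read-and-clear cycle on the counters enabled earlier via *stats_mask*; port 0 and MTR object 1 are assumed to exist, and <stdio.h>/<inttypes.h> are assumed to be included:

    struct rte_mtr_stats stats;
    struct rte_mtr_error err;
    uint64_t mask = 0;

    /* Read the enabled counters and reset them atomically (clear = 1). */
    if (rte_mtr_stats_read(0, 1, &stats, &mask, 1, &err) == 0 &&
        (mask & RTE_MTR_STATS_N_PKTS_DROPPED))
            printf("dropped packets: %" PRIu64 "\n", stats.n_pkts_dropped);
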
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* __INCLUDE_RTE_MTR_H__ */
diff --git a/lib/librte_ether/rte_mtr_driver.h b/lib/librte_ether/rte_mtr_driver.h
new file mode 100644
index 00000000..6a289ef1
--- /dev/null
+++ b/lib/librte_ether/rte_mtr_driver.h
@@ -0,0 +1,221 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright(c) 2017 Intel Corporation. All rights reserved.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef __INCLUDE_RTE_MTR_DRIVER_H__
+#define __INCLUDE_RTE_MTR_DRIVER_H__
+
+/**
+ * @file
+ * RTE Generic Traffic Metering and Policing API (Driver Side)
+ *
+ * This file provides implementation helpers for internal use by PMDs; they
+ * are not intended to be exposed to applications and are not subject to ABI
+ * versioning.
+ */
+
+#include <stdint.h>
+
+#include <rte_errno.h>
+#include "rte_ethdev.h"
+#include "rte_mtr.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+typedef int (*rte_mtr_capabilities_get_t)(struct rte_eth_dev *dev,
+ struct rte_mtr_capabilities *cap,
+ struct rte_mtr_error *error);
+/**< @internal MTR capabilities get */
+
+typedef int (*rte_mtr_meter_profile_add_t)(struct rte_eth_dev *dev,
+ uint32_t meter_profile_id,
+ struct rte_mtr_meter_profile *profile,
+ struct rte_mtr_error *error);
+/**< @internal MTR meter profile add */
+
+typedef int (*rte_mtr_meter_profile_delete_t)(struct rte_eth_dev *dev,
+ uint32_t meter_profile_id,
+ struct rte_mtr_error *error);
+/**< @internal MTR meter profile delete */
+
+typedef int (*rte_mtr_create_t)(struct rte_eth_dev *dev,
+ uint32_t mtr_id,
+ struct rte_mtr_params *params,
+ int shared,
+ struct rte_mtr_error *error);
+/**< @internal MTR object create */
+
+typedef int (*rte_mtr_destroy_t)(struct rte_eth_dev *dev,
+ uint32_t mtr_id,
+ struct rte_mtr_error *error);
+/**< @internal MTR object destroy */
+
+typedef int (*rte_mtr_meter_enable_t)(struct rte_eth_dev *dev,
+ uint32_t mtr_id,
+ struct rte_mtr_error *error);
+/**< @internal MTR object meter enable */
+
+typedef int (*rte_mtr_meter_disable_t)(struct rte_eth_dev *dev,
+ uint32_t mtr_id,
+ struct rte_mtr_error *error);
+/**< @internal MTR object meter disable */
+
+typedef int (*rte_mtr_meter_profile_update_t)(struct rte_eth_dev *dev,
+ uint32_t mtr_id,
+ uint32_t meter_profile_id,
+ struct rte_mtr_error *error);
+/**< @internal MTR object meter profile update */
+
+typedef int (*rte_mtr_meter_dscp_table_update_t)(struct rte_eth_dev *dev,
+ uint32_t mtr_id,
+ enum rte_mtr_color *dscp_table,
+ struct rte_mtr_error *error);
+/**< @internal MTR object meter DSCP table update */
+
+typedef int (*rte_mtr_policer_actions_update_t)(struct rte_eth_dev *dev,
+ uint32_t mtr_id,
+ uint32_t action_mask,
+ enum rte_mtr_policer_action *actions,
+ struct rte_mtr_error *error);
+/**< @internal MTR object policer action update */
+
+typedef int (*rte_mtr_stats_update_t)(struct rte_eth_dev *dev,
+ uint32_t mtr_id,
+ uint64_t stats_mask,
+ struct rte_mtr_error *error);
+/**< @internal MTR object enabled stats update */
+
+typedef int (*rte_mtr_stats_read_t)(struct rte_eth_dev *dev,
+ uint32_t mtr_id,
+ struct rte_mtr_stats *stats,
+ uint64_t *stats_mask,
+ int clear,
+ struct rte_mtr_error *error);
+/**< @internal MTR object stats read */
+
+struct rte_mtr_ops {
+ /** MTR capabilities get */
+ rte_mtr_capabilities_get_t capabilities_get;
+
+ /** MTR meter profile add */
+ rte_mtr_meter_profile_add_t meter_profile_add;
+
+ /** MTR meter profile delete */
+ rte_mtr_meter_profile_delete_t meter_profile_delete;
+
+ /** MTR object create */
+ rte_mtr_create_t create;
+
+ /** MTR object destroy */
+ rte_mtr_destroy_t destroy;
+
+ /** MTR object meter enable */
+ rte_mtr_meter_enable_t meter_enable;
+
+ /** MTR object meter disable */
+ rte_mtr_meter_disable_t meter_disable;
+
+ /** MTR object meter profile update */
+ rte_mtr_meter_profile_update_t meter_profile_update;
+
+ /** MTR object meter DSCP table update */
+ rte_mtr_meter_dscp_table_update_t meter_dscp_table_update;
+
+ /** MTR object policer action update */
+ rte_mtr_policer_actions_update_t policer_actions_update;
+
+ /** MTR object enabled stats update */
+ rte_mtr_stats_update_t stats_update;
+
+ /** MTR object stats read */
+ rte_mtr_stats_read_t stats_read;
+};
+
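As a rough illustration (not part of this patch), a PMD supporting metering fills in a static rte_mtr_ops table and hands it out from its ethdev mtr_ops_get dev op. The mypmd_ names are placeholders, the cap->n_max field is assumed from struct rte_mtr_capabilities, and the hook signature mirrors the tm_ops_get pattern, so treat this as a sketch only.

	/* Hypothetical PMD glue code */
	static int
	mypmd_mtr_capabilities_get(struct rte_eth_dev *dev __rte_unused,
		struct rte_mtr_capabilities *cap,
		struct rte_mtr_error *error __rte_unused)
	{
		memset(cap, 0, sizeof(*cap));
		cap->n_max = 1;	/* single MTR object supported */
		return 0;
	}

	static const struct rte_mtr_ops mypmd_mtr_ops = {
		.capabilities_get = mypmd_mtr_capabilities_get,
	};

	/* Returned through the ethdev mtr_ops_get dev op (assumed hook name) */
	static int
	mypmd_mtr_ops_get(struct rte_eth_dev *dev __rte_unused, void *ops)
	{
		*(const struct rte_mtr_ops **)ops = &mypmd_mtr_ops;
		return 0;
	}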
+/**
+ * Initialize generic error structure.
+ *
+ * This function also sets rte_errno to a given value.
+ *
+ * @param[out] error
+ * Pointer to error structure (may be NULL).
+ * @param[in] code
+ * Related error code (rte_errno).
+ * @param[in] type
+ * Cause field and error type.
+ * @param[in] cause
+ * Object responsible for the error.
+ * @param[in] message
+ * Human-readable error message.
+ *
+ * @return
+ * Error code.
+ */
+static inline int
+rte_mtr_error_set(struct rte_mtr_error *error,
+ int code,
+ enum rte_mtr_error_type type,
+ const void *cause,
+ const char *message)
+{
+ if (error) {
+ *error = (struct rte_mtr_error){
+ .type = type,
+ .cause = cause,
+ .message = message,
+ };
+ }
+ rte_errno = code;
+ return code;
+}
+
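A hedged sketch of how a driver callback might report an error through the helper above; the mypmd_ function is invented for illustration, the callback signature follows rte_mtr_meter_profile_add_t defined earlier in this file, and the error type value is assumed from enum rte_mtr_error_type.

	/* Hypothetical PMD callback rejecting a profile it cannot support */
	static int
	mypmd_meter_profile_add(struct rte_eth_dev *dev __rte_unused,
		uint32_t meter_profile_id __rte_unused,
		struct rte_mtr_meter_profile *profile,
		struct rte_mtr_error *error)
	{
		return -rte_mtr_error_set(error, ENOTSUP,
			RTE_MTR_ERROR_TYPE_METER_PROFILE,	/* assumed enum value */
			profile, "profile algorithm not supported");
	}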
+/**
+ * Get generic traffic metering and policing operations structure from a port
+ *
+ * @param[in] port_id
+ * The port identifier of the Ethernet device.
+ * @param[out] error
+ * Error details
+ *
+ * @return
+ * The traffic metering and policing operations structure associated with
+ * port_id on success, NULL otherwise.
+ */
+const struct rte_mtr_ops *
+rte_mtr_ops_get(uint16_t port_id, struct rte_mtr_error *error);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* __INCLUDE_RTE_MTR_DRIVER_H__ */
diff --git a/lib/librte_ether/rte_tm.c b/lib/librte_ether/rte_tm.c
index 71679650..ceac3411 100644
--- a/lib/librte_ether/rte_tm.c
+++ b/lib/librte_ether/rte_tm.c
@@ -40,7 +40,7 @@
/* Get generic traffic manager operations structure from a port. */
const struct rte_tm_ops *
-rte_tm_ops_get(uint8_t port_id, struct rte_tm_error *error)
+rte_tm_ops_get(uint16_t port_id, struct rte_tm_error *error)
{
struct rte_eth_dev *dev = &rte_eth_devices[port_id];
const struct rte_tm_ops *ops;
@@ -87,7 +87,7 @@ rte_tm_ops_get(uint8_t port_id, struct rte_tm_error *error)
/* Get number of leaf nodes */
int
-rte_tm_get_number_of_leaf_nodes(uint8_t port_id,
+rte_tm_get_number_of_leaf_nodes(uint16_t port_id,
uint32_t *n_leaf_nodes,
struct rte_tm_error *error)
{
@@ -113,7 +113,7 @@ rte_tm_get_number_of_leaf_nodes(uint8_t port_id,
/* Check node type (leaf or non-leaf) */
int
-rte_tm_node_type_get(uint8_t port_id,
+rte_tm_node_type_get(uint16_t port_id,
uint32_t node_id,
int *is_leaf,
struct rte_tm_error *error)
@@ -124,7 +124,7 @@ rte_tm_node_type_get(uint8_t port_id,
}
/* Get capabilities */
-int rte_tm_capabilities_get(uint8_t port_id,
+int rte_tm_capabilities_get(uint16_t port_id,
struct rte_tm_capabilities *cap,
struct rte_tm_error *error)
{
@@ -134,7 +134,7 @@ int rte_tm_capabilities_get(uint8_t port_id,
}
/* Get level capabilities */
-int rte_tm_level_capabilities_get(uint8_t port_id,
+int rte_tm_level_capabilities_get(uint16_t port_id,
uint32_t level_id,
struct rte_tm_level_capabilities *cap,
struct rte_tm_error *error)
@@ -145,7 +145,7 @@ int rte_tm_level_capabilities_get(uint8_t port_id,
}
/* Get node capabilities */
-int rte_tm_node_capabilities_get(uint8_t port_id,
+int rte_tm_node_capabilities_get(uint16_t port_id,
uint32_t node_id,
struct rte_tm_node_capabilities *cap,
struct rte_tm_error *error)
@@ -156,7 +156,7 @@ int rte_tm_node_capabilities_get(uint8_t port_id,
}
/* Add WRED profile */
-int rte_tm_wred_profile_add(uint8_t port_id,
+int rte_tm_wred_profile_add(uint16_t port_id,
uint32_t wred_profile_id,
struct rte_tm_wred_params *profile,
struct rte_tm_error *error)
@@ -167,7 +167,7 @@ int rte_tm_wred_profile_add(uint8_t port_id,
}
/* Delete WRED profile */
-int rte_tm_wred_profile_delete(uint8_t port_id,
+int rte_tm_wred_profile_delete(uint16_t port_id,
uint32_t wred_profile_id,
struct rte_tm_error *error)
{
@@ -177,7 +177,7 @@ int rte_tm_wred_profile_delete(uint8_t port_id,
}
/* Add/update shared WRED context */
-int rte_tm_shared_wred_context_add_update(uint8_t port_id,
+int rte_tm_shared_wred_context_add_update(uint16_t port_id,
uint32_t shared_wred_context_id,
uint32_t wred_profile_id,
struct rte_tm_error *error)
@@ -188,7 +188,7 @@ int rte_tm_shared_wred_context_add_update(uint8_t port_id,
}
/* Delete shared WRED context */
-int rte_tm_shared_wred_context_delete(uint8_t port_id,
+int rte_tm_shared_wred_context_delete(uint16_t port_id,
uint32_t shared_wred_context_id,
struct rte_tm_error *error)
{
@@ -198,7 +198,7 @@ int rte_tm_shared_wred_context_delete(uint8_t port_id,
}
/* Add shaper profile */
-int rte_tm_shaper_profile_add(uint8_t port_id,
+int rte_tm_shaper_profile_add(uint16_t port_id,
uint32_t shaper_profile_id,
struct rte_tm_shaper_params *profile,
struct rte_tm_error *error)
@@ -209,7 +209,7 @@ int rte_tm_shaper_profile_add(uint8_t port_id,
}
/* Delete WRED profile */
-int rte_tm_shaper_profile_delete(uint8_t port_id,
+int rte_tm_shaper_profile_delete(uint16_t port_id,
uint32_t shaper_profile_id,
struct rte_tm_error *error)
{
@@ -219,7 +219,7 @@ int rte_tm_shaper_profile_delete(uint8_t port_id,
}
/* Add shared shaper */
-int rte_tm_shared_shaper_add_update(uint8_t port_id,
+int rte_tm_shared_shaper_add_update(uint16_t port_id,
uint32_t shared_shaper_id,
uint32_t shaper_profile_id,
struct rte_tm_error *error)
@@ -230,7 +230,7 @@ int rte_tm_shared_shaper_add_update(uint8_t port_id,
}
/* Delete shared shaper */
-int rte_tm_shared_shaper_delete(uint8_t port_id,
+int rte_tm_shared_shaper_delete(uint16_t port_id,
uint32_t shared_shaper_id,
struct rte_tm_error *error)
{
@@ -240,7 +240,7 @@ int rte_tm_shared_shaper_delete(uint8_t port_id,
}
/* Add node to port traffic manager hierarchy */
-int rte_tm_node_add(uint8_t port_id,
+int rte_tm_node_add(uint16_t port_id,
uint32_t node_id,
uint32_t parent_node_id,
uint32_t priority,
@@ -256,7 +256,7 @@ int rte_tm_node_add(uint8_t port_id,
}
/* Delete node from traffic manager hierarchy */
-int rte_tm_node_delete(uint8_t port_id,
+int rte_tm_node_delete(uint16_t port_id,
uint32_t node_id,
struct rte_tm_error *error)
{
@@ -266,7 +266,7 @@ int rte_tm_node_delete(uint8_t port_id,
}
/* Suspend node */
-int rte_tm_node_suspend(uint8_t port_id,
+int rte_tm_node_suspend(uint16_t port_id,
uint32_t node_id,
struct rte_tm_error *error)
{
@@ -276,7 +276,7 @@ int rte_tm_node_suspend(uint8_t port_id,
}
/* Resume node */
-int rte_tm_node_resume(uint8_t port_id,
+int rte_tm_node_resume(uint16_t port_id,
uint32_t node_id,
struct rte_tm_error *error)
{
@@ -286,7 +286,7 @@ int rte_tm_node_resume(uint8_t port_id,
}
/* Commit the initial port traffic manager hierarchy */
-int rte_tm_hierarchy_commit(uint8_t port_id,
+int rte_tm_hierarchy_commit(uint16_t port_id,
int clear_on_fail,
struct rte_tm_error *error)
{
@@ -296,7 +296,7 @@ int rte_tm_hierarchy_commit(uint8_t port_id,
}
/* Update node parent */
-int rte_tm_node_parent_update(uint8_t port_id,
+int rte_tm_node_parent_update(uint16_t port_id,
uint32_t node_id,
uint32_t parent_node_id,
uint32_t priority,
@@ -309,7 +309,7 @@ int rte_tm_node_parent_update(uint8_t port_id,
}
/* Update node private shaper */
-int rte_tm_node_shaper_update(uint8_t port_id,
+int rte_tm_node_shaper_update(uint16_t port_id,
uint32_t node_id,
uint32_t shaper_profile_id,
struct rte_tm_error *error)
@@ -320,7 +320,7 @@ int rte_tm_node_shaper_update(uint8_t port_id,
}
/* Update node shared shapers */
-int rte_tm_node_shared_shaper_update(uint8_t port_id,
+int rte_tm_node_shared_shaper_update(uint16_t port_id,
uint32_t node_id,
uint32_t shared_shaper_id,
int add,
@@ -332,7 +332,7 @@ int rte_tm_node_shared_shaper_update(uint8_t port_id,
}
/* Update node stats */
-int rte_tm_node_stats_update(uint8_t port_id,
+int rte_tm_node_stats_update(uint16_t port_id,
uint32_t node_id,
uint64_t stats_mask,
struct rte_tm_error *error)
@@ -343,7 +343,7 @@ int rte_tm_node_stats_update(uint8_t port_id,
}
/* Update WFQ weight mode */
-int rte_tm_node_wfq_weight_mode_update(uint8_t port_id,
+int rte_tm_node_wfq_weight_mode_update(uint16_t port_id,
uint32_t node_id,
int *wfq_weight_mode,
uint32_t n_sp_priorities,
@@ -355,7 +355,7 @@ int rte_tm_node_wfq_weight_mode_update(uint8_t port_id,
}
/* Update node congestion management mode */
-int rte_tm_node_cman_update(uint8_t port_id,
+int rte_tm_node_cman_update(uint16_t port_id,
uint32_t node_id,
enum rte_tm_cman_mode cman,
struct rte_tm_error *error)
@@ -366,7 +366,7 @@ int rte_tm_node_cman_update(uint8_t port_id,
}
/* Update node private WRED context */
-int rte_tm_node_wred_context_update(uint8_t port_id,
+int rte_tm_node_wred_context_update(uint16_t port_id,
uint32_t node_id,
uint32_t wred_profile_id,
struct rte_tm_error *error)
@@ -377,7 +377,7 @@ int rte_tm_node_wred_context_update(uint8_t port_id,
}
/* Update node shared WRED context */
-int rte_tm_node_shared_wred_context_update(uint8_t port_id,
+int rte_tm_node_shared_wred_context_update(uint16_t port_id,
uint32_t node_id,
uint32_t shared_wred_context_id,
int add,
@@ -389,7 +389,7 @@ int rte_tm_node_shared_wred_context_update(uint8_t port_id,
}
/* Read and/or clear stats counters for specific node */
-int rte_tm_node_stats_read(uint8_t port_id,
+int rte_tm_node_stats_read(uint16_t port_id,
uint32_t node_id,
struct rte_tm_node_stats *stats,
uint64_t *stats_mask,
@@ -402,7 +402,7 @@ int rte_tm_node_stats_read(uint8_t port_id,
}
/* Packet marking - VLAN DEI */
-int rte_tm_mark_vlan_dei(uint8_t port_id,
+int rte_tm_mark_vlan_dei(uint16_t port_id,
int mark_green,
int mark_yellow,
int mark_red,
@@ -414,7 +414,7 @@ int rte_tm_mark_vlan_dei(uint8_t port_id,
}
/* Packet marking - IPv4/IPv6 ECN */
-int rte_tm_mark_ip_ecn(uint8_t port_id,
+int rte_tm_mark_ip_ecn(uint16_t port_id,
int mark_green,
int mark_yellow,
int mark_red,
@@ -426,7 +426,7 @@ int rte_tm_mark_ip_ecn(uint8_t port_id,
}
/* Packet marking - IPv4/IPv6 DSCP */
-int rte_tm_mark_ip_dscp(uint8_t port_id,
+int rte_tm_mark_ip_dscp(uint16_t port_id,
int mark_green,
int mark_yellow,
int mark_red,
diff --git a/lib/librte_ether/rte_tm.h b/lib/librte_ether/rte_tm.h
index ebbfa1ee..2b25a871 100644
--- a/lib/librte_ether/rte_tm.h
+++ b/lib/librte_ether/rte_tm.h
@@ -1040,7 +1040,7 @@ struct rte_tm_error {
* 0 on success, non-zero error code otherwise.
*/
int
-rte_tm_get_number_of_leaf_nodes(uint8_t port_id,
+rte_tm_get_number_of_leaf_nodes(uint16_t port_id,
uint32_t *n_leaf_nodes,
struct rte_tm_error *error);
@@ -1064,7 +1064,7 @@ rte_tm_get_number_of_leaf_nodes(uint8_t port_id,
* 0 on success, non-zero error code otherwise.
*/
int
-rte_tm_node_type_get(uint8_t port_id,
+rte_tm_node_type_get(uint16_t port_id,
uint32_t node_id,
int *is_leaf,
struct rte_tm_error *error);
@@ -1082,7 +1082,7 @@ rte_tm_node_type_get(uint8_t port_id,
* 0 on success, non-zero error code otherwise.
*/
int
-rte_tm_capabilities_get(uint8_t port_id,
+rte_tm_capabilities_get(uint16_t port_id,
struct rte_tm_capabilities *cap,
struct rte_tm_error *error);
@@ -1102,7 +1102,7 @@ rte_tm_capabilities_get(uint8_t port_id,
* 0 on success, non-zero error code otherwise.
*/
int
-rte_tm_level_capabilities_get(uint8_t port_id,
+rte_tm_level_capabilities_get(uint16_t port_id,
uint32_t level_id,
struct rte_tm_level_capabilities *cap,
struct rte_tm_error *error);
@@ -1122,7 +1122,7 @@ rte_tm_level_capabilities_get(uint8_t port_id,
* 0 on success, non-zero error code otherwise.
*/
int
-rte_tm_node_capabilities_get(uint8_t port_id,
+rte_tm_node_capabilities_get(uint16_t port_id,
uint32_t node_id,
struct rte_tm_node_capabilities *cap,
struct rte_tm_error *error);
@@ -1147,7 +1147,7 @@ rte_tm_node_capabilities_get(uint8_t port_id,
* @see struct rte_tm_capabilities::cman_wred_context_n_max
*/
int
-rte_tm_wred_profile_add(uint8_t port_id,
+rte_tm_wred_profile_add(uint16_t port_id,
uint32_t wred_profile_id,
struct rte_tm_wred_params *profile,
struct rte_tm_error *error);
@@ -1170,7 +1170,7 @@ rte_tm_wred_profile_add(uint8_t port_id,
* @see struct rte_tm_capabilities::cman_wred_context_n_max
*/
int
-rte_tm_wred_profile_delete(uint8_t port_id,
+rte_tm_wred_profile_delete(uint16_t port_id,
uint32_t wred_profile_id,
struct rte_tm_error *error);
@@ -1201,7 +1201,7 @@ rte_tm_wred_profile_delete(uint8_t port_id,
* @see struct rte_tm_capabilities::cman_wred_context_shared_n_max
*/
int
-rte_tm_shared_wred_context_add_update(uint8_t port_id,
+rte_tm_shared_wred_context_add_update(uint16_t port_id,
uint32_t shared_wred_context_id,
uint32_t wred_profile_id,
struct rte_tm_error *error);
@@ -1225,7 +1225,7 @@ rte_tm_shared_wred_context_add_update(uint8_t port_id,
* @see struct rte_tm_capabilities::cman_wred_context_shared_n_max
*/
int
-rte_tm_shared_wred_context_delete(uint8_t port_id,
+rte_tm_shared_wred_context_delete(uint16_t port_id,
uint32_t shared_wred_context_id,
struct rte_tm_error *error);
@@ -1249,7 +1249,7 @@ rte_tm_shared_wred_context_delete(uint8_t port_id,
* @see struct rte_tm_capabilities::shaper_n_max
*/
int
-rte_tm_shaper_profile_add(uint8_t port_id,
+rte_tm_shaper_profile_add(uint16_t port_id,
uint32_t shaper_profile_id,
struct rte_tm_shaper_params *profile,
struct rte_tm_error *error);
@@ -1272,7 +1272,7 @@ rte_tm_shaper_profile_add(uint8_t port_id,
* @see struct rte_tm_capabilities::shaper_n_max
*/
int
-rte_tm_shaper_profile_delete(uint8_t port_id,
+rte_tm_shaper_profile_delete(uint16_t port_id,
uint32_t shaper_profile_id,
struct rte_tm_error *error);
@@ -1301,7 +1301,7 @@ rte_tm_shaper_profile_delete(uint8_t port_id,
* @see struct rte_tm_capabilities::shaper_shared_n_max
*/
int
-rte_tm_shared_shaper_add_update(uint8_t port_id,
+rte_tm_shared_shaper_add_update(uint16_t port_id,
uint32_t shared_shaper_id,
uint32_t shaper_profile_id,
struct rte_tm_error *error);
@@ -1324,7 +1324,7 @@ rte_tm_shared_shaper_add_update(uint8_t port_id,
* @see struct rte_tm_capabilities::shaper_shared_n_max
*/
int
-rte_tm_shared_shaper_delete(uint8_t port_id,
+rte_tm_shared_shaper_delete(uint16_t port_id,
uint32_t shared_shaper_id,
struct rte_tm_error *error);
@@ -1392,7 +1392,7 @@ rte_tm_shared_shaper_delete(uint8_t port_id,
* @see struct rte_tm_capabilities
*/
int
-rte_tm_node_add(uint8_t port_id,
+rte_tm_node_add(uint16_t port_id,
uint32_t node_id,
uint32_t parent_node_id,
uint32_t priority,
@@ -1425,7 +1425,7 @@ rte_tm_node_add(uint8_t port_id,
* @see RTE_TM_UPDATE_NODE_ADD_DELETE
*/
int
-rte_tm_node_delete(uint8_t port_id,
+rte_tm_node_delete(uint16_t port_id,
uint32_t node_id,
struct rte_tm_error *error);
@@ -1449,7 +1449,7 @@ rte_tm_node_delete(uint8_t port_id,
* @see RTE_TM_UPDATE_NODE_SUSPEND_RESUME
*/
int
-rte_tm_node_suspend(uint8_t port_id,
+rte_tm_node_suspend(uint16_t port_id,
uint32_t node_id,
struct rte_tm_error *error);
@@ -1472,7 +1472,7 @@ rte_tm_node_suspend(uint8_t port_id,
* @see RTE_TM_UPDATE_NODE_SUSPEND_RESUME
*/
int
-rte_tm_node_resume(uint8_t port_id,
+rte_tm_node_resume(uint16_t port_id,
uint32_t node_id,
struct rte_tm_error *error);
@@ -1513,7 +1513,7 @@ rte_tm_node_resume(uint8_t port_id,
* @see rte_tm_node_delete()
*/
int
-rte_tm_hierarchy_commit(uint8_t port_id,
+rte_tm_hierarchy_commit(uint16_t port_id,
int clear_on_fail,
struct rte_tm_error *error);
@@ -1549,7 +1549,7 @@ rte_tm_hierarchy_commit(uint8_t port_id,
* @see RTE_TM_UPDATE_NODE_PARENT_CHANGE_LEVEL
*/
int
-rte_tm_node_parent_update(uint8_t port_id,
+rte_tm_node_parent_update(uint16_t port_id,
uint32_t node_id,
uint32_t parent_node_id,
uint32_t priority,
@@ -1578,7 +1578,7 @@ rte_tm_node_parent_update(uint8_t port_id,
* @see struct rte_tm_capabilities::shaper_private_n_max
*/
int
-rte_tm_node_shaper_update(uint8_t port_id,
+rte_tm_node_shaper_update(uint16_t port_id,
uint32_t node_id,
uint32_t shaper_profile_id,
struct rte_tm_error *error);
@@ -1605,7 +1605,7 @@ rte_tm_node_shaper_update(uint8_t port_id,
* @see struct rte_tm_capabilities::shaper_shared_n_max
*/
int
-rte_tm_node_shared_shaper_update(uint8_t port_id,
+rte_tm_node_shared_shaper_update(uint16_t port_id,
uint32_t node_id,
uint32_t shared_shaper_id,
int add,
@@ -1632,7 +1632,7 @@ rte_tm_node_shared_shaper_update(uint8_t port_id,
* @see RTE_TM_UPDATE_NODE_STATS
*/
int
-rte_tm_node_stats_update(uint8_t port_id,
+rte_tm_node_stats_update(uint16_t port_id,
uint32_t node_id,
uint64_t stats_mask,
struct rte_tm_error *error);
@@ -1660,7 +1660,7 @@ rte_tm_node_stats_update(uint8_t port_id,
* @see RTE_TM_UPDATE_NODE_N_SP_PRIORITIES
*/
int
-rte_tm_node_wfq_weight_mode_update(uint8_t port_id,
+rte_tm_node_wfq_weight_mode_update(uint16_t port_id,
uint32_t node_id,
int *wfq_weight_mode,
uint32_t n_sp_priorities,
@@ -1683,7 +1683,7 @@ rte_tm_node_wfq_weight_mode_update(uint8_t port_id,
* @see RTE_TM_UPDATE_NODE_CMAN
*/
int
-rte_tm_node_cman_update(uint8_t port_id,
+rte_tm_node_cman_update(uint16_t port_id,
uint32_t node_id,
enum rte_tm_cman_mode cman,
struct rte_tm_error *error);
@@ -1707,7 +1707,7 @@ rte_tm_node_cman_update(uint8_t port_id,
* @see struct rte_tm_capabilities::cman_wred_context_private_n_max
*/
int
-rte_tm_node_wred_context_update(uint8_t port_id,
+rte_tm_node_wred_context_update(uint16_t port_id,
uint32_t node_id,
uint32_t wred_profile_id,
struct rte_tm_error *error);
@@ -1732,7 +1732,7 @@ rte_tm_node_wred_context_update(uint8_t port_id,
* @see struct rte_tm_capabilities::cman_wred_context_shared_n_max
*/
int
-rte_tm_node_shared_wred_context_update(uint8_t port_id,
+rte_tm_node_shared_wred_context_update(uint16_t port_id,
uint32_t node_id,
uint32_t shared_wred_context_id,
int add,
@@ -1764,7 +1764,7 @@ rte_tm_node_shared_wred_context_update(uint8_t port_id,
* @see enum rte_tm_stats_type
*/
int
-rte_tm_node_stats_read(uint8_t port_id,
+rte_tm_node_stats_read(uint16_t port_id,
uint32_t node_id,
struct rte_tm_node_stats *stats,
uint64_t *stats_mask,
@@ -1801,7 +1801,7 @@ rte_tm_node_stats_read(uint8_t port_id,
* @see struct rte_tm_capabilities::mark_vlan_dei_supported
*/
int
-rte_tm_mark_vlan_dei(uint8_t port_id,
+rte_tm_mark_vlan_dei(uint16_t port_id,
int mark_green,
int mark_yellow,
int mark_red,
@@ -1851,7 +1851,7 @@ rte_tm_mark_vlan_dei(uint8_t port_id,
* @see struct rte_tm_capabilities::mark_ip_ecn_sctp_supported
*/
int
-rte_tm_mark_ip_ecn(uint8_t port_id,
+rte_tm_mark_ip_ecn(uint16_t port_id,
int mark_green,
int mark_yellow,
int mark_red,
@@ -1899,7 +1899,7 @@ rte_tm_mark_ip_ecn(uint8_t port_id,
* @see struct rte_tm_capabilities::mark_ip_dscp_supported
*/
int
-rte_tm_mark_ip_dscp(uint8_t port_id,
+rte_tm_mark_ip_dscp(uint16_t port_id,
int mark_green,
int mark_yellow,
int mark_red,
diff --git a/lib/librte_ether/rte_tm_driver.h b/lib/librte_ether/rte_tm_driver.h
index a5b698fe..b2e8ccf8 100644
--- a/lib/librte_ether/rte_tm_driver.h
+++ b/lib/librte_ether/rte_tm_driver.h
@@ -357,7 +357,7 @@ rte_tm_error_set(struct rte_tm_error *error,
* success, NULL otherwise.
*/
const struct rte_tm_ops *
-rte_tm_ops_get(uint8_t port_id, struct rte_tm_error *error);
+rte_tm_ops_get(uint16_t port_id, struct rte_tm_error *error);
#ifdef __cplusplus
}
diff --git a/lib/librte_eventdev/Makefile b/lib/librte_eventdev/Makefile
index 410578a1..5ac22cde 100644
--- a/lib/librte_eventdev/Makefile
+++ b/lib/librte_eventdev/Makefile
@@ -34,15 +34,17 @@ include $(RTE_SDK)/mk/rte.vars.mk
LIB = librte_eventdev.a
# library version
-LIBABIVER := 2
+LIBABIVER := 3
# build flags
CFLAGS += -O3
CFLAGS += $(WERROR_FLAGS)
+LDLIBS += -lrte_eal -lrte_ring -lrte_ethdev -lrte_hash
# library source files
SRCS-y += rte_eventdev.c
SRCS-y += rte_event_ring.c
+SRCS-y += rte_event_eth_rx_adapter.c
# export include files
SYMLINK-y-include += rte_eventdev.h
@@ -50,6 +52,7 @@ SYMLINK-y-include += rte_eventdev_pmd.h
SYMLINK-y-include += rte_eventdev_pmd_pci.h
SYMLINK-y-include += rte_eventdev_pmd_vdev.h
SYMLINK-y-include += rte_event_ring.h
+SYMLINK-y-include += rte_event_eth_rx_adapter.h
# versioning export map
EXPORT_MAP := rte_eventdev_version.map
diff --git a/lib/librte_eventdev/rte_event_eth_rx_adapter.c b/lib/librte_eventdev/rte_event_eth_rx_adapter.c
new file mode 100644
index 00000000..90106e6c
--- /dev/null
+++ b/lib/librte_eventdev/rte_event_eth_rx_adapter.c
@@ -0,0 +1,1240 @@
+#include <rte_cycles.h>
+#include <rte_common.h>
+#include <rte_dev.h>
+#include <rte_errno.h>
+#include <rte_ethdev.h>
+#include <rte_log.h>
+#include <rte_malloc.h>
+#include <rte_service_component.h>
+#include <rte_thash.h>
+
+#include "rte_eventdev.h"
+#include "rte_eventdev_pmd.h"
+#include "rte_event_eth_rx_adapter.h"
+
+#define BATCH_SIZE 32
+#define BLOCK_CNT_THRESHOLD 10
+#define ETH_EVENT_BUFFER_SIZE (4*BATCH_SIZE)
+
+#define ETH_RX_ADAPTER_SERVICE_NAME_LEN 32
+#define ETH_RX_ADAPTER_MEM_NAME_LEN 32
+
+#define RSS_KEY_SIZE 40
+
+/*
+ * There is an instance of this struct per polled Rx queue added to the
+ * adapter
+ */
+struct eth_rx_poll_entry {
+ /* Eth port to poll */
+ uint8_t eth_dev_id;
+ /* Eth rx queue to poll */
+ uint16_t eth_rx_qid;
+};
+
+/* Instance per adapter */
+struct rte_eth_event_enqueue_buffer {
+ /* Count of events in this buffer */
+ uint16_t count;
+ /* Array of events in this buffer */
+ struct rte_event events[ETH_EVENT_BUFFER_SIZE];
+};
+
+struct rte_event_eth_rx_adapter {
+ /* RSS key */
+ uint8_t rss_key_be[RSS_KEY_SIZE];
+ /* Event device identifier */
+ uint8_t eventdev_id;
+ /* Per ethernet device structure */
+ struct eth_device_info *eth_devices;
+ /* Event port identifier */
+ uint8_t event_port_id;
+ /* Lock to serialize config updates with service function */
+ rte_spinlock_t rx_lock;
+ /* Max mbufs processed in any service function invocation */
+ uint32_t max_nb_rx;
+ /* Receive queues that need to be polled */
+ struct eth_rx_poll_entry *eth_rx_poll;
+ /* Size of the eth_rx_poll array */
+ uint16_t num_rx_polled;
+ /* Weighted round robin schedule */
+ uint32_t *wrr_sched;
+ /* wrr_sched[] size */
+ uint32_t wrr_len;
+ /* Next entry in wrr[] to begin polling */
+ uint32_t wrr_pos;
+ /* Event burst buffer */
+ struct rte_eth_event_enqueue_buffer event_enqueue_buffer;
+ /* Per adapter stats */
+ struct rte_event_eth_rx_adapter_stats stats;
+ /* Block count, counts up to BLOCK_CNT_THRESHOLD */
+ uint16_t enq_block_count;
+ /* Block start ts */
+ uint64_t rx_enq_block_start_ts;
+ /* Configuration callback for rte_service configuration */
+ rte_event_eth_rx_adapter_conf_cb conf_cb;
+ /* Configuration callback argument */
+ void *conf_arg;
+ /* Set if default_cb is being used */
+ int default_cb_arg;
+ /* Service initialization state */
+ uint8_t service_inited;
+ /* Total count of Rx queues in adapter */
+ uint32_t nb_queues;
+ /* Memory allocation name */
+ char mem_name[ETH_RX_ADAPTER_MEM_NAME_LEN];
+ /* Socket identifier cached from eventdev */
+ int socket_id;
+ /* Per adapter EAL service */
+ uint32_t service_id;
+} __rte_cache_aligned;
+
+/* Per eth device */
+struct eth_device_info {
+ struct rte_eth_dev *dev;
+ struct eth_rx_queue_info *rx_queue;
+ /* Set if ethdev->eventdev packet transfer uses a
+ * hardware mechanism
+ */
+ uint8_t internal_event_port;
+ /* Set if the adapter is processing rx queues for
+ * this eth device and packet processing has been
+ * started; this lets the code know whether the PMD
+ * rx_adapter_stop callback needs to be invoked
+ */
+ uint8_t dev_rx_started;
+ /* If nb_dev_queues > 0, the start callback will
+ * be invoked if not already invoked
+ */
+ uint16_t nb_dev_queues;
+};
+
+/* Per Rx queue */
+struct eth_rx_queue_info {
+ int queue_enabled; /* True if added */
+ uint16_t wt; /* Polling weight */
+ uint8_t event_queue_id; /* Event queue to enqueue packets to */
+ uint8_t sched_type; /* Sched type for events */
+ uint8_t priority; /* Event priority */
+ uint32_t flow_id; /* App provided flow identifier */
+ uint32_t flow_id_mask; /* Set to ~0 if app provides flow id else 0 */
+};
+
+static struct rte_event_eth_rx_adapter **event_eth_rx_adapter;
+
+static inline int
+valid_id(uint8_t id)
+{
+ return id < RTE_EVENT_ETH_RX_ADAPTER_MAX_INSTANCE;
+}
+
+#define RTE_EVENT_ETH_RX_ADAPTER_ID_VALID_OR_ERR_RET(id, retval) do { \
+ if (!valid_id(id)) { \
+ RTE_EDEV_LOG_ERR("Invalid eth Rx adapter id = %d\n", id); \
+ return retval; \
+ } \
+} while (0)
+
+static inline int
+sw_rx_adapter_queue_count(struct rte_event_eth_rx_adapter *rx_adapter)
+{
+ return rx_adapter->num_rx_polled;
+}
+
+/* Greatest common divisor */
+static uint16_t gcd_u16(uint16_t a, uint16_t b)
+{
+ uint16_t r = a % b;
+
+ return r ? gcd_u16(b, r) : b;
+}
+
+/* Returns the next queue in the polling sequence
+ *
+ * http://kb.linuxvirtualserver.org/wiki/Weighted_Round-Robin_Scheduling
+ */
+static int
+wrr_next(struct rte_event_eth_rx_adapter *rx_adapter,
+ unsigned int n, int *cw,
+ struct eth_rx_poll_entry *eth_rx_poll, uint16_t max_wt,
+ uint16_t gcd, int prev)
+{
+ int i = prev;
+ uint16_t w;
+
+ while (1) {
+ uint16_t q;
+ uint8_t d;
+
+ i = (i + 1) % n;
+ if (i == 0) {
+ *cw = *cw - gcd;
+ if (*cw <= 0)
+ *cw = max_wt;
+ }
+
+ q = eth_rx_poll[i].eth_rx_qid;
+ d = eth_rx_poll[i].eth_dev_id;
+ w = rx_adapter->eth_devices[d].rx_queue[q].wt;
+
+ if ((int)w >= *cw)
+ return i;
+ }
+}
+
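To make the scheduling concrete, below is a small standalone sketch (not part of the patch) that drives the same LVS-style weighted round-robin step for three queues with weights 3, 2 and 1; gcd and max_wt are precomputed by hand and the weights are illustrative only.

#include <stdio.h>

/* Standalone demo of the weighted round-robin step used by wrr_next() above */
static int
wrr_next_demo(const int *wt, int n, int *cw, int max_wt, int gcd, int prev)
{
	int i = prev;

	for (;;) {
		i = (i + 1) % n;
		if (i == 0) {
			*cw -= gcd;
			if (*cw <= 0)
				*cw = max_wt;
		}
		if (wt[i] >= *cw)
			return i;
	}
}

int
main(void)
{
	const int wt[] = { 3, 2, 1 };	/* per-queue polling weights */
	int cw = -1, prev = -1, i;

	/* One full cycle has sum(weights) = 6 slots */
	for (i = 0; i < 6; i++) {
		prev = wrr_next_demo(wt, 3, &cw, 3, 1, prev);
		printf("%d ", prev);	/* prints: 0 0 1 0 1 2 */
	}
	printf("\n");
	return 0;
}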
+/* Precalculate WRR polling sequence for all queues in rx_adapter */
+static int
+eth_poll_wrr_calc(struct rte_event_eth_rx_adapter *rx_adapter)
+{
+ uint8_t d;
+ uint16_t q;
+ unsigned int i;
+
+ /* Initialize variables for calculation of wrr schedule */
+ uint16_t max_wrr_pos = 0;
+ unsigned int poll_q = 0;
+ uint16_t max_wt = 0;
+ uint16_t gcd = 0;
+
+ struct eth_rx_poll_entry *rx_poll = NULL;
+ uint32_t *rx_wrr = NULL;
+
+ if (rx_adapter->num_rx_polled) {
+ size_t len = RTE_ALIGN(rx_adapter->num_rx_polled *
+ sizeof(*rx_adapter->eth_rx_poll),
+ RTE_CACHE_LINE_SIZE);
+ rx_poll = rte_zmalloc_socket(rx_adapter->mem_name,
+ len,
+ RTE_CACHE_LINE_SIZE,
+ rx_adapter->socket_id);
+ if (rx_poll == NULL)
+ return -ENOMEM;
+
+ /* Generate the array of all queues to poll; the final value of
+ * poll_q is the number of entries in this array
+ */
+ for (d = 0; d < rte_eth_dev_count(); d++) {
+ uint16_t nb_rx_queues;
+ struct eth_device_info *dev_info =
+ &rx_adapter->eth_devices[d];
+ nb_rx_queues = dev_info->dev->data->nb_rx_queues;
+ if (dev_info->rx_queue == NULL)
+ continue;
+ for (q = 0; q < nb_rx_queues; q++) {
+ struct eth_rx_queue_info *queue_info =
+ &dev_info->rx_queue[q];
+ if (queue_info->queue_enabled == 0)
+ continue;
+
+ uint16_t wt = queue_info->wt;
+ rx_poll[poll_q].eth_dev_id = d;
+ rx_poll[poll_q].eth_rx_qid = q;
+ max_wrr_pos += wt;
+ max_wt = RTE_MAX(max_wt, wt);
+ gcd = (gcd) ? gcd_u16(gcd, wt) : wt;
+ poll_q++;
+ }
+ }
+
+ len = RTE_ALIGN(max_wrr_pos * sizeof(*rx_wrr),
+ RTE_CACHE_LINE_SIZE);
+ rx_wrr = rte_zmalloc_socket(rx_adapter->mem_name,
+ len,
+ RTE_CACHE_LINE_SIZE,
+ rx_adapter->socket_id);
+ if (rx_wrr == NULL) {
+ rte_free(rx_poll);
+ return -ENOMEM;
+ }
+
+ /* Generate polling sequence based on weights */
+ int prev = -1;
+ int cw = -1;
+ for (i = 0; i < max_wrr_pos; i++) {
+ rx_wrr[i] = wrr_next(rx_adapter, poll_q, &cw,
+ rx_poll, max_wt, gcd, prev);
+ prev = rx_wrr[i];
+ }
+ }
+
+ rte_free(rx_adapter->eth_rx_poll);
+ rte_free(rx_adapter->wrr_sched);
+
+ rx_adapter->eth_rx_poll = rx_poll;
+ rx_adapter->wrr_sched = rx_wrr;
+ rx_adapter->wrr_len = max_wrr_pos;
+
+ return 0;
+}
+
+static inline void
+mtoip(struct rte_mbuf *m, struct ipv4_hdr **ipv4_hdr,
+ struct ipv6_hdr **ipv6_hdr)
+{
+ struct ether_hdr *eth_hdr = rte_pktmbuf_mtod(m, struct ether_hdr *);
+ struct vlan_hdr *vlan_hdr;
+
+ *ipv4_hdr = NULL;
+ *ipv6_hdr = NULL;
+
+ switch (eth_hdr->ether_type) {
+ case RTE_BE16(ETHER_TYPE_IPv4):
+ *ipv4_hdr = (struct ipv4_hdr *)(eth_hdr + 1);
+ break;
+
+ case RTE_BE16(ETHER_TYPE_IPv6):
+ *ipv6_hdr = (struct ipv6_hdr *)(eth_hdr + 1);
+ break;
+
+ case RTE_BE16(ETHER_TYPE_VLAN):
+ vlan_hdr = (struct vlan_hdr *)(eth_hdr + 1);
+ switch (vlan_hdr->eth_proto) {
+ case RTE_BE16(ETHER_TYPE_IPv4):
+ *ipv4_hdr = (struct ipv4_hdr *)(vlan_hdr + 1);
+ break;
+ case RTE_BE16(ETHER_TYPE_IPv6):
+ *ipv6_hdr = (struct ipv6_hdr *)(vlan_hdr + 1);
+ break;
+ default:
+ break;
+ }
+ break;
+
+ default:
+ break;
+ }
+}
+
+/* Calculate RSS hash for IPv4/6 */
+static inline uint32_t
+do_softrss(struct rte_mbuf *m, const uint8_t *rss_key_be)
+{
+ uint32_t input_len;
+ void *tuple;
+ struct rte_ipv4_tuple ipv4_tuple;
+ struct rte_ipv6_tuple ipv6_tuple;
+ struct ipv4_hdr *ipv4_hdr;
+ struct ipv6_hdr *ipv6_hdr;
+
+ mtoip(m, &ipv4_hdr, &ipv6_hdr);
+
+ if (ipv4_hdr) {
+ ipv4_tuple.src_addr = rte_be_to_cpu_32(ipv4_hdr->src_addr);
+ ipv4_tuple.dst_addr = rte_be_to_cpu_32(ipv4_hdr->dst_addr);
+ tuple = &ipv4_tuple;
+ input_len = RTE_THASH_V4_L3_LEN;
+ } else if (ipv6_hdr) {
+ rte_thash_load_v6_addrs(ipv6_hdr,
+ (union rte_thash_tuple *)&ipv6_tuple);
+ tuple = &ipv6_tuple;
+ input_len = RTE_THASH_V6_L3_LEN;
+ } else
+ return 0;
+
+ return rte_softrss_be(tuple, input_len, rss_key_be);
+}
+
+static inline int
+rx_enq_blocked(struct rte_event_eth_rx_adapter *rx_adapter)
+{
+ return !!rx_adapter->enq_block_count;
+}
+
+static inline void
+rx_enq_block_start_ts(struct rte_event_eth_rx_adapter *rx_adapter)
+{
+ if (rx_adapter->rx_enq_block_start_ts)
+ return;
+
+ rx_adapter->enq_block_count++;
+ if (rx_adapter->enq_block_count < BLOCK_CNT_THRESHOLD)
+ return;
+
+ rx_adapter->rx_enq_block_start_ts = rte_get_tsc_cycles();
+}
+
+static inline void
+rx_enq_block_end_ts(struct rte_event_eth_rx_adapter *rx_adapter,
+ struct rte_event_eth_rx_adapter_stats *stats)
+{
+ if (unlikely(!stats->rx_enq_start_ts))
+ stats->rx_enq_start_ts = rte_get_tsc_cycles();
+
+ if (likely(!rx_enq_blocked(rx_adapter)))
+ return;
+
+ rx_adapter->enq_block_count = 0;
+ if (rx_adapter->rx_enq_block_start_ts) {
+ stats->rx_enq_end_ts = rte_get_tsc_cycles();
+ stats->rx_enq_block_cycles += stats->rx_enq_end_ts -
+ rx_adapter->rx_enq_block_start_ts;
+ rx_adapter->rx_enq_block_start_ts = 0;
+ }
+}
+
+/* Add event to buffer; the free space check is done prior to calling
+ * this function
+ */
+static inline void
+buf_event_enqueue(struct rte_event_eth_rx_adapter *rx_adapter,
+ struct rte_event *ev)
+{
+ struct rte_eth_event_enqueue_buffer *buf =
+ &rx_adapter->event_enqueue_buffer;
+ rte_memcpy(&buf->events[buf->count++], ev, sizeof(struct rte_event));
+}
+
+/* Enqueue buffered events to event device */
+static inline uint16_t
+flush_event_buffer(struct rte_event_eth_rx_adapter *rx_adapter)
+{
+ struct rte_eth_event_enqueue_buffer *buf =
+ &rx_adapter->event_enqueue_buffer;
+ struct rte_event_eth_rx_adapter_stats *stats = &rx_adapter->stats;
+
+ uint16_t n = rte_event_enqueue_new_burst(rx_adapter->eventdev_id,
+ rx_adapter->event_port_id,
+ buf->events,
+ buf->count);
+ if (n != buf->count) {
+ memmove(buf->events,
+ &buf->events[n],
+ (buf->count - n) * sizeof(struct rte_event));
+ stats->rx_enq_retry++;
+ }
+
+ n ? rx_enq_block_end_ts(rx_adapter, stats) :
+ rx_enq_block_start_ts(rx_adapter);
+
+ buf->count -= n;
+ stats->rx_enq_count += n;
+
+ return n;
+}
+
+static inline void
+fill_event_buffer(struct rte_event_eth_rx_adapter *rx_adapter,
+ uint8_t dev_id,
+ uint16_t rx_queue_id,
+ struct rte_mbuf **mbufs,
+ uint16_t num)
+{
+ uint32_t i;
+ struct eth_device_info *eth_device_info =
+ &rx_adapter->eth_devices[dev_id];
+ struct eth_rx_queue_info *eth_rx_queue_info =
+ &eth_device_info->rx_queue[rx_queue_id];
+
+ int32_t qid = eth_rx_queue_info->event_queue_id;
+ uint8_t sched_type = eth_rx_queue_info->sched_type;
+ uint8_t priority = eth_rx_queue_info->priority;
+ uint32_t flow_id;
+ struct rte_event events[BATCH_SIZE];
+ struct rte_mbuf *m = mbufs[0];
+ uint32_t rss_mask;
+ uint32_t rss;
+ int do_rss;
+
+ /* 0xffff ffff if PKT_RX_RSS_HASH is set, otherwise 0 */
+ rss_mask = ~(((m->ol_flags & PKT_RX_RSS_HASH) != 0) - 1);
+ do_rss = !rss_mask && !eth_rx_queue_info->flow_id_mask;
+
+ for (i = 0; i < num; i++) {
+ m = mbufs[i];
+ struct rte_event *ev = &events[i];
+
+ rss = do_rss ?
+ do_softrss(m, rx_adapter->rss_key_be) : m->hash.rss;
+ flow_id =
+ eth_rx_queue_info->flow_id &
+ eth_rx_queue_info->flow_id_mask;
+ flow_id |= rss & ~eth_rx_queue_info->flow_id_mask;
+
+ ev->flow_id = flow_id;
+ ev->op = RTE_EVENT_OP_NEW;
+ ev->sched_type = sched_type;
+ ev->queue_id = qid;
+ ev->event_type = RTE_EVENT_TYPE_ETH_RX_ADAPTER;
+ ev->sub_event_type = 0;
+ ev->priority = priority;
+ ev->mbuf = m;
+
+ buf_event_enqueue(rx_adapter, ev);
+ }
+}
+
+/*
+ * Polls receive queues added to the event adapter and enqueues received
+ * packets to the event device.
+ *
+ * The receive code enqueues initially to a temporary buffer; the
+ * temporary buffer is drained whenever it holds >= BATCH_SIZE packets.
+ *
+ * If there isn't space available in the temporary buffer, packets from the
+ * Rx queue aren't dequeued from the eth device; this back-pressures the
+ * eth device. In virtual device environments, this back pressure is relayed
+ * to the hypervisor's switching layer, where adjustments can be made to deal
+ * with it.
+ */
+static inline uint32_t
+eth_rx_poll(struct rte_event_eth_rx_adapter *rx_adapter)
+{
+ uint32_t num_queue;
+ uint16_t n;
+ uint32_t nb_rx = 0;
+ struct rte_mbuf *mbufs[BATCH_SIZE];
+ struct rte_eth_event_enqueue_buffer *buf;
+ uint32_t wrr_pos;
+ uint32_t max_nb_rx;
+
+ wrr_pos = rx_adapter->wrr_pos;
+ max_nb_rx = rx_adapter->max_nb_rx;
+ buf = &rx_adapter->event_enqueue_buffer;
+ struct rte_event_eth_rx_adapter_stats *stats = &rx_adapter->stats;
+
+ /* Iterate through a WRR sequence */
+ for (num_queue = 0; num_queue < rx_adapter->wrr_len; num_queue++) {
+ unsigned int poll_idx = rx_adapter->wrr_sched[wrr_pos];
+ uint16_t qid = rx_adapter->eth_rx_poll[poll_idx].eth_rx_qid;
+ uint8_t d = rx_adapter->eth_rx_poll[poll_idx].eth_dev_id;
+
+ /* Don't do a batch dequeue from the rx queue if there isn't
+ * enough space in the enqueue buffer.
+ */
+ if (buf->count >= BATCH_SIZE)
+ flush_event_buffer(rx_adapter);
+ if (BATCH_SIZE > (ETH_EVENT_BUFFER_SIZE - buf->count))
+ break;
+
+ stats->rx_poll_count++;
+ n = rte_eth_rx_burst(d, qid, mbufs, BATCH_SIZE);
+
+ if (n) {
+ stats->rx_packets += n;
+ /* The check before rte_eth_rx_burst() ensures that
+ * all n mbufs can be buffered
+ */
+ fill_event_buffer(rx_adapter, d, qid, mbufs, n);
+ nb_rx += n;
+ if (nb_rx > max_nb_rx) {
+ rx_adapter->wrr_pos =
+ (wrr_pos + 1) % rx_adapter->wrr_len;
+ return nb_rx;
+ }
+ }
+
+ if (++wrr_pos == rx_adapter->wrr_len)
+ wrr_pos = 0;
+ }
+
+ return nb_rx;
+}
+
+static int
+event_eth_rx_adapter_service_func(void *args)
+{
+ struct rte_event_eth_rx_adapter *rx_adapter = args;
+ struct rte_eth_event_enqueue_buffer *buf;
+
+ buf = &rx_adapter->event_enqueue_buffer;
+ if (rte_spinlock_trylock(&rx_adapter->rx_lock) == 0)
+ return 0;
+ if (eth_rx_poll(rx_adapter) == 0 && buf->count)
+ flush_event_buffer(rx_adapter);
+ rte_spinlock_unlock(&rx_adapter->rx_lock);
+ return 0;
+}
+
+static int
+rte_event_eth_rx_adapter_init(void)
+{
+ const char *name = "rte_event_eth_rx_adapter_array";
+ const struct rte_memzone *mz;
+ unsigned int sz;
+
+ sz = sizeof(*event_eth_rx_adapter) *
+ RTE_EVENT_ETH_RX_ADAPTER_MAX_INSTANCE;
+ sz = RTE_ALIGN(sz, RTE_CACHE_LINE_SIZE);
+
+ mz = rte_memzone_lookup(name);
+ if (mz == NULL) {
+ mz = rte_memzone_reserve_aligned(name, sz, rte_socket_id(), 0,
+ RTE_CACHE_LINE_SIZE);
+ if (mz == NULL) {
+ RTE_EDEV_LOG_ERR("failed to reserve memzone err = %"
+ PRId32, rte_errno);
+ return -rte_errno;
+ }
+ }
+
+ event_eth_rx_adapter = mz->addr;
+ return 0;
+}
+
+static inline struct rte_event_eth_rx_adapter *
+id_to_rx_adapter(uint8_t id)
+{
+ return event_eth_rx_adapter ?
+ event_eth_rx_adapter[id] : NULL;
+}
+
+static int
+default_conf_cb(uint8_t id, uint8_t dev_id,
+ struct rte_event_eth_rx_adapter_conf *conf, void *arg)
+{
+ int ret;
+ struct rte_eventdev *dev;
+ struct rte_event_dev_config dev_conf;
+ int started;
+ uint8_t port_id;
+ struct rte_event_port_conf *port_conf = arg;
+ struct rte_event_eth_rx_adapter *rx_adapter = id_to_rx_adapter(id);
+
+ dev = &rte_eventdevs[rx_adapter->eventdev_id];
+ dev_conf = dev->data->dev_conf;
+
+ started = dev->data->dev_started;
+ if (started)
+ rte_event_dev_stop(dev_id);
+ port_id = dev_conf.nb_event_ports;
+ dev_conf.nb_event_ports += 1;
+ ret = rte_event_dev_configure(dev_id, &dev_conf);
+ if (ret) {
+ RTE_EDEV_LOG_ERR("failed to configure event dev %u\n",
+ dev_id);
+ if (started)
+ rte_event_dev_start(dev_id);
+ return ret;
+ }
+
+ ret = rte_event_port_setup(dev_id, port_id, port_conf);
+ if (ret) {
+ RTE_EDEV_LOG_ERR("failed to setup event port %u\n",
+ port_id);
+ return ret;
+ }
+
+ conf->event_port_id = port_id;
+ conf->max_nb_rx = 128;
+ if (started)
+ rte_event_dev_start(dev_id);
+ rx_adapter->default_cb_arg = 1;
+ return ret;
+}
+
+static int
+init_service(struct rte_event_eth_rx_adapter *rx_adapter, uint8_t id)
+{
+ int ret;
+ struct rte_service_spec service;
+ struct rte_event_eth_rx_adapter_conf rx_adapter_conf;
+
+ if (rx_adapter->service_inited)
+ return 0;
+
+ memset(&service, 0, sizeof(service));
+ snprintf(service.name, ETH_RX_ADAPTER_SERVICE_NAME_LEN,
+ "rte_event_eth_rx_adapter_%d", id);
+ service.socket_id = rx_adapter->socket_id;
+ service.callback = event_eth_rx_adapter_service_func;
+ service.callback_userdata = rx_adapter;
+ /* Service function handles locking for queue add/del updates */
+ service.capabilities = RTE_SERVICE_CAP_MT_SAFE;
+ ret = rte_service_component_register(&service, &rx_adapter->service_id);
+ if (ret) {
+ RTE_EDEV_LOG_ERR("failed to register service %s err = %" PRId32,
+ service.name, ret);
+ return ret;
+ }
+
+ ret = rx_adapter->conf_cb(id, rx_adapter->eventdev_id,
+ &rx_adapter_conf, rx_adapter->conf_arg);
+ if (ret) {
+ RTE_EDEV_LOG_ERR("configuration callback failed err = %" PRId32,
+ ret);
+ goto err_done;
+ }
+ rx_adapter->event_port_id = rx_adapter_conf.event_port_id;
+ rx_adapter->max_nb_rx = rx_adapter_conf.max_nb_rx;
+ rx_adapter->service_inited = 1;
+ return 0;
+
+err_done:
+ rte_service_component_unregister(rx_adapter->service_id);
+ return ret;
+}
+
+
+static void
+update_queue_info(struct rte_event_eth_rx_adapter *rx_adapter,
+ struct eth_device_info *dev_info,
+ int32_t rx_queue_id,
+ uint8_t add)
+{
+ struct eth_rx_queue_info *queue_info;
+ int enabled;
+ uint16_t i;
+
+ if (dev_info->rx_queue == NULL)
+ return;
+
+ if (rx_queue_id == -1) {
+ for (i = 0; i < dev_info->dev->data->nb_rx_queues; i++)
+ update_queue_info(rx_adapter, dev_info, i, add);
+ } else {
+ queue_info = &dev_info->rx_queue[rx_queue_id];
+ enabled = queue_info->queue_enabled;
+ if (add) {
+ rx_adapter->nb_queues += !enabled;
+ dev_info->nb_dev_queues += !enabled;
+ } else {
+ rx_adapter->nb_queues -= enabled;
+ dev_info->nb_dev_queues -= enabled;
+ }
+ queue_info->queue_enabled = !!add;
+ }
+}
+
+static int
+event_eth_rx_adapter_queue_del(struct rte_event_eth_rx_adapter *rx_adapter,
+ struct eth_device_info *dev_info,
+ uint16_t rx_queue_id)
+{
+ struct eth_rx_queue_info *queue_info;
+
+ if (rx_adapter->nb_queues == 0)
+ return 0;
+
+ queue_info = &dev_info->rx_queue[rx_queue_id];
+ rx_adapter->num_rx_polled -= queue_info->queue_enabled;
+ update_queue_info(rx_adapter, dev_info, rx_queue_id, 0);
+ return 0;
+}
+
+static void
+event_eth_rx_adapter_queue_add(struct rte_event_eth_rx_adapter *rx_adapter,
+ struct eth_device_info *dev_info,
+ uint16_t rx_queue_id,
+ const struct rte_event_eth_rx_adapter_queue_conf *conf)
+
+{
+ struct eth_rx_queue_info *queue_info;
+ const struct rte_event *ev = &conf->ev;
+
+ queue_info = &dev_info->rx_queue[rx_queue_id];
+ queue_info->event_queue_id = ev->queue_id;
+ queue_info->sched_type = ev->sched_type;
+ queue_info->priority = ev->priority;
+ queue_info->wt = conf->servicing_weight;
+
+ if (conf->rx_queue_flags &
+ RTE_EVENT_ETH_RX_ADAPTER_QUEUE_FLOW_ID_VALID) {
+ queue_info->flow_id = ev->flow_id;
+ queue_info->flow_id_mask = ~0;
+ }
+
+ /* The same queue can be added more than once */
+ rx_adapter->num_rx_polled += !queue_info->queue_enabled;
+ update_queue_info(rx_adapter, dev_info, rx_queue_id, 1);
+}
+
+static int add_rx_queue(struct rte_event_eth_rx_adapter *rx_adapter,
+ uint8_t eth_dev_id,
+ int rx_queue_id,
+ const struct rte_event_eth_rx_adapter_queue_conf *queue_conf)
+{
+ struct eth_device_info *dev_info = &rx_adapter->eth_devices[eth_dev_id];
+ struct rte_event_eth_rx_adapter_queue_conf temp_conf;
+ uint32_t i;
+ int ret;
+
+ if (queue_conf->servicing_weight == 0) {
+
+ struct rte_eth_dev_data *data = dev_info->dev->data;
+ if (data->dev_conf.intr_conf.rxq) {
+ RTE_EDEV_LOG_ERR("Interrupt driven queues"
+ " not supported");
+ return -ENOTSUP;
+ }
+ temp_conf = *queue_conf;
+
+ /* If Rx interrupts are disabled set wt = 1 */
+ temp_conf.servicing_weight = 1;
+ queue_conf = &temp_conf;
+ }
+
+ if (dev_info->rx_queue == NULL) {
+ dev_info->rx_queue =
+ rte_zmalloc_socket(rx_adapter->mem_name,
+ dev_info->dev->data->nb_rx_queues *
+ sizeof(struct eth_rx_queue_info), 0,
+ rx_adapter->socket_id);
+ if (dev_info->rx_queue == NULL)
+ return -ENOMEM;
+ }
+
+ if (rx_queue_id == -1) {
+ for (i = 0; i < dev_info->dev->data->nb_rx_queues; i++)
+ event_eth_rx_adapter_queue_add(rx_adapter,
+ dev_info, i,
+ queue_conf);
+ } else {
+ event_eth_rx_adapter_queue_add(rx_adapter, dev_info,
+ (uint16_t)rx_queue_id,
+ queue_conf);
+ }
+
+ ret = eth_poll_wrr_calc(rx_adapter);
+ if (ret) {
+ event_eth_rx_adapter_queue_del(rx_adapter,
+ dev_info, rx_queue_id);
+ return ret;
+ }
+
+ return ret;
+}
+
+static int
+rx_adapter_ctrl(uint8_t id, int start)
+{
+ struct rte_event_eth_rx_adapter *rx_adapter;
+ struct rte_eventdev *dev;
+ struct eth_device_info *dev_info;
+ uint32_t i;
+ int use_service = 0;
+ int stop = !start;
+
+ RTE_EVENT_ETH_RX_ADAPTER_ID_VALID_OR_ERR_RET(id, -EINVAL);
+ rx_adapter = id_to_rx_adapter(id);
+ if (rx_adapter == NULL)
+ return -EINVAL;
+
+ dev = &rte_eventdevs[rx_adapter->eventdev_id];
+
+ for (i = 0; i < rte_eth_dev_count(); i++) {
+ dev_info = &rx_adapter->eth_devices[i];
+ /* if starting, check the number of device queues */
+ if (start && !dev_info->nb_dev_queues)
+ continue;
+ /* if stopping, check whether the device has been started */
+ if (stop && !dev_info->dev_rx_started)
+ continue;
+ use_service |= !dev_info->internal_event_port;
+ dev_info->dev_rx_started = start;
+ if (dev_info->internal_event_port == 0)
+ continue;
+ start ? (*dev->dev_ops->eth_rx_adapter_start)(dev,
+ &rte_eth_devices[i]) :
+ (*dev->dev_ops->eth_rx_adapter_stop)(dev,
+ &rte_eth_devices[i]);
+ }
+
+ if (use_service)
+ rte_service_runstate_set(rx_adapter->service_id, start);
+
+ return 0;
+}
+
+int
+rte_event_eth_rx_adapter_create_ext(uint8_t id, uint8_t dev_id,
+ rte_event_eth_rx_adapter_conf_cb conf_cb,
+ void *conf_arg)
+{
+ struct rte_event_eth_rx_adapter *rx_adapter;
+ int ret;
+ int socket_id;
+ uint8_t i;
+ char mem_name[ETH_RX_ADAPTER_SERVICE_NAME_LEN];
+ const uint8_t default_rss_key[] = {
+ 0x6d, 0x5a, 0x56, 0xda, 0x25, 0x5b, 0x0e, 0xc2,
+ 0x41, 0x67, 0x25, 0x3d, 0x43, 0xa3, 0x8f, 0xb0,
+ 0xd0, 0xca, 0x2b, 0xcb, 0xae, 0x7b, 0x30, 0xb4,
+ 0x77, 0xcb, 0x2d, 0xa3, 0x80, 0x30, 0xf2, 0x0c,
+ 0x6a, 0x42, 0xb7, 0x3b, 0xbe, 0xac, 0x01, 0xfa,
+ };
+
+ RTE_EVENT_ETH_RX_ADAPTER_ID_VALID_OR_ERR_RET(id, -EINVAL);
+ RTE_EVENTDEV_VALID_DEVID_OR_ERR_RET(dev_id, -EINVAL);
+ if (conf_cb == NULL)
+ return -EINVAL;
+
+ if (event_eth_rx_adapter == NULL) {
+ ret = rte_event_eth_rx_adapter_init();
+ if (ret)
+ return ret;
+ }
+
+ rx_adapter = id_to_rx_adapter(id);
+ if (rx_adapter != NULL) {
+ RTE_EDEV_LOG_ERR("Eth Rx adapter exists id = %" PRIu8, id);
+ return -EEXIST;
+ }
+
+ socket_id = rte_event_dev_socket_id(dev_id);
+ snprintf(mem_name, ETH_RX_ADAPTER_MEM_NAME_LEN,
+ "rte_event_eth_rx_adapter_%d",
+ id);
+
+ rx_adapter = rte_zmalloc_socket(mem_name, sizeof(*rx_adapter),
+ RTE_CACHE_LINE_SIZE, socket_id);
+ if (rx_adapter == NULL) {
+ RTE_EDEV_LOG_ERR("failed to get mem for rx adapter");
+ return -ENOMEM;
+ }
+
+ rx_adapter->eventdev_id = dev_id;
+ rx_adapter->socket_id = socket_id;
+ rx_adapter->conf_cb = conf_cb;
+ rx_adapter->conf_arg = conf_arg;
+ strcpy(rx_adapter->mem_name, mem_name);
+ rx_adapter->eth_devices = rte_zmalloc_socket(rx_adapter->mem_name,
+ rte_eth_dev_count() *
+ sizeof(struct eth_device_info), 0,
+ socket_id);
+ rte_convert_rss_key((const uint32_t *)default_rss_key,
+ (uint32_t *)rx_adapter->rss_key_be,
+ RTE_DIM(default_rss_key));
+
+ if (rx_adapter->eth_devices == NULL) {
+ RTE_EDEV_LOG_ERR("failed to get mem for eth devices\n");
+ rte_free(rx_adapter);
+ return -ENOMEM;
+ }
+ rte_spinlock_init(&rx_adapter->rx_lock);
+ for (i = 0; i < rte_eth_dev_count(); i++)
+ rx_adapter->eth_devices[i].dev = &rte_eth_devices[i];
+
+ event_eth_rx_adapter[id] = rx_adapter;
+ if (conf_cb == default_conf_cb)
+ rx_adapter->default_cb_arg = 1;
+ return 0;
+}
+
+int
+rte_event_eth_rx_adapter_create(uint8_t id, uint8_t dev_id,
+ struct rte_event_port_conf *port_config)
+{
+ struct rte_event_port_conf *pc;
+ int ret;
+
+ if (port_config == NULL)
+ return -EINVAL;
+ RTE_EVENT_ETH_RX_ADAPTER_ID_VALID_OR_ERR_RET(id, -EINVAL);
+
+ pc = rte_malloc(NULL, sizeof(*pc), 0);
+ if (pc == NULL)
+ return -ENOMEM;
+ *pc = *port_config;
+ ret = rte_event_eth_rx_adapter_create_ext(id, dev_id,
+ default_conf_cb,
+ pc);
+ if (ret)
+ rte_free(pc);
+ return ret;
+}
+
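For context, a minimal application-side sketch of the create/queue_add/start sequence implemented by this file; the device ids, the port_conf values and the choice of RTE_SCHED_TYPE_ATOMIC are illustrative assumptions, not requirements of this patch.

	/* Hypothetical setup: adapter 0 on event device 0, all Rx queues of
	 * ethdev port 0 feeding event queue 0.
	 */
	struct rte_event_port_conf port_conf = {
		.new_event_threshold = 1024,
		.dequeue_depth = 32,
		.enqueue_depth = 32,
	};
	struct rte_event_eth_rx_adapter_queue_conf qconf = {
		.rx_queue_flags = 0,
		.servicing_weight = 1,
		.ev = {
			.queue_id = 0,
			.sched_type = RTE_SCHED_TYPE_ATOMIC,
			.priority = RTE_EVENT_DEV_PRIORITY_NORMAL,
		},
	};
	int ret = rte_event_eth_rx_adapter_create(0, 0, &port_conf);

	if (ret == 0)
		ret = rte_event_eth_rx_adapter_queue_add(0, 0, -1, &qconf);
	if (ret == 0)
		ret = rte_event_eth_rx_adapter_start(0);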
+int
+rte_event_eth_rx_adapter_free(uint8_t id)
+{
+ struct rte_event_eth_rx_adapter *rx_adapter;
+
+ RTE_EVENT_ETH_RX_ADAPTER_ID_VALID_OR_ERR_RET(id, -EINVAL);
+
+ rx_adapter = id_to_rx_adapter(id);
+ if (rx_adapter == NULL)
+ return -EINVAL;
+
+ if (rx_adapter->nb_queues) {
+ RTE_EDEV_LOG_ERR("%" PRIu16 " Rx queues not deleted",
+ rx_adapter->nb_queues);
+ return -EBUSY;
+ }
+
+ if (rx_adapter->default_cb_arg)
+ rte_free(rx_adapter->conf_arg);
+ rte_free(rx_adapter->eth_devices);
+ rte_free(rx_adapter);
+ event_eth_rx_adapter[id] = NULL;
+
+ return 0;
+}
+
+int
+rte_event_eth_rx_adapter_queue_add(uint8_t id,
+ uint8_t eth_dev_id,
+ int32_t rx_queue_id,
+ const struct rte_event_eth_rx_adapter_queue_conf *queue_conf)
+{
+ int ret;
+ uint32_t cap;
+ struct rte_event_eth_rx_adapter *rx_adapter;
+ struct rte_eventdev *dev;
+ struct eth_device_info *dev_info;
+ int start_service;
+
+ RTE_EVENT_ETH_RX_ADAPTER_ID_VALID_OR_ERR_RET(id, -EINVAL);
+ RTE_ETH_VALID_PORTID_OR_ERR_RET(eth_dev_id, -EINVAL);
+
+ rx_adapter = id_to_rx_adapter(id);
+ if ((rx_adapter == NULL) || (queue_conf == NULL))
+ return -EINVAL;
+
+ dev = &rte_eventdevs[rx_adapter->eventdev_id];
+ ret = rte_event_eth_rx_adapter_caps_get(rx_adapter->eventdev_id,
+ eth_dev_id,
+ &cap);
+ if (ret) {
+ RTE_EDEV_LOG_ERR("Failed to get adapter caps edev %" PRIu8
+ "eth port %" PRIu8, id, eth_dev_id);
+ return ret;
+ }
+
+ if ((cap & RTE_EVENT_ETH_RX_ADAPTER_CAP_OVERRIDE_FLOW_ID) == 0
+ && (queue_conf->rx_queue_flags &
+ RTE_EVENT_ETH_RX_ADAPTER_QUEUE_FLOW_ID_VALID)) {
+ RTE_EDEV_LOG_ERR("Flow ID override is not supported,"
+ " eth port: %" PRIu8 " adapter id: %" PRIu8,
+ eth_dev_id, id);
+ return -EINVAL;
+ }
+
+ if ((cap & RTE_EVENT_ETH_RX_ADAPTER_CAP_MULTI_EVENTQ) == 0 &&
+ (rx_queue_id != -1)) {
+ RTE_EDEV_LOG_ERR("Rx queues can only be connected to single "
+ "event queue id %u eth port %u", id, eth_dev_id);
+ return -EINVAL;
+ }
+
+ if (rx_queue_id != -1 && (uint16_t)rx_queue_id >=
+ rte_eth_devices[eth_dev_id].data->nb_rx_queues) {
+ RTE_EDEV_LOG_ERR("Invalid rx queue_id %" PRIu16,
+ (uint16_t)rx_queue_id);
+ return -EINVAL;
+ }
+
+ start_service = 0;
+ dev_info = &rx_adapter->eth_devices[eth_dev_id];
+
+ if (cap & RTE_EVENT_ETH_RX_ADAPTER_CAP_INTERNAL_PORT) {
+ RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->eth_rx_adapter_queue_add,
+ -ENOTSUP);
+ if (dev_info->rx_queue == NULL) {
+ dev_info->rx_queue =
+ rte_zmalloc_socket(rx_adapter->mem_name,
+ dev_info->dev->data->nb_rx_queues *
+ sizeof(struct eth_rx_queue_info), 0,
+ rx_adapter->socket_id);
+ if (dev_info->rx_queue == NULL)
+ return -ENOMEM;
+ }
+
+ ret = (*dev->dev_ops->eth_rx_adapter_queue_add)(dev,
+ &rte_eth_devices[eth_dev_id],
+ rx_queue_id, queue_conf);
+ if (ret == 0) {
+ update_queue_info(rx_adapter,
+ &rx_adapter->eth_devices[eth_dev_id],
+ rx_queue_id,
+ 1);
+ }
+ } else {
+ rte_spinlock_lock(&rx_adapter->rx_lock);
+ ret = init_service(rx_adapter, id);
+ if (ret == 0)
+ ret = add_rx_queue(rx_adapter, eth_dev_id, rx_queue_id,
+ queue_conf);
+ rte_spinlock_unlock(&rx_adapter->rx_lock);
+ if (ret == 0)
+ start_service = !!sw_rx_adapter_queue_count(rx_adapter);
+ }
+
+ if (ret)
+ return ret;
+
+ if (start_service)
+ rte_service_component_runstate_set(rx_adapter->service_id, 1);
+
+ return 0;
+}
+
+int
+rte_event_eth_rx_adapter_queue_del(uint8_t id, uint8_t eth_dev_id,
+ int32_t rx_queue_id)
+{
+ int ret = 0;
+ struct rte_eventdev *dev;
+ struct rte_event_eth_rx_adapter *rx_adapter;
+ struct eth_device_info *dev_info;
+ uint32_t cap;
+ uint16_t i;
+
+ RTE_EVENT_ETH_RX_ADAPTER_ID_VALID_OR_ERR_RET(id, -EINVAL);
+ RTE_ETH_VALID_PORTID_OR_ERR_RET(eth_dev_id, -EINVAL);
+
+ rx_adapter = id_to_rx_adapter(id);
+ if (rx_adapter == NULL)
+ return -EINVAL;
+
+ dev = &rte_eventdevs[rx_adapter->eventdev_id];
+ ret = rte_event_eth_rx_adapter_caps_get(rx_adapter->eventdev_id,
+ eth_dev_id,
+ &cap);
+ if (ret)
+ return ret;
+
+ if (rx_queue_id != -1 && (uint16_t)rx_queue_id >=
+ rte_eth_devices[eth_dev_id].data->nb_rx_queues) {
+ RTE_EDEV_LOG_ERR("Invalid rx queue_id %" PRIu16,
+ (uint16_t)rx_queue_id);
+ return -EINVAL;
+ }
+
+ dev_info = &rx_adapter->eth_devices[eth_dev_id];
+
+ if (cap & RTE_EVENT_ETH_RX_ADAPTER_CAP_INTERNAL_PORT) {
+ RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->eth_rx_adapter_queue_del,
+ -ENOTSUP);
+ ret = (*dev->dev_ops->eth_rx_adapter_queue_del)(dev,
+ &rte_eth_devices[eth_dev_id],
+ rx_queue_id);
+ if (ret == 0) {
+ update_queue_info(rx_adapter,
+ &rx_adapter->eth_devices[eth_dev_id],
+ rx_queue_id,
+ 0);
+ if (dev_info->nb_dev_queues == 0) {
+ rte_free(dev_info->rx_queue);
+ dev_info->rx_queue = NULL;
+ }
+ }
+ } else {
+ int rc;
+ rte_spinlock_lock(&rx_adapter->rx_lock);
+ if (rx_queue_id == -1) {
+ for (i = 0; i < dev_info->dev->data->nb_rx_queues; i++)
+ event_eth_rx_adapter_queue_del(rx_adapter,
+ dev_info,
+ i);
+ } else {
+ event_eth_rx_adapter_queue_del(rx_adapter,
+ dev_info,
+ (uint16_t)rx_queue_id);
+ }
+
+ rc = eth_poll_wrr_calc(rx_adapter);
+ if (rc)
+ RTE_EDEV_LOG_ERR("WRR recalculation failed %" PRId32,
+ rc);
+
+ if (dev_info->nb_dev_queues == 0) {
+ rte_free(dev_info->rx_queue);
+ dev_info->rx_queue = NULL;
+ }
+
+ rte_spinlock_unlock(&rx_adapter->rx_lock);
+ rte_service_component_runstate_set(rx_adapter->service_id,
+ sw_rx_adapter_queue_count(rx_adapter));
+ }
+
+ return ret;
+}
+
+
+int
+rte_event_eth_rx_adapter_start(uint8_t id)
+{
+ return rx_adapter_ctrl(id, 1);
+}
+
+int
+rte_event_eth_rx_adapter_stop(uint8_t id)
+{
+ return rx_adapter_ctrl(id, 0);
+}
+
+int
+rte_event_eth_rx_adapter_stats_get(uint8_t id,
+ struct rte_event_eth_rx_adapter_stats *stats)
+{
+ struct rte_event_eth_rx_adapter *rx_adapter;
+ struct rte_event_eth_rx_adapter_stats dev_stats_sum = { 0 };
+ struct rte_event_eth_rx_adapter_stats dev_stats;
+ struct rte_eventdev *dev;
+ struct eth_device_info *dev_info;
+ uint32_t i;
+ int ret;
+
+ RTE_EVENT_ETH_RX_ADAPTER_ID_VALID_OR_ERR_RET(id, -EINVAL);
+
+ rx_adapter = id_to_rx_adapter(id);
+ if (rx_adapter == NULL || stats == NULL)
+ return -EINVAL;
+
+ dev = &rte_eventdevs[rx_adapter->eventdev_id];
+ memset(stats, 0, sizeof(*stats));
+ for (i = 0; i < rte_eth_dev_count(); i++) {
+ dev_info = &rx_adapter->eth_devices[i];
+ if (dev_info->internal_event_port == 0 ||
+ dev->dev_ops->eth_rx_adapter_stats_get == NULL)
+ continue;
+ ret = (*dev->dev_ops->eth_rx_adapter_stats_get)(dev,
+ &rte_eth_devices[i],
+ &dev_stats);
+ if (ret)
+ continue;
+ dev_stats_sum.rx_packets += dev_stats.rx_packets;
+ dev_stats_sum.rx_enq_count += dev_stats.rx_enq_count;
+ }
+
+ if (rx_adapter->service_inited)
+ *stats = rx_adapter->stats;
+
+ stats->rx_packets += dev_stats_sum.rx_packets;
+ stats->rx_enq_count += dev_stats_sum.rx_enq_count;
+ return 0;
+}
+
+int
+rte_event_eth_rx_adapter_stats_reset(uint8_t id)
+{
+ struct rte_event_eth_rx_adapter *rx_adapter;
+ struct rte_eventdev *dev;
+ struct eth_device_info *dev_info;
+ uint32_t i;
+
+ RTE_EVENT_ETH_RX_ADAPTER_ID_VALID_OR_ERR_RET(id, -EINVAL);
+
+ rx_adapter = id_to_rx_adapter(id);
+ if (rx_adapter == NULL)
+ return -EINVAL;
+
+ dev = &rte_eventdevs[rx_adapter->eventdev_id];
+ for (i = 0; i < rte_eth_dev_count(); i++) {
+ dev_info = &rx_adapter->eth_devices[i];
+ if (dev_info->internal_event_port == 0 ||
+ dev->dev_ops->eth_rx_adapter_stats_reset == NULL)
+ continue;
+ (*dev->dev_ops->eth_rx_adapter_stats_reset)(dev,
+ &rte_eth_devices[i]);
+ }
+
+ memset(&rx_adapter->stats, 0, sizeof(rx_adapter->stats));
+ return 0;
+}
+
+int
+rte_event_eth_rx_adapter_service_id_get(uint8_t id, uint32_t *service_id)
+{
+ struct rte_event_eth_rx_adapter *rx_adapter;
+
+ RTE_EVENT_ETH_RX_ADAPTER_ID_VALID_OR_ERR_RET(id, -EINVAL);
+
+ rx_adapter = id_to_rx_adapter(id);
+ if (rx_adapter == NULL || service_id == NULL)
+ return -EINVAL;
+
+ if (rx_adapter->service_inited)
+ *service_id = rx_adapter->service_id;
+
+ return rx_adapter->service_inited ? 0 : -ESRCH;
+}
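Since the SW transfer path runs as an EAL service (see init_service() above), an application typically still has to map the adapter's service to a service lcore. A hedged sketch, assuming adapter id 0 is valid and lcore 1 is available as a service core:

	uint32_t service_id;

	if (rte_event_eth_rx_adapter_service_id_get(0, &service_id) == 0) {
		/* Run the adapter service function on service lcore 1 */
		rte_service_lcore_add(1);
		rte_service_map_lcore_set(service_id, 1, 1);
		rte_service_lcore_start(1);
	}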
diff --git a/lib/librte_eventdev/rte_event_eth_rx_adapter.h b/lib/librte_eventdev/rte_event_eth_rx_adapter.h
new file mode 100644
index 00000000..6a9e7edf
--- /dev/null
+++ b/lib/librte_eventdev/rte_event_eth_rx_adapter.h
@@ -0,0 +1,444 @@
+/*
+ * Copyright(c) 2017 Intel Corporation. All rights reserved.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef _RTE_EVENT_ETH_RX_ADAPTER_
+#define _RTE_EVENT_ETH_RX_ADAPTER_
+
+/**
+ * @file
+ *
+ * RTE Event Ethernet Rx Adapter
+ *
+ * An eventdev-based packet processing application enqueues/dequeues mbufs
+ * to/from the event device. Packet flow from the ethernet device to the event
+ * device can be accomplished using either HW or SW mechanisms depending on the
+ * platform and the particular combination of ethernet and event devices. The
+ * event ethernet Rx adapter provides common APIs to configure the packet flow
+ * from the ethernet devices to event devices across both these transfer
+ * mechanisms.
+ *
+ * The adapter uses an EAL service core function for SW based packet transfer
+ * and uses the eventdev PMD functions to configure HW based packet transfer
+ * between the ethernet device and the event device.
+ *
+ * The ethernet Rx event adapter's functions are:
+ * - rte_event_eth_rx_adapter_create_ext()
+ * - rte_event_eth_rx_adapter_create()
+ * - rte_event_eth_rx_adapter_free()
+ * - rte_event_eth_rx_adapter_queue_add()
+ * - rte_event_eth_rx_adapter_queue_del()
+ * - rte_event_eth_rx_adapter_start()
+ * - rte_event_eth_rx_adapter_stop()
+ * - rte_event_eth_rx_adapter_stats_get()
+ * - rte_event_eth_rx_adapter_stats_reset()
+ *
+ * The application creates an ethernet to event adapter using
+ * rte_event_eth_rx_adapter_create_ext() or rte_event_eth_rx_adapter_create()
+ * functions.
+ * The adapter needs to know which ethernet rx queues to poll for mbufs as well
+ * as event device parameters such as the event queue identifier, event
+ * priority and scheduling type that the adapter should use when constructing
+ * events. The rte_event_eth_rx_adapter_queue_add() function is provided for
+ * this purpose.
+ * The servicing weight parameter in the rte_event_eth_rx_adapter_queue_conf
+ * is applicable when the Rx adapter uses a service core function and is
+ * intended to provide application control of the frequency of polling ethernet
+ * device receive queues; for example, the application may want to poll higher
+ * priority queues with a higher frequency but at the same time not starve
+ * lower priority queues completely. If this parameter is zero and the receive
+ * interrupt is enabled when configuring the device, the receive queue is
+ * interrupt driven; else, the queue is assigned a servicing weight of one.
+ *
+ * The application can start/stop the adapter using the
+ * rte_event_eth_rx_adapter_start() and the rte_event_eth_rx_adapter_stop()
+ * functions. If the adapter uses a rte_service function, then the application
+ * is also required to assign a core to the service function and control the
+ * service core using the rte_service APIs. The
+ * rte_event_eth_rx_adapter_service_id_get() function can be used to retrieve
+ * the service function ID of the adapter in this case.
+ *
+ * Note: Interrupt driven receive queues are currently unimplemented.
+ */
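To make the workflow described in the comment above concrete, here is a minimal, hedged setup sketch. It is not part of this patch: the adapter id, event device id, ethdev port and the rte_event_port_conf numbers are illustrative placeholders, and error handling is reduced to early returns.

    #include <rte_event_eth_rx_adapter.h>

    static int
    app_setup_rx_adapter(uint8_t adapter_id, uint8_t evdev_id, uint8_t eth_port)
    {
            /* Example numbers only, not tuning recommendations. */
            struct rte_event_port_conf port_conf = {
                    .new_event_threshold = 1024,
                    .dequeue_depth = 32,
                    .enqueue_depth = 32,
            };
            struct rte_event_eth_rx_adapter_queue_conf queue_conf = {0};
            int ret;

            /* The default conf_cb adds an event port based on port_conf. */
            ret = rte_event_eth_rx_adapter_create(adapter_id, evdev_id, &port_conf);
            if (ret)
                    return ret;

            /* Feed all Rx queues of eth_port into event queue 0. */
            queue_conf.ev.queue_id = 0;
            queue_conf.ev.sched_type = RTE_SCHED_TYPE_ATOMIC;
            queue_conf.ev.priority = RTE_EVENT_DEV_PRIORITY_NORMAL;
            queue_conf.servicing_weight = 1;
            ret = rte_event_eth_rx_adapter_queue_add(adapter_id, eth_port, -1,
                                                     &queue_conf);
            if (ret)
                    return ret;

            return rte_event_eth_rx_adapter_start(adapter_id);
    }

If the adapter ends up service based, the application additionally maps the adapter service to a core with the rte_service API, as noted above.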
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include <stdint.h>
+
+#include <rte_service.h>
+
+#include "rte_eventdev.h"
+
+#define RTE_EVENT_ETH_RX_ADAPTER_MAX_INSTANCE 32
+
+/* struct rte_event_eth_rx_adapter_queue_conf flags definitions */
+#define RTE_EVENT_ETH_RX_ADAPTER_QUEUE_FLOW_ID_VALID 0x1
+/**< This flag indicates the flow identifier is valid
+ * @see rte_event_eth_rx_adapter_queue_conf::rx_queue_flags
+ */
+
+/**
+ * @warning
+ * @b EXPERIMENTAL: this API may change without prior notice
+ *
+ * Adapter configuration structure that the adapter configuration callback
+ * function is expected to fill out
+ * @see rte_event_eth_rx_adapter_conf_cb
+ */
+struct rte_event_eth_rx_adapter_conf {
+ uint8_t event_port_id;
+ /**< Event port identifier, the adapter enqueues mbuf events to this
+ * port.
+ */
+ uint32_t max_nb_rx;
+ /**< The adapter can return early if it has processed at least
+ * max_nb_rx mbufs. This isn't treated as a requirement; batching may
+ * cause the adapter to process more than max_nb_rx mbufs.
+ */
+};
+
+/**
+ * @warning
+ * @b EXPERIMENTAL: this API may change without prior notice
+ *
+ * Function type used for adapter configuration callback. The callback is
+ * used to fill in members of the struct rte_event_eth_rx_adapter_conf. This
+ * callback is invoked when creating a SW service for packet transfer from
+ * ethdev queues to the event device. The SW service is created within the
+ * rte_event_eth_rx_adapter_queue_add() function if SW based packet transfers
+ * from ethdev queues to the event device are required.
+ *
+ * @param id
+ * Adapter identifier.
+ *
+ * @param dev_id
+ * Event device identifier.
+ *
+ * @param [out] conf
+ * Structure that needs to be populated by this callback.
+ *
+ * @param arg
+ * Argument to the callback. This is the same as the conf_arg passed to the
+ * rte_event_eth_rx_adapter_create_ext().
+ */
+typedef int (*rte_event_eth_rx_adapter_conf_cb) (uint8_t id, uint8_t dev_id,
+ struct rte_event_eth_rx_adapter_conf *conf,
+ void *arg);
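A sketch of what such a callback could look like when the application has already reserved an event port for the adapter; APP_RX_ADAPTER_PORT and the max_nb_rx value are assumptions made only for this example, not values from this patch.

    #define APP_RX_ADAPTER_PORT 3   /* hypothetical port set aside by the app */

    static int
    app_rx_adapter_conf_cb(uint8_t id, uint8_t dev_id,
                           struct rte_event_eth_rx_adapter_conf *conf, void *arg)
    {
            RTE_SET_USED(id);
            RTE_SET_USED(dev_id);
            RTE_SET_USED(arg);

            conf->event_port_id = APP_RX_ADAPTER_PORT;
            conf->max_nb_rx = 128;  /* bound the work done per service invocation */
            return 0;
    }

The callback would then be passed as the conf_cb argument of rte_event_eth_rx_adapter_create_ext().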
+
+/**
+ * @warning
+ * @b EXPERIMENTAL: this API may change without prior notice
+ *
+ * Rx queue configuration structure
+ */
+struct rte_event_eth_rx_adapter_queue_conf {
+ uint32_t rx_queue_flags;
+ /**< Flags for handling received packets
+ * @see RTE_EVENT_ETH_RX_ADAPTER_QUEUE_FLOW_ID_VALID
+ */
+ uint16_t servicing_weight;
+ /**< Relative polling frequency of ethernet receive queue when the
+ * adapter uses a service core function for ethernet to event device
+ * transfers. If it is set to zero, the Rx queue is interrupt driven
+ * (unless rx queue interrupts are not enabled for the ethernet
+ * device).
+ */
+ struct rte_event ev;
+ /**<
+ * The values from the following event fields will be used when
+ * queuing mbuf events:
+ * - event_queue_id: Targeted event queue ID for received packets.
+ * - event_priority: Event priority of packets from this Rx queue in
+ * the event queue relative to other events.
+ * - sched_type: Scheduling type for packets from this Rx queue.
+ * - flow_id: If the RTE_EVENT_ETH_RX_ADAPTER_QUEUE_FLOW_ID_VALID bit
+ * is set in rx_queue_flags, this flow_id is used for all
+ * packets received from this queue. Otherwise the flow ID
+ * is set to the RSS hash of the src and dst IPv4/6
+ * addresses.
+ *
+ * The event adapter sets ev.event_type to RTE_EVENT_TYPE_ETHDEV in the
+ * enqueued event.
+ */
+};
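For illustration, a hedged sketch of filling this structure for a single high priority Rx queue; the queue indices, servicing weight and flow ID are arbitrary example values and adapter_id/eth_port are assumed to be valid.

    static int
    app_add_high_prio_queue(uint8_t adapter_id, uint8_t eth_port)
    {
            struct rte_event_eth_rx_adapter_queue_conf qconf;

            memset(&qconf, 0, sizeof(qconf));
            /* Use one fixed flow ID for every packet from this Rx queue. */
            qconf.rx_queue_flags = RTE_EVENT_ETH_RX_ADAPTER_QUEUE_FLOW_ID_VALID;
            /* Poll this queue four times as often as a weight-1 queue. */
            qconf.servicing_weight = 4;
            qconf.ev.queue_id = 1;
            qconf.ev.priority = RTE_EVENT_DEV_PRIORITY_HIGHEST;
            qconf.ev.sched_type = RTE_SCHED_TYPE_ATOMIC;
            qconf.ev.flow_id = 42;

            return rte_event_eth_rx_adapter_queue_add(adapter_id, eth_port,
                                                      1 /* Rx queue index */,
                                                      &qconf);
    }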
+
+/**
+ * @warning
+ * @b EXPERIMENTAL: this API may change without prior notice
+ *
+ * A structure used to retrieve statistics for an eth rx adapter instance.
+ */
+struct rte_event_eth_rx_adapter_stats {
+ uint64_t rx_poll_count;
+ /**< Receive queue poll count */
+ uint64_t rx_packets;
+ /**< Received packet count */
+ uint64_t rx_enq_count;
+ /**< Eventdev enqueue count */
+ uint64_t rx_enq_retry;
+ /**< Eventdev enqueue retry count */
+ uint64_t rx_enq_start_ts;
+ /**< Rx enqueue start timestamp */
+ uint64_t rx_enq_block_cycles;
+ /**< Cycles for which the service is blocked by the event device,
+ * i.e, the service fails to enqueue to the event device.
+ */
+ uint64_t rx_enq_end_ts;
+ /**< Latest timestamp at which the service is unblocked
+ * by the event device. The start, end timestamps and
+ * block cycles can be used to compute the percentage of
+ * cycles the service is blocked by the event device.
+ */
+};
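As a sketch of the computation hinted at in the comment above (adapter_id is assumed to refer to a service-based adapter and <stdio.h> to be included; the snippet only reports a percentage, it does not normalise across service restarts):

    static void
    app_report_enqueue_blocking(uint8_t adapter_id)
    {
            struct rte_event_eth_rx_adapter_stats stats;

            if (rte_event_eth_rx_adapter_stats_get(adapter_id, &stats) != 0)
                    return;
            if (stats.rx_enq_end_ts <= stats.rx_enq_start_ts)
                    return;

            uint64_t window = stats.rx_enq_end_ts - stats.rx_enq_start_ts;
            printf("enqueue blocked for %.2f%% of the measured cycles\n",
                   100.0 * stats.rx_enq_block_cycles / window);
    }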
+
+/**
+ * @warning
+ * @b EXPERIMENTAL: this API may change without prior notice
+ *
+ * Create a new ethernet Rx event adapter with the specified identifier.
+ *
+ * @param id
+ * The identifier of the ethernet Rx event adapter.
+ *
+ * @param dev_id
+ * The identifier of the device to configure.
+ *
+ * @param conf_cb
+ * Callback function that fills in members of a
+ * struct rte_event_eth_rx_adapter_conf struct passed into
+ * it.
+ *
+ * @param conf_arg
+ * Argument that is passed to the conf_cb function.
+ *
+ * @return
+ * - 0: Success
+ * - <0: Error code on failure
+ */
+int rte_event_eth_rx_adapter_create_ext(uint8_t id, uint8_t dev_id,
+ rte_event_eth_rx_adapter_conf_cb conf_cb,
+ void *conf_arg);
+
+/**
+ * @warning
+ * @b EXPERIMENTAL: this API may change without prior notice
+ *
+ * Create a new ethernet Rx event adapter with the specified identifier.
+ * This function uses an internal configuration function that creates an event
+ * port. This default function reconfigures the event device with an
+ * additional event port and sets up the event port using the port_config
+ * parameter passed into this function. In case the application needs more
+ * control in configuration of the service, it should use the
+ * rte_event_eth_rx_adapter_create_ext() version.
+ *
+ * @param id
+ * The identifier of the ethernet Rx event adapter.
+ *
+ * @param dev_id
+ * The identifier of the device to configure.
+ *
+ * @param port_config
+ * Argument of type *rte_event_port_conf* that is passed to the conf_cb
+ * function.
+ *
+ * @return
+ * - 0: Success
+ * - <0: Error code on failure
+ */
+int rte_event_eth_rx_adapter_create(uint8_t id, uint8_t dev_id,
+ struct rte_event_port_conf *port_config);
+
+/**
+ * @warning
+ * @b EXPERIMENTAL: this API may change without prior notice
+ *
+ * Free an event adapter
+ *
+ * @param id
+ * Adapter identifier.
+ *
+ * @return
+ * - 0: Success
+ * - <0: Error code on failure. If the adapter still has Rx queues
+ * added to it, the function returns -EBUSY.
+ */
+int rte_event_eth_rx_adapter_free(uint8_t id);
+
+/**
+ * @warning
+ * @b EXPERIMENTAL: this API may change without prior notice
+ *
+ * Add receive queue to an event adapter. After a queue has been
+ * added to the event adapter, the result of the application calling
+ * rte_eth_rx_burst(eth_dev_id, rx_queue_id, ..) is undefined.
+ *
+ * @param id
+ * Adapter identifier.
+ *
+ * @param eth_dev_id
+ * Port identifier of Ethernet device.
+ *
+ * @param rx_queue_id
+ * Ethernet device receive queue index.
+ * If rx_queue_id is -1, then all Rx queues configured for
+ * the device are added. If the ethdev Rx queues can only be
+ * connected to a single event queue then rx_queue_id is
+ * required to be -1.
+ * @see RTE_EVENT_ETH_RX_ADAPTER_CAP_MULTI_EVENTQ
+ *
+ * @param conf
+ * Additional configuration structure of type *rte_event_eth_rx_adapter_queue_conf*
+ *
+ * @return
+ * - 0: Success, Receive queue added correctly.
+ * - <0: Error code on failure.
+ */
+int rte_event_eth_rx_adapter_queue_add(uint8_t id,
+ uint8_t eth_dev_id,
+ int32_t rx_queue_id,
+ const struct rte_event_eth_rx_adapter_queue_conf *conf);
+
+/**
+ * @warning
+ * @b EXPERIMENTAL: this API may change without prior notice
+ *
+ * Delete receive queue from an event adapter.
+ *
+ * @param id
+ * Adapter identifier.
+ *
+ * @param eth_dev_id
+ * Port identifier of Ethernet device.
+ *
+ * @param rx_queue_id
+ * Ethernet device receive queue index.
+ * If rx_queue_id is -1, then all Rx queues configured for
+ * the device are deleted. If the ethdev Rx queues can only be
+ * connected to a single event queue then rx_queue_id is
+ * required to be -1.
+ * @see RTE_EVENT_ETH_RX_ADAPTER_CAP_MULTI_EVENTQ
+ *
+ * @return
+ * - 0: Success, Receive queue deleted correctly.
+ * - <0: Error code on failure.
+ */
+int rte_event_eth_rx_adapter_queue_del(uint8_t id, uint8_t eth_dev_id,
+ int32_t rx_queue_id);
+
+/**
+ * @warning
+ * @b EXPERIMENTAL: this API may change without prior notice
+ *
+ * Start ethernet Rx event adapter
+ *
+ * @param id
+ * Adapter identifier.
+ *
+ * @return
+ * - 0: Success, Adapter started correctly.
+ * - <0: Error code on failure.
+ */
+int rte_event_eth_rx_adapter_start(uint8_t id);
+
+/**
+ * @warning
+ * @b EXPERIMENTAL: this API may change without prior notice
+ *
+ * Stop ethernet Rx event adapter
+ *
+ * @param id
+ * Adapter identifier.
+ *
+ * @return
+ * - 0: Success, Adapter stopped correctly.
+ * - <0: Error code on failure.
+ */
+int rte_event_eth_rx_adapter_stop(uint8_t id);
+
+/**
+ * @warning
+ * @b EXPERIMENTAL: this API may change without prior notice
+ *
+ * Retrieve statistics for an adapter
+ *
+ * @param id
+ * Adapter identifier.
+ *
+ * @param [out] stats
+ * A pointer to structure used to retrieve statistics for an adapter.
+ *
+ * @return
+ * - 0: Success, retrieved successfully.
+ * - <0: Error code on failure.
+ */
+int rte_event_eth_rx_adapter_stats_get(uint8_t id,
+ struct rte_event_eth_rx_adapter_stats *stats);
+
+/**
+ * @warning
+ * @b EXPERIMENTAL: this API may change without prior notice
+ *
+ * Reset statistics for an adapter.
+ *
+ * @param id
+ * Adapter identifier.
+ *
+ * @return
+ * - 0: Success, statistics reset successfully.
+ * - <0: Error code on failure.
+ */
+int rte_event_eth_rx_adapter_stats_reset(uint8_t id);
+
+/**
+ * @warning
+ * @b EXPERIMENTAL: this API may change without prior notice
+ *
+ * Retrieve the service ID of an adapter. If the adapter doesn't use
+ * a rte_service function, this function returns -ESRCH.
+ *
+ * @param id
+ * Adapter identifier.
+ *
+ * @param [out] service_id
+ * A pointer to a uint32_t, to be filled in with the service id.
+ *
+ * @return
+ * - 0: Success
+ * - <0: Error code on failure; if the adapter doesn't use a rte_service
+ * function, this function returns -ESRCH.
+ */
+int rte_event_eth_rx_adapter_service_id_get(uint8_t id, uint32_t *service_id);
+
+#ifdef __cplusplus
+}
+#endif
+#endif /* _RTE_EVENT_ETH_RX_ADAPTER_ */
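When the adapter is service based, the application still has to place the service on a core, as the file comment above notes. A hedged sketch using the rte_service API pulled in by this header; lcore 2 is an arbitrary choice and adapter_id is assumed to be a created adapter:

    uint32_t service_id;

    if (rte_event_eth_rx_adapter_service_id_get(adapter_id, &service_id) == 0) {
            /* Make lcore 2 a service core, map the adapter service to it
             * and let it run. */
            rte_service_lcore_add(2);
            rte_service_map_lcore_set(service_id, 2, 1);
            rte_service_runstate_set(service_id, 1);
            rte_service_lcore_start(2);
    }

If the adapter uses an internal event port instead (HW transfer), service_id_get returns -ESRCH and no core assignment is needed.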
diff --git a/lib/librte_eventdev/rte_eventdev.c b/lib/librte_eventdev/rte_eventdev.c
index bbb38050..ce6a5dc1 100644
--- a/lib/librte_eventdev/rte_eventdev.c
+++ b/lib/librte_eventdev/rte_eventdev.c
@@ -56,6 +56,7 @@
#include <rte_common.h>
#include <rte_malloc.h>
#include <rte_errno.h>
+#include <rte_ethdev.h>
#include "rte_eventdev.h"
#include "rte_eventdev_pmd.h"
@@ -128,55 +129,77 @@ rte_event_dev_info_get(uint8_t dev_id, struct rte_event_dev_info *dev_info)
return 0;
}
+int
+rte_event_eth_rx_adapter_caps_get(uint8_t dev_id, uint8_t eth_port_id,
+ uint32_t *caps)
+{
+ struct rte_eventdev *dev;
+
+ RTE_EVENTDEV_VALID_DEVID_OR_ERR_RET(dev_id, -EINVAL);
+ RTE_ETH_VALID_PORTID_OR_ERR_RET(eth_port_id, -EINVAL);
+
+ dev = &rte_eventdevs[dev_id];
+
+ if (caps == NULL)
+ return -EINVAL;
+ *caps = 0;
+
+ return dev->dev_ops->eth_rx_adapter_caps_get ?
+ (*dev->dev_ops->eth_rx_adapter_caps_get)(dev,
+ &rte_eth_devices[eth_port_id],
+ caps)
+ : 0;
+}
+
static inline int
rte_event_dev_queue_config(struct rte_eventdev *dev, uint8_t nb_queues)
{
uint8_t old_nb_queues = dev->data->nb_queues;
- uint8_t *queues_prio;
+ struct rte_event_queue_conf *queues_cfg;
unsigned int i;
RTE_EDEV_LOG_DEBUG("Setup %d queues on device %u", nb_queues,
dev->data->dev_id);
/* First time configuration */
- if (dev->data->queues_prio == NULL && nb_queues != 0) {
- /* Allocate memory to store queue priority */
- dev->data->queues_prio = rte_zmalloc_socket(
- "eventdev->data->queues_prio",
- sizeof(dev->data->queues_prio[0]) * nb_queues,
+ if (dev->data->queues_cfg == NULL && nb_queues != 0) {
+ /* Allocate memory to store queue configuration */
+ dev->data->queues_cfg = rte_zmalloc_socket(
+ "eventdev->data->queues_cfg",
+ sizeof(dev->data->queues_cfg[0]) * nb_queues,
RTE_CACHE_LINE_SIZE, dev->data->socket_id);
- if (dev->data->queues_prio == NULL) {
+ if (dev->data->queues_cfg == NULL) {
dev->data->nb_queues = 0;
- RTE_EDEV_LOG_ERR("failed to get mem for queue priority,"
+ RTE_EDEV_LOG_ERR("failed to get mem for queue cfg,"
"nb_queues %u", nb_queues);
return -(ENOMEM);
}
/* Re-configure */
- } else if (dev->data->queues_prio != NULL && nb_queues != 0) {
+ } else if (dev->data->queues_cfg != NULL && nb_queues != 0) {
RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->queue_release, -ENOTSUP);
for (i = nb_queues; i < old_nb_queues; i++)
(*dev->dev_ops->queue_release)(dev, i);
- /* Re allocate memory to store queue priority */
- queues_prio = dev->data->queues_prio;
- queues_prio = rte_realloc(queues_prio,
- sizeof(queues_prio[0]) * nb_queues,
+ /* Re allocate memory to store queue configuration */
+ queues_cfg = dev->data->queues_cfg;
+ queues_cfg = rte_realloc(queues_cfg,
+ sizeof(queues_cfg[0]) * nb_queues,
RTE_CACHE_LINE_SIZE);
- if (queues_prio == NULL) {
- RTE_EDEV_LOG_ERR("failed to realloc queue priority,"
+ if (queues_cfg == NULL) {
+ RTE_EDEV_LOG_ERR("failed to realloc queue cfg memory,"
" nb_queues %u", nb_queues);
return -(ENOMEM);
}
- dev->data->queues_prio = queues_prio;
+ dev->data->queues_cfg = queues_cfg;
if (nb_queues > old_nb_queues) {
uint8_t new_qs = nb_queues - old_nb_queues;
- memset(queues_prio + old_nb_queues, 0,
- sizeof(queues_prio[0]) * new_qs);
+ memset(queues_cfg + old_nb_queues, 0,
+ sizeof(queues_cfg[0]) * new_qs);
}
- } else if (dev->data->queues_prio != NULL && nb_queues == 0) {
+ } else if (dev->data->queues_cfg != NULL && nb_queues == 0) {
RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->queue_release, -ENOTSUP);
for (i = nb_queues; i < old_nb_queues; i++)
@@ -195,8 +218,7 @@ rte_event_dev_port_config(struct rte_eventdev *dev, uint8_t nb_ports)
uint8_t old_nb_ports = dev->data->nb_ports;
void **ports;
uint16_t *links_map;
- uint8_t *ports_dequeue_depth;
- uint8_t *ports_enqueue_depth;
+ struct rte_event_port_conf *ports_cfg;
unsigned int i;
RTE_EDEV_LOG_DEBUG("Setup %d ports on device %u", nb_ports,
@@ -214,26 +236,14 @@ rte_event_dev_port_config(struct rte_eventdev *dev, uint8_t nb_ports)
return -(ENOMEM);
}
- /* Allocate memory to store ports dequeue depth */
- dev->data->ports_dequeue_depth =
- rte_zmalloc_socket("eventdev->ports_dequeue_depth",
- sizeof(dev->data->ports_dequeue_depth[0]) * nb_ports,
+ /* Allocate memory to store port configurations */
+ dev->data->ports_cfg =
+ rte_zmalloc_socket("eventdev->ports_cfg",
+ sizeof(dev->data->ports_cfg[0]) * nb_ports,
RTE_CACHE_LINE_SIZE, dev->data->socket_id);
- if (dev->data->ports_dequeue_depth == NULL) {
+ if (dev->data->ports_cfg == NULL) {
dev->data->nb_ports = 0;
- RTE_EDEV_LOG_ERR("failed to get mem for port deq meta,"
- "nb_ports %u", nb_ports);
- return -(ENOMEM);
- }
-
- /* Allocate memory to store ports enqueue depth */
- dev->data->ports_enqueue_depth =
- rte_zmalloc_socket("eventdev->ports_enqueue_depth",
- sizeof(dev->data->ports_enqueue_depth[0]) * nb_ports,
- RTE_CACHE_LINE_SIZE, dev->data->socket_id);
- if (dev->data->ports_enqueue_depth == NULL) {
- dev->data->nb_ports = 0;
- RTE_EDEV_LOG_ERR("failed to get mem for port enq meta,"
+ RTE_EDEV_LOG_ERR("failed to get mem for port cfg,"
"nb_ports %u", nb_ports);
return -(ENOMEM);
}
@@ -257,8 +267,7 @@ rte_event_dev_port_config(struct rte_eventdev *dev, uint8_t nb_ports)
RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->port_release, -ENOTSUP);
ports = dev->data->ports;
- ports_dequeue_depth = dev->data->ports_dequeue_depth;
- ports_enqueue_depth = dev->data->ports_enqueue_depth;
+ ports_cfg = dev->data->ports_cfg;
links_map = dev->data->links_map;
for (i = nb_ports; i < old_nb_ports; i++)
@@ -273,22 +282,12 @@ rte_event_dev_port_config(struct rte_eventdev *dev, uint8_t nb_ports)
return -(ENOMEM);
}
- /* Realloc memory for ports_dequeue_depth */
- ports_dequeue_depth = rte_realloc(ports_dequeue_depth,
- sizeof(ports_dequeue_depth[0]) * nb_ports,
+ /* Realloc memory for ports_cfg */
+ ports_cfg = rte_realloc(ports_cfg,
+ sizeof(ports_cfg[0]) * nb_ports,
RTE_CACHE_LINE_SIZE);
- if (ports_dequeue_depth == NULL) {
- RTE_EDEV_LOG_ERR("failed to realloc port dequeue meta,"
- " nb_ports %u", nb_ports);
- return -(ENOMEM);
- }
-
- /* Realloc memory for ports_enqueue_depth */
- ports_enqueue_depth = rte_realloc(ports_enqueue_depth,
- sizeof(ports_enqueue_depth[0]) * nb_ports,
- RTE_CACHE_LINE_SIZE);
- if (ports_enqueue_depth == NULL) {
- RTE_EDEV_LOG_ERR("failed to realloc port enqueue meta,"
+ if (ports_cfg == NULL) {
+ RTE_EDEV_LOG_ERR("failed to realloc port cfg mem,"
" nb_ports %u", nb_ports);
return -(ENOMEM);
}
@@ -314,18 +313,15 @@ rte_event_dev_port_config(struct rte_eventdev *dev, uint8_t nb_ports)
memset(ports + old_nb_ports, 0,
sizeof(ports[0]) * new_ps);
- memset(ports_dequeue_depth + old_nb_ports, 0,
- sizeof(ports_dequeue_depth[0]) * new_ps);
- memset(ports_enqueue_depth + old_nb_ports, 0,
- sizeof(ports_enqueue_depth[0]) * new_ps);
+ memset(ports_cfg + old_nb_ports, 0,
+ sizeof(ports_cfg[0]) * new_ps);
for (i = old_links_map_end; i < links_map_end; i++)
links_map[i] =
EVENT_QUEUE_SERVICE_PRIORITY_INVALID;
}
dev->data->ports = ports;
- dev->data->ports_dequeue_depth = ports_dequeue_depth;
- dev->data->ports_enqueue_depth = ports_enqueue_depth;
+ dev->data->ports_cfg = ports_cfg;
dev->data->links_map = links_map;
} else if (dev->data->ports != NULL && nb_ports == 0) {
RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->port_release, -ENOTSUP);
@@ -519,13 +515,13 @@ rte_event_queue_default_conf_get(uint8_t dev_id, uint8_t queue_id,
static inline int
is_valid_atomic_queue_conf(const struct rte_event_queue_conf *queue_conf)
{
- if (queue_conf && (
- ((queue_conf->event_queue_cfg &
- RTE_EVENT_QUEUE_CFG_TYPE_MASK)
- == RTE_EVENT_QUEUE_CFG_ALL_TYPES) ||
+ if (queue_conf &&
+ !(queue_conf->event_queue_cfg &
+ RTE_EVENT_QUEUE_CFG_SINGLE_LINK) &&
((queue_conf->event_queue_cfg &
- RTE_EVENT_QUEUE_CFG_TYPE_MASK)
- == RTE_EVENT_QUEUE_CFG_ATOMIC_ONLY)
+ RTE_EVENT_QUEUE_CFG_ALL_TYPES) ||
+ (queue_conf->schedule_type
+ == RTE_SCHED_TYPE_ATOMIC)
))
return 1;
else
@@ -535,13 +531,13 @@ is_valid_atomic_queue_conf(const struct rte_event_queue_conf *queue_conf)
static inline int
is_valid_ordered_queue_conf(const struct rte_event_queue_conf *queue_conf)
{
- if (queue_conf && (
- ((queue_conf->event_queue_cfg &
- RTE_EVENT_QUEUE_CFG_TYPE_MASK)
- == RTE_EVENT_QUEUE_CFG_ALL_TYPES) ||
+ if (queue_conf &&
+ !(queue_conf->event_queue_cfg &
+ RTE_EVENT_QUEUE_CFG_SINGLE_LINK) &&
((queue_conf->event_queue_cfg &
- RTE_EVENT_QUEUE_CFG_TYPE_MASK)
- == RTE_EVENT_QUEUE_CFG_ORDERED_ONLY)
+ RTE_EVENT_QUEUE_CFG_ALL_TYPES) ||
+ (queue_conf->schedule_type
+ == RTE_SCHED_TYPE_ORDERED)
))
return 1;
else
@@ -605,31 +601,10 @@ rte_event_queue_setup(uint8_t dev_id, uint8_t queue_id,
queue_conf = &def_conf;
}
- dev->data->queues_prio[queue_id] = queue_conf->priority;
+ dev->data->queues_cfg[queue_id] = *queue_conf;
return (*dev->dev_ops->queue_setup)(dev, queue_id, queue_conf);
}
-uint8_t
-rte_event_queue_count(uint8_t dev_id)
-{
- struct rte_eventdev *dev;
-
- dev = &rte_eventdevs[dev_id];
- return dev->data->nb_queues;
-}
-
-uint8_t
-rte_event_queue_priority(uint8_t dev_id, uint8_t queue_id)
-{
- struct rte_eventdev *dev;
-
- dev = &rte_eventdevs[dev_id];
- if (dev->data->event_dev_cap & RTE_EVENT_DEV_CAP_QUEUE_QOS)
- return dev->data->queues_prio[queue_id];
- else
- return RTE_EVENT_DEV_PRIORITY_NORMAL;
-}
-
static inline int
is_valid_port(struct rte_eventdev *dev, uint8_t port_id)
{
@@ -726,10 +701,7 @@ rte_event_port_setup(uint8_t dev_id, uint8_t port_id,
port_conf = &def_conf;
}
- dev->data->ports_dequeue_depth[port_id] =
- port_conf->dequeue_depth;
- dev->data->ports_enqueue_depth[port_id] =
- port_conf->enqueue_depth;
+ dev->data->ports_cfg[port_id] = *port_conf;
diag = (*dev->dev_ops->port_setup)(dev, port_id, port_conf);
@@ -743,31 +715,110 @@ rte_event_port_setup(uint8_t dev_id, uint8_t port_id,
return 0;
}
-uint8_t
-rte_event_port_dequeue_depth(uint8_t dev_id, uint8_t port_id)
+int
+rte_event_dev_attr_get(uint8_t dev_id, uint32_t attr_id,
+ uint32_t *attr_value)
{
struct rte_eventdev *dev;
+ if (!attr_value)
+ return -EINVAL;
+ RTE_EVENTDEV_VALID_DEVID_OR_ERR_RET(dev_id, -EINVAL);
dev = &rte_eventdevs[dev_id];
- return dev->data->ports_dequeue_depth[port_id];
+
+ switch (attr_id) {
+ case RTE_EVENT_DEV_ATTR_PORT_COUNT:
+ *attr_value = dev->data->nb_ports;
+ break;
+ case RTE_EVENT_DEV_ATTR_QUEUE_COUNT:
+ *attr_value = dev->data->nb_queues;
+ break;
+ case RTE_EVENT_DEV_ATTR_STARTED:
+ *attr_value = dev->data->dev_started;
+ break;
+ default:
+ return -EINVAL;
+ }
+
+ return 0;
}
-uint8_t
-rte_event_port_enqueue_depth(uint8_t dev_id, uint8_t port_id)
+int
+rte_event_port_attr_get(uint8_t dev_id, uint8_t port_id, uint32_t attr_id,
+ uint32_t *attr_value)
{
struct rte_eventdev *dev;
+ if (!attr_value)
+ return -EINVAL;
+
+ RTE_EVENTDEV_VALID_DEVID_OR_ERR_RET(dev_id, -EINVAL);
dev = &rte_eventdevs[dev_id];
- return dev->data->ports_enqueue_depth[port_id];
+ if (!is_valid_port(dev, port_id)) {
+ RTE_EDEV_LOG_ERR("Invalid port_id=%" PRIu8, port_id);
+ return -EINVAL;
+ }
+
+ switch (attr_id) {
+ case RTE_EVENT_PORT_ATTR_ENQ_DEPTH:
+ *attr_value = dev->data->ports_cfg[port_id].enqueue_depth;
+ break;
+ case RTE_EVENT_PORT_ATTR_DEQ_DEPTH:
+ *attr_value = dev->data->ports_cfg[port_id].dequeue_depth;
+ break;
+ case RTE_EVENT_PORT_ATTR_NEW_EVENT_THRESHOLD:
+ *attr_value = dev->data->ports_cfg[port_id].new_event_threshold;
+ break;
+ default:
+ return -EINVAL;
+ };
+ return 0;
}
-uint8_t
-rte_event_port_count(uint8_t dev_id)
+int
+rte_event_queue_attr_get(uint8_t dev_id, uint8_t queue_id, uint32_t attr_id,
+ uint32_t *attr_value)
{
+ struct rte_event_queue_conf *conf;
struct rte_eventdev *dev;
+ if (!attr_value)
+ return -EINVAL;
+
+ RTE_EVENTDEV_VALID_DEVID_OR_ERR_RET(dev_id, -EINVAL);
dev = &rte_eventdevs[dev_id];
- return dev->data->nb_ports;
+ if (!is_valid_queue(dev, queue_id)) {
+ RTE_EDEV_LOG_ERR("Invalid queue_id=%" PRIu8, queue_id);
+ return -EINVAL;
+ }
+
+ conf = &dev->data->queues_cfg[queue_id];
+
+ switch (attr_id) {
+ case RTE_EVENT_QUEUE_ATTR_PRIORITY:
+ *attr_value = RTE_EVENT_DEV_PRIORITY_NORMAL;
+ if (dev->data->event_dev_cap & RTE_EVENT_DEV_CAP_QUEUE_QOS)
+ *attr_value = conf->priority;
+ break;
+ case RTE_EVENT_QUEUE_ATTR_NB_ATOMIC_FLOWS:
+ *attr_value = conf->nb_atomic_flows;
+ break;
+ case RTE_EVENT_QUEUE_ATTR_NB_ATOMIC_ORDER_SEQUENCES:
+ *attr_value = conf->nb_atomic_order_sequences;
+ break;
+ case RTE_EVENT_QUEUE_ATTR_EVENT_QUEUE_CFG:
+ *attr_value = conf->event_queue_cfg;
+ break;
+ case RTE_EVENT_QUEUE_ATTR_SCHEDULE_TYPE:
+ if (conf->event_queue_cfg & RTE_EVENT_QUEUE_CFG_ALL_TYPES)
+ return -EOVERFLOW;
+
+ *attr_value = conf->schedule_type;
+ break;
+ default:
+ return -EINVAL;
+ };
+ return 0;
}
int
@@ -912,6 +963,23 @@ rte_event_dequeue_timeout_ticks(uint8_t dev_id, uint64_t ns,
}
int
+rte_event_dev_service_id_get(uint8_t dev_id, uint32_t *service_id)
+{
+ struct rte_eventdev *dev;
+
+ RTE_EVENTDEV_VALID_DEVID_OR_ERR_RET(dev_id, -EINVAL);
+ dev = &rte_eventdevs[dev_id];
+
+ if (service_id == NULL)
+ return -EINVAL;
+
+ if (dev->data->service_inited)
+ *service_id = dev->data->service_id;
+
+ return dev->data->service_inited ? 0 : -ESRCH;
+}
+
+int
rte_event_dev_dump(uint8_t dev_id, FILE *f)
{
struct rte_eventdev *dev;
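For callers of the removed count/priority/depth getters, the new attribute API above covers the same information. A sketch of equivalent reads, assuming dev_id, queue_id and port_id are valid and ignoring return codes for brevity:

    uint32_t nb_queues, nb_ports, prio, deq_depth;

    rte_event_dev_attr_get(dev_id, RTE_EVENT_DEV_ATTR_QUEUE_COUNT, &nb_queues);
    rte_event_dev_attr_get(dev_id, RTE_EVENT_DEV_ATTR_PORT_COUNT, &nb_ports);
    rte_event_queue_attr_get(dev_id, queue_id, RTE_EVENT_QUEUE_ATTR_PRIORITY, &prio);
    rte_event_port_attr_get(dev_id, port_id, RTE_EVENT_PORT_ATTR_DEQ_DEPTH, &deq_depth);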
diff --git a/lib/librte_eventdev/rte_eventdev.h b/lib/librte_eventdev/rte_eventdev.h
index 128bc522..f1949ff7 100644
--- a/lib/librte_eventdev/rte_eventdev.h
+++ b/lib/librte_eventdev/rte_eventdev.h
@@ -218,10 +218,10 @@
* (each worker thread schedules events to its own port) or centralized
* (a dedicated thread schedules to all ports). Distributed software schedulers
* perform the scheduling in rte_event_dequeue_burst(), whereas centralized
- * scheduler logic is located in rte_event_schedule().
+ * scheduler logic needs a dedicated service core for scheduling.
* The RTE_EVENT_DEV_CAP_DISTRIBUTED_SCHED capability flag is not set
* indicates the device is centralized and thus needs a dedicated scheduling
- * thread that repeatedly calls rte_event_schedule().
+ * thread that repeatedly calls the software specific scheduling function.
*
* An event driven worker thread has following typical workflow on fastpath:
* \code{.c}
@@ -263,16 +263,16 @@ struct rte_mbuf; /* we just use mbuf pointers; no need to include rte_mbuf.h */
* In distributed scheduling mode, event scheduling happens in HW or
* rte_event_dequeue_burst() or the combination of these two.
* If the flag is not set then eventdev is centralized and thus needs a
- * dedicated scheduling thread that repeatedly calls rte_event_schedule().
+ * dedicated service core that acts as a scheduling thread.
*
- * @see rte_event_schedule(), rte_event_dequeue_burst()
+ * @see rte_event_dequeue_burst()
*/
#define RTE_EVENT_DEV_CAP_QUEUE_ALL_TYPES (1ULL << 3)
/**< Event device is capable of enqueuing events of any type to any queue.
* If this capability is not set, the queue only supports events of the
- * *RTE_EVENT_QUEUE_CFG_* type that it was created with.
+ * *RTE_SCHED_TYPE_* type that it was created with.
*
- * @see RTE_EVENT_QUEUE_CFG_* values
+ * @see RTE_SCHED_TYPE_* values
*/
#define RTE_EVENT_DEV_CAP_BURST_MODE (1ULL << 4)
/**< Event device is capable of operating in burst mode for enqueue(forward,
@@ -399,6 +399,36 @@ struct rte_event_dev_info {
int
rte_event_dev_info_get(uint8_t dev_id, struct rte_event_dev_info *dev_info);
+/**
+ * The count of ports.
+ */
+#define RTE_EVENT_DEV_ATTR_PORT_COUNT 0
+/**
+ * The count of queues.
+ */
+#define RTE_EVENT_DEV_ATTR_QUEUE_COUNT 1
+/**
+ * The status of the device, zero for stopped, non-zero for started.
+ */
+#define RTE_EVENT_DEV_ATTR_STARTED 2
+
+/**
+ * Get an attribute from a device.
+ *
+ * @param dev_id Eventdev id
+ * @param attr_id The attribute ID to retrieve
+ * @param[out] attr_value A pointer that will be filled in with the attribute
+ * value if successful.
+ *
+ * @retval 0 Successfully retrieved attribute value
+ * -EINVAL Invalid device or *attr_id* provided, or *attr_value*
+ * is NULL
+ */
+int
+rte_event_dev_attr_get(uint8_t dev_id, uint32_t attr_id,
+ uint32_t *attr_value);
+
+
/* Event device configuration bitmap flags */
#define RTE_EVENT_DEV_CFG_PER_DEQUEUE_TIMEOUT (1ULL << 0)
/**< Override the global *dequeue_timeout_ns* and use per dequeue timeout in ns.
@@ -485,39 +515,13 @@ rte_event_dev_configure(uint8_t dev_id,
/* Event queue specific APIs */
/* Event queue configuration bitmap flags */
-#define RTE_EVENT_QUEUE_CFG_TYPE_MASK (3ULL << 0)
-/**< Mask for event queue schedule type configuration request */
-#define RTE_EVENT_QUEUE_CFG_ALL_TYPES (0ULL << 0)
+#define RTE_EVENT_QUEUE_CFG_ALL_TYPES (1ULL << 0)
/**< Allow ATOMIC,ORDERED,PARALLEL schedule type enqueue
*
* @see RTE_SCHED_TYPE_ORDERED, RTE_SCHED_TYPE_ATOMIC, RTE_SCHED_TYPE_PARALLEL
* @see rte_event_enqueue_burst()
*/
-#define RTE_EVENT_QUEUE_CFG_ATOMIC_ONLY (1ULL << 0)
-/**< Allow only ATOMIC schedule type enqueue
- *
- * The rte_event_enqueue_burst() result is undefined if the queue configured
- * with ATOMIC only and sched_type != RTE_SCHED_TYPE_ATOMIC
- *
- * @see RTE_SCHED_TYPE_ATOMIC, rte_event_enqueue_burst()
- */
-#define RTE_EVENT_QUEUE_CFG_ORDERED_ONLY (2ULL << 0)
-/**< Allow only ORDERED schedule type enqueue
- *
- * The rte_event_enqueue_burst() result is undefined if the queue configured
- * with ORDERED only and sched_type != RTE_SCHED_TYPE_ORDERED
- *
- * @see RTE_SCHED_TYPE_ORDERED, rte_event_enqueue_burst()
- */
-#define RTE_EVENT_QUEUE_CFG_PARALLEL_ONLY (3ULL << 0)
-/**< Allow only PARALLEL schedule type enqueue
- *
- * The rte_event_enqueue_burst() result is undefined if the queue configured
- * with PARALLEL only and sched_type != RTE_SCHED_TYPE_PARALLEL
- *
- * @see RTE_SCHED_TYPE_PARALLEL, rte_event_enqueue_burst()
- */
-#define RTE_EVENT_QUEUE_CFG_SINGLE_LINK (1ULL << 2)
+#define RTE_EVENT_QUEUE_CFG_SINGLE_LINK (1ULL << 1)
/**< This event queue links only to a single event port.
*
* @see rte_event_port_setup(), rte_event_port_link()
@@ -528,8 +532,8 @@ struct rte_event_queue_conf {
uint32_t nb_atomic_flows;
/**< The maximum number of active flows this queue can track at any
* given time. If the queue is configured for atomic scheduling (by
- * applying the RTE_EVENT_QUEUE_CFG_ALL_TYPES or
- * RTE_EVENT_QUEUE_CFG_ATOMIC_ONLY flags to event_queue_cfg), then the
+ * applying the RTE_EVENT_QUEUE_CFG_ALL_TYPES flag to event_queue_cfg
+ * or RTE_SCHED_TYPE_ATOMIC flag to schedule_type), then the
* value must be in the range of [1, nb_event_queue_flows], which was
* previously provided in rte_event_dev_configure().
*/
@@ -542,12 +546,18 @@ struct rte_event_queue_conf {
* event will be returned from dequeue until one or more entries are
* freed up/released.
* If the queue is configured for ordered scheduling (by applying the
- * RTE_EVENT_QUEUE_CFG_ALL_TYPES or RTE_EVENT_QUEUE_CFG_ORDERED_ONLY
- * flags to event_queue_cfg), then the value must be in the range of
- * [1, nb_event_queue_flows], which was previously supplied to
- * rte_event_dev_configure().
+ * RTE_EVENT_QUEUE_CFG_ALL_TYPES flag to event_queue_cfg or
+ * RTE_SCHED_TYPE_ORDERED flag to schedule_type), then the value must
+ * be in the range of [1, nb_event_queue_flows], which was
+ * previously supplied to rte_event_dev_configure().
+ */
+ uint32_t event_queue_cfg;
+ /**< Queue cfg flags (RTE_EVENT_QUEUE_CFG_*) */
+ uint8_t schedule_type;
+ /**< Queue schedule type(RTE_SCHED_TYPE_*).
+ * Valid when RTE_EVENT_QUEUE_CFG_ALL_TYPES bit is not set in
+ * event_queue_cfg.
*/
- uint32_t event_queue_cfg; /**< Queue cfg flags(EVENT_QUEUE_CFG_) */
uint8_t priority;
/**< Priority for this event queue relative to other event queues.
* The requested priority should in the range of
@@ -607,31 +617,45 @@ rte_event_queue_setup(uint8_t dev_id, uint8_t queue_id,
const struct rte_event_queue_conf *queue_conf);
/**
- * Get the number of event queues on a specific event device
- *
- * @param dev_id
- * Event device identifier.
- * @return
- * - The number of configured event queues
+ * The priority of the queue.
*/
-uint8_t
-rte_event_queue_count(uint8_t dev_id);
+#define RTE_EVENT_QUEUE_ATTR_PRIORITY 0
+/**
+ * The number of atomic flows configured for the queue.
+ */
+#define RTE_EVENT_QUEUE_ATTR_NB_ATOMIC_FLOWS 1
+/**
+ * The number of atomic order sequences configured for the queue.
+ */
+#define RTE_EVENT_QUEUE_ATTR_NB_ATOMIC_ORDER_SEQUENCES 2
+/**
+ * The cfg flags for the queue.
+ */
+#define RTE_EVENT_QUEUE_ATTR_EVENT_QUEUE_CFG 3
+/**
+ * The schedule type of the queue.
+ */
+#define RTE_EVENT_QUEUE_ATTR_SCHEDULE_TYPE 4
/**
- * Get the priority of the event queue on a specific event device
- *
- * @param dev_id
- * Event device identifier.
- * @param queue_id
- * Event queue identifier.
- * @return
- * - If the device has RTE_EVENT_DEV_CAP_QUEUE_QOS capability then the
- * configured priority of the event queue in
- * [RTE_EVENT_DEV_PRIORITY_HIGHEST, RTE_EVENT_DEV_PRIORITY_LOWEST] range
- * else the value RTE_EVENT_DEV_PRIORITY_NORMAL
+ * Get an attribute from a queue.
+ *
+ * @param dev_id Eventdev id
+ * @param queue_id Eventdev queue id
+ * @param attr_id The attribute ID to retrieve
+ * @param[out] attr_value A pointer that will be filled in with the attribute
+ * value if successful
+ *
+ * @retval 0 Successfully returned value
+ * -EINVAL invalid device, queue or attr_id provided, or attr_value
+ * was NULL
+ * -EOVERFLOW returned when attr_id is set to
+ * RTE_EVENT_QUEUE_ATTR_SCHEDULE_TYPE and event_queue_cfg is set to
+ * RTE_EVENT_QUEUE_CFG_ALL_TYPES
*/
-uint8_t
-rte_event_queue_priority(uint8_t dev_id, uint8_t queue_id);
+int
+rte_event_queue_attr_get(uint8_t dev_id, uint8_t queue_id, uint32_t attr_id,
+ uint32_t *attr_value);
/* Event port specific APIs */
@@ -715,47 +739,33 @@ rte_event_port_setup(uint8_t dev_id, uint8_t port_id,
const struct rte_event_port_conf *port_conf);
/**
- * Get the number of dequeue queue depth configured for event port designated
- * by its *port_id* on a specific event device
- *
- * @param dev_id
- * Event device identifier.
- * @param port_id
- * Event port identifier.
- * @return
- * - The number of configured dequeue queue depth
- *
- * @see rte_event_dequeue_burst()
+ * The queue depth of the port on the enqueue side
*/
-uint8_t
-rte_event_port_dequeue_depth(uint8_t dev_id, uint8_t port_id);
-
+#define RTE_EVENT_PORT_ATTR_ENQ_DEPTH 0
/**
- * Get the number of enqueue queue depth configured for event port designated
- * by its *port_id* on a specific event device
- *
- * @param dev_id
- * Event device identifier.
- * @param port_id
- * Event port identifier.
- * @return
- * - The number of configured enqueue queue depth
- *
- * @see rte_event_enqueue_burst()
+ * The queue depth of the port on the dequeue side
*/
-uint8_t
-rte_event_port_enqueue_depth(uint8_t dev_id, uint8_t port_id);
+#define RTE_EVENT_PORT_ATTR_DEQ_DEPTH 1
+/**
+ * The new event threshold of the port
+ */
+#define RTE_EVENT_PORT_ATTR_NEW_EVENT_THRESHOLD 2
/**
- * Get the number of ports on a specific event device
+ * Get an attribute from a port.
*
- * @param dev_id
- * Event device identifier.
- * @return
- * - The number of configured ports
+ * @param dev_id Eventdev id
+ * @param port_id Eventdev port id
+ * @param attr_id The attribute ID to retrieve
+ * @param[out] attr_value A pointer that will be filled in with the attribute
+ * value if successful
+ *
+ * @retval 0 Successfully returned value
+ * -EINVAL Invalid device, port or attr_id, or attr_value was NULL
*/
-uint8_t
-rte_event_port_count(uint8_t dev_id);
+int
+rte_event_port_attr_get(uint8_t dev_id, uint8_t port_id, uint32_t attr_id,
+ uint32_t *attr_value);
/**
* Start an event device.
@@ -871,6 +881,8 @@ rte_event_dev_close(uint8_t dev_id);
/**< The event generated from cpu for pipelining.
* Application may use *sub_event_type* to further classify the event
*/
+#define RTE_EVENT_TYPE_ETH_RX_ADAPTER 0x4
+/**< The event generated from event eth Rx adapter */
#define RTE_EVENT_TYPE_MAX 0x10
/**< Maximum number of event types */
@@ -882,7 +894,10 @@ rte_event_dev_close(uint8_t dev_id);
#define RTE_EVENT_OP_FORWARD 1
/**< The CPU use this operation to forward the event to different event queue or
* change to new application specific flow or schedule type to enable
- * pipelining
+ * pipelining.
+ *
+ * This operation must only be enqueued to the same port that the
+ * event to be forwarded was dequeued from.
*/
#define RTE_EVENT_OP_RELEASE 2
/**< Release the flow context associated with the schedule type.
@@ -912,6 +927,9 @@ rte_event_dev_close(uint8_t dev_id);
* or no scheduling context is held then this function may be an NOOP,
* depending on the implementation.
*
+ * This operation must only be enqueued to the same port that the
+ * event to be released was dequeued from.
+ *
*/
/**
@@ -990,14 +1008,50 @@ struct rte_event {
};
};
+/* Ethdev Rx adapter capability bitmap flags */
+#define RTE_EVENT_ETH_RX_ADAPTER_CAP_INTERNAL_PORT 0x1
+/**< This flag is set when the packet transfer mechanism is in HW.
+ * Ethdev can send packets to the event device using internal event port.
+ */
+#define RTE_EVENT_ETH_RX_ADAPTER_CAP_MULTI_EVENTQ 0x2
+/**< Adapter supports multiple event queues per ethdev. Every ethdev
+ * Rx queue can be connected to a unique event queue.
+ */
+#define RTE_EVENT_ETH_RX_ADAPTER_CAP_OVERRIDE_FLOW_ID 0x4
+/**< The application can override the adapter generated flow ID in the
+ * event. This flow ID can be specified when adding an ethdev Rx queue
+ * to the adapter using the ev member of struct rte_event_eth_rx_adapter_queue_conf.
+ * @see struct rte_event_eth_rx_adapter_queue_conf::ev
+ * @see struct rte_event_eth_rx_adapter_queue_conf::rx_queue_flags
+ */
+
+/**
+ * Retrieve the event device's ethdev Rx adapter capabilities for the
+ * specified ethernet port
+ *
+ * @param dev_id
+ * The identifier of the device.
+ *
+ * @param eth_port_id
+ * The identifier of the ethernet device.
+ *
+ * @param[out] caps
+ * A pointer to memory filled with Rx event adapter capabilities.
+ *
+ * @return
+ * - 0: Success, driver provides Rx event adapter capabilities for the
+ * ethernet device.
+ * - <0: Error code returned by the driver function.
+ *
+ */
+int
+rte_event_eth_rx_adapter_caps_get(uint8_t dev_id, uint8_t eth_port_id,
+ uint32_t *caps);
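A small sketch of using this capability query to report which transfer mechanism applies; dev_id and eth_port are assumed to identify configured devices and <stdio.h> to be included:

    uint32_t caps = 0;

    if (rte_event_eth_rx_adapter_caps_get(dev_id, eth_port, &caps) == 0) {
            if (caps & RTE_EVENT_ETH_RX_ADAPTER_CAP_INTERNAL_PORT)
                    printf("ethdev %u: HW transfer via internal event port\n",
                           (unsigned int)eth_port);
            else
                    printf("ethdev %u: SW transfer via the adapter service\n",
                           (unsigned int)eth_port);
    }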
struct rte_eventdev_driver;
struct rte_eventdev_ops;
struct rte_eventdev;
-typedef void (*event_schedule_t)(struct rte_eventdev *dev);
-/**< @internal Schedule one or more events in the event dev. */
-
typedef uint16_t (*event_enqueue_t)(void *port, const struct rte_event *ev);
/**< @internal Enqueue event on port of a device */
@@ -1034,12 +1088,10 @@ struct rte_eventdev_data {
/**< Number of event ports. */
void **ports;
/**< Array of pointers to ports. */
- uint8_t *ports_dequeue_depth;
- /**< Array of port dequeue depth. */
- uint8_t *ports_enqueue_depth;
- /**< Array of port enqueue depth. */
- uint8_t *queues_prio;
- /**< Array of queue priority. */
+ struct rte_event_port_conf *ports_cfg;
+ /**< Array of port configuration structures. */
+ struct rte_event_queue_conf *queues_cfg;
+ /**< Array of queue configuration structures. */
uint16_t *links_map;
/**< Memory to store queues to port connections. */
void *dev_private;
@@ -1048,6 +1100,10 @@ struct rte_eventdev_data {
/**< Event device capabilities(RTE_EVENT_DEV_CAP_)*/
struct rte_event_dev_config dev_conf;
/**< Configuration applied to device. */
+ uint8_t service_inited;
+ /* Service initialization state */
+ uint32_t service_id;
+ /* Service ID*/
RTE_STD_C11
uint8_t dev_started : 1;
@@ -1059,8 +1115,6 @@ struct rte_eventdev_data {
/** @internal The data structure associated with each event device. */
struct rte_eventdev {
- event_schedule_t schedule;
- /**< Pointer to PMD schedule function. */
event_enqueue_t enqueue;
/**< Pointer to PMD enqueue function. */
event_enqueue_burst_t enqueue_burst;
@@ -1089,24 +1143,6 @@ struct rte_eventdev {
extern struct rte_eventdev *rte_eventdevs;
/** @internal The pool of rte_eventdev structures. */
-
-/**
- * Schedule one or more events in the event dev.
- *
- * An event dev implementation may define this is a NOOP, for instance if
- * the event dev performs its scheduling in hardware.
- *
- * @param dev_id
- * The identifier of the device.
- */
-static inline void
-rte_event_schedule(uint8_t dev_id)
-{
- struct rte_eventdev *dev = &rte_eventdevs[dev_id];
- if (*dev->schedule)
- (*dev->schedule)(dev);
-}
-
static __rte_always_inline uint16_t
__rte_event_enqueue_burst(uint8_t dev_id, uint8_t port_id,
const struct rte_event ev[], uint16_t nb_events,
@@ -1144,6 +1180,9 @@ __rte_event_enqueue_burst(uint8_t dev_id, uint8_t port_id,
* The *nb_events* parameter is the number of event objects to enqueue which are
* supplied in the *ev* array of *rte_event* structure.
*
+ * Event operations RTE_EVENT_OP_FORWARD and RTE_EVENT_OP_RELEASE must only be
+ * enqueued to the same port that their associated events were dequeued from.
+ *
* The rte_event_enqueue_burst() function returns the number of
* events objects it actually enqueued. A return value equal to *nb_events*
* means that all event objects have been enqueued.
@@ -1346,6 +1385,9 @@ rte_event_dequeue_timeout_ticks(uint8_t dev_id, uint64_t ns,
* with RTE_EVENT_OP_RELEASE operation can be used to release the
* contexts early.
*
+ * Event operations RTE_EVENT_OP_FORWARD and RTE_EVENT_OP_RELEASE must only be
+ * enqueued to the same port that their associated events were dequeued from.
+ *
* @param dev_id
* The identifier of the device.
* @param port_id
@@ -1545,6 +1587,24 @@ rte_event_port_links_get(uint8_t dev_id, uint8_t port_id,
uint8_t queues[], uint8_t priorities[]);
/**
+ * Retrieve the service ID of the event dev. If the event dev doesn't use
+ * a rte_service function, this function returns -ESRCH.
+ *
+ * @param dev_id
+ * The identifier of the device.
+ *
+ * @param [out] service_id
+ * A pointer to a uint32_t, to be filled in with the service id.
+ *
+ * @return
+ * - 0: Success
+ * - <0: Error code on failure; if the event dev doesn't use a rte_service
+ * function, this function returns -ESRCH.
+ */
+int
+rte_event_dev_service_id_get(uint8_t dev_id, uint32_t *service_id);
+
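For an eventdev without RTE_EVENT_DEV_CAP_DISTRIBUTED_SCHED, the scheduler itself is now exposed as a service rather than through rte_event_schedule(). A hedged sketch of pinning it to a core; lcore 1 is an arbitrary choice and dev_id is assumed to be a configured device:

    uint32_t sched_id;

    if (rte_event_dev_service_id_get(dev_id, &sched_id) == 0) {
            rte_service_lcore_add(1);
            rte_service_map_lcore_set(sched_id, 1, 1);
            rte_service_runstate_set(sched_id, 1);
            rte_service_lcore_start(1);
    }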
+/**
* Dump internal information about *dev_id* to the FILE* provided in *f*.
*
* @param dev_id
diff --git a/lib/librte_eventdev/rte_eventdev_pmd.h b/lib/librte_eventdev/rte_eventdev_pmd.h
index 3d72acf3..7a206c56 100644
--- a/lib/librte_eventdev/rte_eventdev_pmd.h
+++ b/lib/librte_eventdev/rte_eventdev_pmd.h
@@ -83,9 +83,19 @@ extern "C" {
} \
} while (0)
+#define RTE_EVENT_ETH_RX_ADAPTER_SW_CAP \
+ ((RTE_EVENT_ETH_RX_ADAPTER_CAP_OVERRIDE_FLOW_ID) | \
+ (RTE_EVENT_ETH_RX_ADAPTER_CAP_MULTI_EVENTQ))
+
+/**< Ethernet Rx adapter cap to return if the packet transfers from
+ * the ethdev to the eventdev use a SW service function
+ */
+
#define RTE_EVENTDEV_DETACHED (0)
#define RTE_EVENTDEV_ATTACHED (1)
+struct rte_eth_dev;
+
/** Global structure used for maintaining state of allocated event devices */
struct rte_eventdev_global {
uint8_t nb_devs; /**< Number of devices found */
@@ -429,6 +439,163 @@ typedef int (*eventdev_xstats_get_names_t)(const struct rte_eventdev *dev,
typedef uint64_t (*eventdev_xstats_get_by_name)(const struct rte_eventdev *dev,
const char *name, unsigned int *id);
+
+/**
+ * Retrieve the event device's ethdev Rx adapter capabilities for the
+ * specified ethernet port
+ *
+ * @param dev
+ * Event device pointer
+ *
+ * @param eth_dev
+ * Ethernet device pointer
+ *
+ * @param[out] caps
+ * A pointer to memory filled with Rx event adapter capabilities.
+ *
+ * @return
+ * - 0: Success, driver provides Rx event adapter capabilities for the
+ * ethernet device.
+ * - <0: Error code returned by the driver function.
+ *
+ */
+typedef int (*eventdev_eth_rx_adapter_caps_get_t)
+ (const struct rte_eventdev *dev,
+ const struct rte_eth_dev *eth_dev,
+ uint32_t *caps);
+
+struct rte_event_eth_rx_adapter_queue_conf *queue_conf;
+
+/**
+ * Add ethernet Rx queues to event device. This callback is invoked if
+ * the caps returned from eventdev_eth_rx_adapter_caps_get(.., eth_port_id)
+ * has RTE_EVENT_ETH_RX_ADAPTER_CAP_INTERNAL_PORT set.
+ *
+ * @param dev
+ * Event device pointer
+ *
+ * @param eth_dev
+ * Ethernet device pointer
+ *
+ * @param rx_queue_id
+ * Ethernet device receive queue index
+ *
+ * @param queue_conf
+ * Additional configuration structure
+ *
+ * @return
+ * - 0: Success, ethernet receive queue added successfully.
+ * - <0: Error code returned by the driver function.
+ *
+ */
+typedef int (*eventdev_eth_rx_adapter_queue_add_t)(
+ const struct rte_eventdev *dev,
+ const struct rte_eth_dev *eth_dev,
+ int32_t rx_queue_id,
+ const struct rte_event_eth_rx_adapter_queue_conf *queue_conf);
+
+/**
+ * Delete ethernet Rx queues from event device. This callback is invoked if
+ * the caps returned from eventdev_eth_rx_adapter_caps_get(, eth_port_id)
+ * has RTE_EVENT_ETH_RX_ADAPTER_CAP_INTERNAL_PORT set.
+ *
+ * @param dev
+ * Event device pointer
+ *
+ * @param eth_dev
+ * Ethernet device pointer
+ *
+ * @param rx_queue_id
+ * Ethernet device receive queue index
+ *
+ * @return
+ * - 0: Success, ethernet receive queue deleted successfully.
+ * - <0: Error code returned by the driver function.
+ *
+ */
+typedef int (*eventdev_eth_rx_adapter_queue_del_t)
+ (const struct rte_eventdev *dev,
+ const struct rte_eth_dev *eth_dev,
+ int32_t rx_queue_id);
+
+/**
+ * Start ethernet Rx adapter. This callback is invoked if
+ * the caps returned from eventdev_eth_rx_adapter_caps_get(.., eth_port_id)
+ * has RTE_EVENT_ETH_RX_ADAPTER_CAP_INTERNAL_PORT set and Rx queues
+ * from eth_port_id have been added to the event device.
+ *
+ * @param dev
+ * Event device pointer
+ *
+ * @param eth_dev
+ * Ethernet device pointer
+ *
+ * @return
+ * - 0: Success, ethernet Rx adapter started successfully.
+ * - <0: Error code returned by the driver function.
+ */
+typedef int (*eventdev_eth_rx_adapter_start_t)
+ (const struct rte_eventdev *dev,
+ const struct rte_eth_dev *eth_dev);
+
+/**
+ * Stop ethernet Rx adapter. This callback is invoked if
+ * the caps returned from eventdev_eth_rx_adapter_caps_get(..,eth_port_id)
+ * has RTE_EVENT_ETH_RX_ADAPTER_CAP_INTERNAL_PORT set and Rx queues
+ * from eth_port_id have been added to the event device.
+ *
+ * @param dev
+ * Event device pointer
+ *
+ * @param eth_dev
+ * Ethernet device pointer
+ *
+ * @return
+ * - 0: Success, ethernet Rx adapter stopped successfully.
+ * - <0: Error code returned by the driver function.
+ */
+typedef int (*eventdev_eth_rx_adapter_stop_t)
+ (const struct rte_eventdev *dev,
+ const struct rte_eth_dev *eth_dev);
+
+struct rte_event_eth_rx_adapter_stats *stats;
+
+/**
+ * Retrieve ethernet Rx adapter statistics.
+ *
+ * @param dev
+ * Event device pointer
+ *
+ * @param eth_dev
+ * Ethernet device pointer
+ *
+ * @param[out] stats
+ * Pointer to stats structure
+ *
+ * @return
+ * Return 0 on success.
+ */
+
+typedef int (*eventdev_eth_rx_adapter_stats_get)
+ (const struct rte_eventdev *dev,
+ const struct rte_eth_dev *eth_dev,
+ struct rte_event_eth_rx_adapter_stats *stats);
+/**
+ * Reset ethernet Rx adapter statistics.
+ *
+ * @param dev
+ * Event device pointer
+ *
+ * @param eth_dev
+ * Ethernet device pointer
+ *
+ * @return
+ * Return 0 on success.
+ */
+typedef int (*eventdev_eth_rx_adapter_stats_reset)
+ (const struct rte_eventdev *dev,
+ const struct rte_eth_dev *eth_dev);
+
/** Event device operations function pointer table */
struct rte_eventdev_ops {
eventdev_info_get_t dev_infos_get; /**< Get device info. */
@@ -468,6 +635,21 @@ struct rte_eventdev_ops {
/**< Get one value by name. */
eventdev_xstats_reset_t xstats_reset;
/**< Reset the statistics values in xstats. */
+
+ eventdev_eth_rx_adapter_caps_get_t eth_rx_adapter_caps_get;
+ /**< Get ethernet Rx adapter capabilities */
+ eventdev_eth_rx_adapter_queue_add_t eth_rx_adapter_queue_add;
+ /**< Add Rx queues to ethernet Rx adapter */
+ eventdev_eth_rx_adapter_queue_del_t eth_rx_adapter_queue_del;
+ /**< Delete Rx queues from ethernet Rx adapter */
+ eventdev_eth_rx_adapter_start_t eth_rx_adapter_start;
+ /**< Start ethernet Rx adapter */
+ eventdev_eth_rx_adapter_stop_t eth_rx_adapter_stop;
+ /**< Stop ethernet Rx adapter */
+ eventdev_eth_rx_adapter_stats_get eth_rx_adapter_stats_get;
+ /**< Get ethernet Rx stats */
+ eventdev_eth_rx_adapter_stats_reset eth_rx_adapter_stats_reset;
+ /**< Reset ethernet Rx stats */
};
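As an illustration of how a driver might hook into these new ops, below is a minimal caps_get callback for a purely SW-serviced eventdev; the xxx_ prefix is a hypothetical PMD name, and in a real driver the function would be assigned to the eth_rx_adapter_caps_get member of its rte_eventdev_ops instance.

    static int
    xxx_eth_rx_adapter_caps_get(const struct rte_eventdev *dev,
                                const struct rte_eth_dev *eth_dev,
                                uint32_t *caps)
    {
            RTE_SET_USED(dev);
            RTE_SET_USED(eth_dev);
            /* No internal event port: rely on the common SW service path. */
            *caps = RTE_EVENT_ETH_RX_ADAPTER_SW_CAP;
            return 0;
    }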
/**
diff --git a/lib/librte_eventdev/rte_eventdev_pmd_pci.h b/lib/librte_eventdev/rte_eventdev_pmd_pci.h
index b6bd7319..ade32b5d 100644
--- a/lib/librte_eventdev/rte_eventdev_pmd_pci.h
+++ b/lib/librte_eventdev/rte_eventdev_pmd_pci.h
@@ -50,6 +50,7 @@ extern "C" {
#include <rte_eal.h>
#include <rte_lcore.h>
#include <rte_pci.h>
+#include <rte_bus_pci.h>
#include "rte_eventdev_pmd.h"
diff --git a/lib/librte_eventdev/rte_eventdev_pmd_vdev.h b/lib/librte_eventdev/rte_eventdev_pmd_vdev.h
index 135e8b80..56232dec 100644
--- a/lib/librte_eventdev/rte_eventdev_pmd_vdev.h
+++ b/lib/librte_eventdev/rte_eventdev_pmd_vdev.h
@@ -48,7 +48,7 @@ extern "C" {
#include <rte_debug.h>
#include <rte_eal.h>
-#include <rte_vdev.h>
+#include <rte_bus_vdev.h>
#include "rte_eventdev_pmd.h"
diff --git a/lib/librte_eventdev/rte_eventdev_version.map b/lib/librte_eventdev/rte_eventdev_version.map
index 4c48e5f0..108ae61f 100644
--- a/lib/librte_eventdev/rte_eventdev_version.map
+++ b/lib/librte_eventdev/rte_eventdev_version.map
@@ -19,17 +19,12 @@ DPDK_17.05 {
rte_event_port_default_conf_get;
rte_event_port_setup;
- rte_event_port_dequeue_depth;
- rte_event_port_enqueue_depth;
- rte_event_port_count;
rte_event_port_link;
rte_event_port_unlink;
rte_event_port_links_get;
rte_event_queue_default_conf_get;
rte_event_queue_setup;
- rte_event_queue_count;
- rte_event_queue_priority;
rte_event_dequeue_timeout_ticks;
@@ -51,3 +46,25 @@ DPDK_17.08 {
rte_event_ring_init;
rte_event_ring_lookup;
} DPDK_17.05;
+
+DPDK_17.11 {
+ global:
+
+ rte_event_dev_attr_get;
+ rte_event_dev_service_id_get;
+ rte_event_port_attr_get;
+ rte_event_queue_attr_get;
+
+ rte_event_eth_rx_adapter_caps_get;
+ rte_event_eth_rx_adapter_create;
+ rte_event_eth_rx_adapter_create_ext;
+ rte_event_eth_rx_adapter_free;
+ rte_event_eth_rx_adapter_queue_add;
+ rte_event_eth_rx_adapter_queue_del;
+ rte_event_eth_rx_adapter_service_id_get;
+ rte_event_eth_rx_adapter_start;
+ rte_event_eth_rx_adapter_stats_get;
+ rte_event_eth_rx_adapter_stats_reset;
+ rte_event_eth_rx_adapter_stop;
+
+} DPDK_17.08;
diff --git a/lib/librte_flow_classify/Makefile b/lib/librte_flow_classify/Makefile
new file mode 100644
index 00000000..ea792f5d
--- /dev/null
+++ b/lib/librte_flow_classify/Makefile
@@ -0,0 +1,53 @@
+# BSD LICENSE
+#
+# Copyright(c) 2017 Intel Corporation. All rights reserved.
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions
+# are met:
+#
+# * Redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer.
+# * Redistributions in binary form must reproduce the above copyright
+# notice, this list of conditions and the following disclaimer in
+# the documentation and/or other materials provided with the
+# distribution.
+# * Neither the name of Intel Corporation nor the names of its
+# contributors may be used to endorse or promote products derived
+# from this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+include $(RTE_SDK)/mk/rte.vars.mk
+
+# library name
+LIB = librte_flow_classify.a
+
+CFLAGS += -O3
+CFLAGS += $(WERROR_FLAGS) -I$(SRCDIR)
+
+EXPORT_MAP := rte_flow_classify_version.map
+
+LIBABIVER := 1
+
+LDLIBS += -lrte_eal -lrte_ethdev -lrte_net -lrte_table -lrte_acl
+
+# all source are stored in SRCS-y
+SRCS-$(CONFIG_RTE_LIBRTE_FLOW_CLASSIFY) += rte_flow_classify.c
+SRCS-$(CONFIG_RTE_LIBRTE_FLOW_CLASSIFY) += rte_flow_classify_parse.c
+
+# install this header file
+SYMLINK-$(CONFIG_RTE_LIBRTE_FLOW_CLASSIFY)-include := rte_flow_classify.h
+
+include $(RTE_SDK)/mk/rte.lib.mk
diff --git a/lib/librte_flow_classify/rte_flow_classify.c b/lib/librte_flow_classify/rte_flow_classify.c
new file mode 100644
index 00000000..e6f44864
--- /dev/null
+++ b/lib/librte_flow_classify/rte_flow_classify.c
@@ -0,0 +1,691 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright(c) 2017 Intel Corporation. All rights reserved.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <rte_flow_classify.h>
+#include "rte_flow_classify_parse.h"
+#include <rte_flow_driver.h>
+#include <rte_table_acl.h>
+#include <stdbool.h>
+
+int librte_flow_classify_logtype;
+
+static struct rte_eth_ntuple_filter ntuple_filter;
+static uint32_t unique_id = 1;
+
+
+struct rte_flow_classify_table_entry {
+ /* meta-data for classify rule */
+ uint32_t rule_id;
+};
+
+struct rte_table {
+ /* Input parameters */
+ struct rte_table_ops ops;
+ uint32_t entry_size;
+ enum rte_flow_classify_table_type type;
+
+ /* Handle to the low-level table object */
+ void *h_table;
+};
+
+#define RTE_FLOW_CLASSIFIER_MAX_NAME_SZ 256
+
+struct rte_flow_classifier {
+ /* Input parameters */
+ char name[RTE_FLOW_CLASSIFIER_MAX_NAME_SZ];
+ int socket_id;
+ enum rte_flow_classify_table_type type;
+
+ /* Internal tables */
+ struct rte_table tables[RTE_FLOW_CLASSIFY_TABLE_MAX];
+ uint32_t num_tables;
+ uint16_t nb_pkts;
+ struct rte_flow_classify_table_entry
+ *entries[RTE_PORT_IN_BURST_SIZE_MAX];
+} __rte_cache_aligned;
+
+enum {
+ PROTO_FIELD_IPV4,
+ SRC_FIELD_IPV4,
+ DST_FIELD_IPV4,
+ SRCP_FIELD_IPV4,
+ DSTP_FIELD_IPV4,
+ NUM_FIELDS_IPV4
+};
+
+struct acl_keys {
+ struct rte_table_acl_rule_add_params key_add; /* add key */
+ struct rte_table_acl_rule_delete_params key_del; /* delete key */
+};
+
+struct classify_rules {
+ enum rte_flow_classify_rule_type type;
+ union {
+ struct rte_flow_classify_ipv4_5tuple ipv4_5tuple;
+ } u;
+};
+
+struct rte_flow_classify_rule {
+ uint32_t id; /* unique ID of classify rule */
+ struct rte_flow_action action; /* action when match found */
+ struct classify_rules rules; /* union of rules */
+ union {
+ struct acl_keys key;
+ } u;
+ int key_found; /* rule key found in table */
+ void *entry; /* pointer to buffer to hold rule meta data */
+ void *entry_ptr; /* handle to the table entry for rule meta data */
+};
+
+static int
+flow_classify_parse_flow(
+ const struct rte_flow_attr *attr,
+ const struct rte_flow_item pattern[],
+ const struct rte_flow_action actions[],
+ struct rte_flow_error *error)
+{
+ struct rte_flow_item *items;
+ parse_filter_t parse_filter;
+ uint32_t item_num = 0;
+ uint32_t i = 0;
+ int ret;
+
+ memset(&ntuple_filter, 0, sizeof(ntuple_filter));
+
+ /* Get the non-void item number of pattern */
+ while ((pattern + i)->type != RTE_FLOW_ITEM_TYPE_END) {
+ if ((pattern + i)->type != RTE_FLOW_ITEM_TYPE_VOID)
+ item_num++;
+ i++;
+ }
+ item_num++;
+
+ items = malloc(item_num * sizeof(struct rte_flow_item));
+ if (!items) {
+ rte_flow_error_set(error, ENOMEM,
+ RTE_FLOW_ERROR_TYPE_ITEM_NUM,
+ NULL, "No memory for pattern items.");
+ return -ENOMEM;
+ }
+
+ memset(items, 0, item_num * sizeof(struct rte_flow_item));
+ classify_pattern_skip_void_item(items, pattern);
+
+ parse_filter = classify_find_parse_filter_func(items);
+ if (!parse_filter) {
+ rte_flow_error_set(error, EINVAL,
+ RTE_FLOW_ERROR_TYPE_ITEM,
+ pattern, "Unsupported pattern");
+ free(items);
+ return -EINVAL;
+ }
+
+ ret = parse_filter(attr, items, actions, &ntuple_filter, error);
+ free(items);
+ return ret;
+}
+
+
+#define uint32_t_to_char(ip, a, b, c, d) do {\
+ *a = (unsigned char)(ip >> 24 & 0xff);\
+ *b = (unsigned char)(ip >> 16 & 0xff);\
+ *c = (unsigned char)(ip >> 8 & 0xff);\
+ *d = (unsigned char)(ip & 0xff);\
+ } while (0)
+
+static inline void
+print_acl_ipv4_key_add(struct rte_table_acl_rule_add_params *key)
+{
+ unsigned char a, b, c, d;
+
+ printf("%s: 0x%02hhx/0x%hhx ", __func__,
+ key->field_value[PROTO_FIELD_IPV4].value.u8,
+ key->field_value[PROTO_FIELD_IPV4].mask_range.u8);
+
+ uint32_t_to_char(key->field_value[SRC_FIELD_IPV4].value.u32,
+ &a, &b, &c, &d);
+ printf(" %hhu.%hhu.%hhu.%hhu/0x%x ", a, b, c, d,
+ key->field_value[SRC_FIELD_IPV4].mask_range.u32);
+
+ uint32_t_to_char(key->field_value[DST_FIELD_IPV4].value.u32,
+ &a, &b, &c, &d);
+ printf("%hhu.%hhu.%hhu.%hhu/0x%x ", a, b, c, d,
+ key->field_value[DST_FIELD_IPV4].mask_range.u32);
+
+ printf("%hu : 0x%x %hu : 0x%x",
+ key->field_value[SRCP_FIELD_IPV4].value.u16,
+ key->field_value[SRCP_FIELD_IPV4].mask_range.u16,
+ key->field_value[DSTP_FIELD_IPV4].value.u16,
+ key->field_value[DSTP_FIELD_IPV4].mask_range.u16);
+
+ printf(" priority: 0x%x\n", key->priority);
+}
+
+static inline void
+print_acl_ipv4_key_delete(struct rte_table_acl_rule_delete_params *key)
+{
+ unsigned char a, b, c, d;
+
+ printf("%s: 0x%02hhx/0x%hhx ", __func__,
+ key->field_value[PROTO_FIELD_IPV4].value.u8,
+ key->field_value[PROTO_FIELD_IPV4].mask_range.u8);
+
+ uint32_t_to_char(key->field_value[SRC_FIELD_IPV4].value.u32,
+ &a, &b, &c, &d);
+ printf(" %hhu.%hhu.%hhu.%hhu/0x%x ", a, b, c, d,
+ key->field_value[SRC_FIELD_IPV4].mask_range.u32);
+
+ uint32_t_to_char(key->field_value[DST_FIELD_IPV4].value.u32,
+ &a, &b, &c, &d);
+ printf("%hhu.%hhu.%hhu.%hhu/0x%x ", a, b, c, d,
+ key->field_value[DST_FIELD_IPV4].mask_range.u32);
+
+ printf("%hu : 0x%x %hu : 0x%x\n",
+ key->field_value[SRCP_FIELD_IPV4].value.u16,
+ key->field_value[SRCP_FIELD_IPV4].mask_range.u16,
+ key->field_value[DSTP_FIELD_IPV4].value.u16,
+ key->field_value[DSTP_FIELD_IPV4].mask_range.u16);
+}
+
+static int
+rte_flow_classifier_check_params(struct rte_flow_classifier_params *params)
+{
+ if (params == NULL) {
+ RTE_FLOW_CLASSIFY_LOG(ERR,
+ "%s: Incorrect value for parameter params\n", __func__);
+ return -EINVAL;
+ }
+
+ /* name */
+ if (params->name == NULL) {
+ RTE_FLOW_CLASSIFY_LOG(ERR,
+ "%s: Incorrect value for parameter name\n", __func__);
+ return -EINVAL;
+ }
+
+ /* socket */
+ if ((params->socket_id < 0) ||
+ (params->socket_id >= RTE_MAX_NUMA_NODES)) {
+ RTE_FLOW_CLASSIFY_LOG(ERR,
+ "%s: Incorrect value for parameter socket_id\n",
+ __func__);
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+struct rte_flow_classifier *
+rte_flow_classifier_create(struct rte_flow_classifier_params *params)
+{
+ struct rte_flow_classifier *cls;
+ int ret;
+
+ /* Check input parameters */
+ ret = rte_flow_classifier_check_params(params);
+ if (ret != 0) {
+ RTE_FLOW_CLASSIFY_LOG(ERR,
+ "%s: flow classifier params check failed (%d)\n",
+ __func__, ret);
+ return NULL;
+ }
+
+ /* Allocate memory for the flow classifier */
+ cls = rte_zmalloc_socket("FLOW_CLASSIFIER",
+ sizeof(struct rte_flow_classifier),
+ RTE_CACHE_LINE_SIZE, params->socket_id);
+
+ if (cls == NULL) {
+ RTE_FLOW_CLASSIFY_LOG(ERR,
+ "%s: flow classifier memory allocation failed\n",
+ __func__);
+ return NULL;
+ }
+
+ /* Save input parameters */
+ snprintf(cls->name, RTE_FLOW_CLASSIFIER_MAX_NAME_SZ, "%s",
+ params->name);
+ cls->socket_id = params->socket_id;
+ cls->type = params->type;
+
+ /* Initialize flow classifier internal data structure */
+ cls->num_tables = 0;
+
+ return cls;
+}
+
+static void
+rte_flow_classify_table_free(struct rte_table *table)
+{
+ if (table->ops.f_free != NULL)
+ table->ops.f_free(table->h_table);
+}
+
+int
+rte_flow_classifier_free(struct rte_flow_classifier *cls)
+{
+ uint32_t i;
+
+ /* Check input parameters */
+ if (cls == NULL) {
+ RTE_FLOW_CLASSIFY_LOG(ERR,
+ "%s: rte_flow_classifier parameter is NULL\n",
+ __func__);
+ return -EINVAL;
+ }
+
+ /* Free tables */
+ for (i = 0; i < cls->num_tables; i++) {
+ struct rte_table *table = &cls->tables[i];
+
+ rte_flow_classify_table_free(table);
+ }
+
+ /* Free flow classifier memory */
+ rte_free(cls);
+
+ return 0;
+}
+
+static int
+rte_table_check_params(struct rte_flow_classifier *cls,
+ struct rte_flow_classify_table_params *params,
+ uint32_t *table_id)
+{
+ if (cls == NULL) {
+ RTE_FLOW_CLASSIFY_LOG(ERR,
+ "%s: flow classifier parameter is NULL\n",
+ __func__);
+ return -EINVAL;
+ }
+ if (params == NULL) {
+ RTE_FLOW_CLASSIFY_LOG(ERR, "%s: params parameter is NULL\n",
+ __func__);
+ return -EINVAL;
+ }
+ if (table_id == NULL) {
+ RTE_FLOW_CLASSIFY_LOG(ERR, "%s: table_id parameter is NULL\n",
+ __func__);
+ return -EINVAL;
+ }
+
+ /* ops */
+ if (params->ops == NULL) {
+ RTE_FLOW_CLASSIFY_LOG(ERR, "%s: params->ops is NULL\n",
+ __func__);
+ return -EINVAL;
+ }
+
+ if (params->ops->f_create == NULL) {
+ RTE_FLOW_CLASSIFY_LOG(ERR,
+ "%s: f_create function pointer is NULL\n", __func__);
+ return -EINVAL;
+ }
+
+ if (params->ops->f_lookup == NULL) {
+ RTE_FLOW_CLASSIFY_LOG(ERR,
+ "%s: f_lookup function pointer is NULL\n", __func__);
+ return -EINVAL;
+ }
+
+ /* Do we have room for one more table? */
+ if (cls->num_tables == RTE_FLOW_CLASSIFY_TABLE_MAX) {
+ RTE_FLOW_CLASSIFY_LOG(ERR,
+ "%s: Incorrect value for num_tables parameter\n",
+ __func__);
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+int
+rte_flow_classify_table_create(struct rte_flow_classifier *cls,
+ struct rte_flow_classify_table_params *params,
+ uint32_t *table_id)
+{
+ struct rte_table *table;
+ void *h_table;
+ uint32_t entry_size, id;
+ int ret;
+
+ /* Check input arguments */
+ ret = rte_table_check_params(cls, params, table_id);
+ if (ret != 0)
+ return ret;
+
+ id = cls->num_tables;
+ table = &cls->tables[id];
+
+ /* calculate table entry size */
+ entry_size = sizeof(struct rte_flow_classify_table_entry);
+
+ /* Create the table */
+ h_table = params->ops->f_create(params->arg_create, cls->socket_id,
+ entry_size);
+ if (h_table == NULL) {
+ RTE_FLOW_CLASSIFY_LOG(ERR, "%s: Table creation failed\n",
+ __func__);
+ return -EINVAL;
+ }
+
+ /* Commit current table to the classifier */
+ cls->num_tables++;
+ *table_id = id;
+
+ /* Save input parameters */
+ memcpy(&table->ops, params->ops, sizeof(struct rte_table_ops));
+
+ /* Initialize table internal data structure */
+ table->entry_size = entry_size;
+ table->h_table = h_table;
+
+ return 0;
+}
+
+static struct rte_flow_classify_rule *
+allocate_acl_ipv4_5tuple_rule(void)
+{
+ struct rte_flow_classify_rule *rule;
+ int log_level;
+
+ rule = malloc(sizeof(struct rte_flow_classify_rule));
+ if (!rule)
+ return rule;
+
+ memset(rule, 0, sizeof(struct rte_flow_classify_rule));
+ rule->id = unique_id++;
+ rule->rules.type = RTE_FLOW_CLASSIFY_RULE_TYPE_IPV4_5TUPLE;
+
+ memcpy(&rule->action, classify_get_flow_action(),
+ sizeof(struct rte_flow_action));
+
+ /* key add values */
+ rule->u.key.key_add.priority = ntuple_filter.priority;
+ rule->u.key.key_add.field_value[PROTO_FIELD_IPV4].mask_range.u8 =
+ ntuple_filter.proto_mask;
+ rule->u.key.key_add.field_value[PROTO_FIELD_IPV4].value.u8 =
+ ntuple_filter.proto;
+ rule->rules.u.ipv4_5tuple.proto = ntuple_filter.proto;
+ rule->rules.u.ipv4_5tuple.proto_mask = ntuple_filter.proto_mask;
+
+ rule->u.key.key_add.field_value[SRC_FIELD_IPV4].mask_range.u32 =
+ ntuple_filter.src_ip_mask;
+ rule->u.key.key_add.field_value[SRC_FIELD_IPV4].value.u32 =
+ ntuple_filter.src_ip;
+ rule->rules.u.ipv4_5tuple.src_ip_mask = ntuple_filter.src_ip_mask;
+ rule->rules.u.ipv4_5tuple.src_ip = ntuple_filter.src_ip;
+
+ rule->u.key.key_add.field_value[DST_FIELD_IPV4].mask_range.u32 =
+ ntuple_filter.dst_ip_mask;
+ rule->u.key.key_add.field_value[DST_FIELD_IPV4].value.u32 =
+ ntuple_filter.dst_ip;
+ rule->rules.u.ipv4_5tuple.dst_ip_mask = ntuple_filter.dst_ip_mask;
+ rule->rules.u.ipv4_5tuple.dst_ip = ntuple_filter.dst_ip;
+
+ rule->u.key.key_add.field_value[SRCP_FIELD_IPV4].mask_range.u16 =
+ ntuple_filter.src_port_mask;
+ rule->u.key.key_add.field_value[SRCP_FIELD_IPV4].value.u16 =
+ ntuple_filter.src_port;
+ rule->rules.u.ipv4_5tuple.src_port_mask = ntuple_filter.src_port_mask;
+ rule->rules.u.ipv4_5tuple.src_port = ntuple_filter.src_port;
+
+ rule->u.key.key_add.field_value[DSTP_FIELD_IPV4].mask_range.u16 =
+ ntuple_filter.dst_port_mask;
+ rule->u.key.key_add.field_value[DSTP_FIELD_IPV4].value.u16 =
+ ntuple_filter.dst_port;
+ rule->rules.u.ipv4_5tuple.dst_port_mask = ntuple_filter.dst_port_mask;
+ rule->rules.u.ipv4_5tuple.dst_port = ntuple_filter.dst_port;
+
+ log_level = rte_log_get_level(librte_flow_classify_logtype);
+
+ if (log_level == RTE_LOG_DEBUG)
+ print_acl_ipv4_key_add(&rule->u.key.key_add);
+
+ /* key delete values */
+ memcpy(&rule->u.key.key_del.field_value[PROTO_FIELD_IPV4],
+ &rule->u.key.key_add.field_value[PROTO_FIELD_IPV4],
+ NUM_FIELDS_IPV4 * sizeof(struct rte_acl_field));
+
+ if (log_level == RTE_LOG_DEBUG)
+ print_acl_ipv4_key_delete(&rule->u.key.key_del);
+
+ return rule;
+}
+
+struct rte_flow_classify_rule *
+rte_flow_classify_table_entry_add(struct rte_flow_classifier *cls,
+ uint32_t table_id,
+ int *key_found,
+ const struct rte_flow_attr *attr,
+ const struct rte_flow_item pattern[],
+ const struct rte_flow_action actions[],
+ struct rte_flow_error *error)
+{
+ struct rte_flow_classify_rule *rule;
+ struct rte_flow_classify_table_entry *table_entry;
+ int ret;
+
+ if (!error)
+ return NULL;
+
+ if (!cls) {
+ rte_flow_error_set(error, EINVAL,
+ RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
+ NULL, "NULL classifier.");
+ return NULL;
+ }
+
+ if (table_id >= cls->num_tables) {
+ rte_flow_error_set(error, EINVAL,
+ RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
+ NULL, "invalid table_id.");
+ return NULL;
+ }
+
+ if (key_found == NULL) {
+ rte_flow_error_set(error, EINVAL,
+ RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
+ NULL, "NULL key_found.");
+ return NULL;
+ }
+
+ if (!pattern) {
+ rte_flow_error_set(error, EINVAL,
+ RTE_FLOW_ERROR_TYPE_ITEM_NUM,
+ NULL, "NULL pattern.");
+ return NULL;
+ }
+
+ if (!actions) {
+ rte_flow_error_set(error, EINVAL,
+ RTE_FLOW_ERROR_TYPE_ACTION_NUM,
+ NULL, "NULL action.");
+ return NULL;
+ }
+
+ if (!attr) {
+ rte_flow_error_set(error, EINVAL,
+ RTE_FLOW_ERROR_TYPE_ATTR,
+ NULL, "NULL attribute.");
+ return NULL;
+ }
+
+ /* parse attr, pattern and actions */
+ ret = flow_classify_parse_flow(attr, pattern, actions, error);
+ if (ret < 0)
+ return NULL;
+
+ switch (cls->type) {
+ case RTE_FLOW_CLASSIFY_TABLE_TYPE_ACL:
+ rule = allocate_acl_ipv4_5tuple_rule();
+ if (!rule)
+ return NULL;
+ break;
+ default:
+ return NULL;
+ }
+
+ rule->entry = malloc(sizeof(struct rte_flow_classify_table_entry));
+ if (!rule->entry) {
+ free(rule);
+ return NULL;
+ }
+
+ table_entry = rule->entry;
+ table_entry->rule_id = rule->id;
+
+ if (cls->tables[table_id].ops.f_add != NULL) {
+ ret = cls->tables[table_id].ops.f_add(
+ cls->tables[table_id].h_table,
+ &rule->u.key.key_add,
+ rule->entry,
+ &rule->key_found,
+ &rule->entry_ptr);
+ if (ret) {
+ free(rule->entry);
+ free(rule);
+ return NULL;
+ }
+ *key_found = rule->key_found;
+ }
+ return rule;
+}
+
+int
+rte_flow_classify_table_entry_delete(struct rte_flow_classifier *cls,
+ uint32_t table_id,
+ struct rte_flow_classify_rule *rule)
+{
+ int ret = -EINVAL;
+
+ if (!cls || !rule || table_id >= cls->num_tables)
+ return ret;
+
+ if (cls->tables[table_id].ops.f_delete != NULL)
+ ret = cls->tables[table_id].ops.f_delete(
+ cls->tables[table_id].h_table,
+ &rule->u.key.key_del,
+ &rule->key_found,
+ &rule->entry);
+
+ return ret;
+}
+
+static int
+flow_classifier_lookup(struct rte_flow_classifier *cls,
+ uint32_t table_id,
+ struct rte_mbuf **pkts,
+ const uint16_t nb_pkts)
+{
+ int ret = -EINVAL;
+ uint64_t pkts_mask;
+ uint64_t lookup_hit_mask;
+
+ pkts_mask = RTE_LEN2MASK(nb_pkts, uint64_t);
+ ret = cls->tables[table_id].ops.f_lookup(
+ cls->tables[table_id].h_table,
+ pkts, pkts_mask, &lookup_hit_mask,
+ (void **)cls->entries);
+
+ if (!ret && lookup_hit_mask)
+ cls->nb_pkts = nb_pkts;
+ else
+ cls->nb_pkts = 0;
+
+ return ret;
+}
+
+static int
+action_apply(struct rte_flow_classifier *cls,
+ struct rte_flow_classify_rule *rule,
+ struct rte_flow_classify_stats *stats)
+{
+ struct rte_flow_classify_ipv4_5tuple_stats *ntuple_stats;
+ uint64_t count = 0;
+ int i;
+ int ret = -EINVAL;
+
+ switch (rule->action.type) {
+ case RTE_FLOW_ACTION_TYPE_COUNT:
+ for (i = 0; i < cls->nb_pkts; i++) {
+ if (rule->id == cls->entries[i]->rule_id)
+ count++;
+ }
+ if (count) {
+ ret = 0;
+ ntuple_stats =
+ (struct rte_flow_classify_ipv4_5tuple_stats *)
+ stats->stats;
+ ntuple_stats->counter1 = count;
+ ntuple_stats->ipv4_5tuple = rule->rules.u.ipv4_5tuple;
+ }
+ break;
+ default:
+ ret = -ENOTSUP;
+ break;
+ }
+
+ return ret;
+}
+
+int
+rte_flow_classifier_query(struct rte_flow_classifier *cls,
+ uint32_t table_id,
+ struct rte_mbuf **pkts,
+ const uint16_t nb_pkts,
+ struct rte_flow_classify_rule *rule,
+ struct rte_flow_classify_stats *stats)
+{
+ int ret = -EINVAL;
+
+ if (!cls || !rule || !stats || !pkts || nb_pkts == 0 ||
+ table_id >= cls->num_tables)
+ return ret;
+
+ ret = flow_classifier_lookup(cls, table_id, pkts, nb_pkts);
+ if (!ret)
+ ret = action_apply(cls, rule, stats);
+ return ret;
+}
+
+RTE_INIT(librte_flow_classify_init_log);
+
+static void
+librte_flow_classify_init_log(void)
+{
+ librte_flow_classify_logtype =
+ rte_log_register("librte.flow_classify");
+ if (librte_flow_classify_logtype >= 0)
+ rte_log_set_level(librte_flow_classify_logtype, RTE_LOG_INFO);
+}
diff --git a/lib/librte_flow_classify/rte_flow_classify.h b/lib/librte_flow_classify/rte_flow_classify.h
new file mode 100644
index 00000000..1211873a
--- /dev/null
+++ b/lib/librte_flow_classify/rte_flow_classify.h
@@ -0,0 +1,289 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright(c) 2017 Intel Corporation. All rights reserved.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef _RTE_FLOW_CLASSIFY_H_
+#define _RTE_FLOW_CLASSIFY_H_
+
+/**
+ * @file
+ *
+ * RTE Flow Classify Library
+ *
+ * @b EXPERIMENTAL: this API may change without prior notice
+ *
+ * This library provides flow record information with some measured properties.
+ *
+ * The application should define the flow and the measurement criteria
+ * (action) for it.
+ *
+ * The library doesn't maintain any flow records itself; instead, flow
+ * information is returned to the upper layer only for the given packets.
+ *
+ * It is the application's responsibility to call rte_flow_classifier_query()
+ * for a burst of packets, just after receiving them or before transmitting
+ * them.
+ * The application should provide the flow type of interest and the
+ * measurement to apply to that flow via the rte_flow_classify_table_entry_add()
+ * API, and should provide the rte_flow_classifier object and storage for the
+ * results to the rte_flow_classifier_query() API.
+ *
+ * Usage:
+ * - application calls rte_flow_classifier_create() to create an
+ * rte_flow_classifier object.
+ * - application calls rte_flow_classify_table_create() to create a table
+ * in the rte_flow_classifier object.
+ * - application calls rte_flow_classify_table_entry_add() to add a rule to
+ * the table in the rte_flow_classifier object.
+ * - application calls rte_flow_classifier_query() in a polling manner,
+ * preferably after rte_eth_rx_burst(). This will cause the library to
+ * match packet information to flow information with some measurements.
+ * - the rte_flow_classifier object can be destroyed with
+ *   rte_flow_classifier_free() when it is no longer needed
+ *   (see the usage sketch below).
+ */
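+
+/*
+ * A minimal usage sketch (illustrative only, not part of the API; acl_params,
+ * attr, pattern, actions and the pkts/nb_rx mbuf burst are assumed to be
+ * application-defined, and rte_table_acl_ops comes from librte_table):
+ *
+ *	struct rte_flow_classifier_params cls_params = {
+ *		.name = "flow_classifier",
+ *		.socket_id = rte_socket_id(),
+ *		.type = RTE_FLOW_CLASSIFY_TABLE_TYPE_ACL,
+ *	};
+ *	struct rte_flow_classifier *cls =
+ *		rte_flow_classifier_create(&cls_params);
+ *
+ *	struct rte_flow_classify_table_params tbl_params = {
+ *		.ops = &rte_table_acl_ops,
+ *		.arg_create = &acl_params,
+ *	};
+ *	uint32_t table_id;
+ *	rte_flow_classify_table_create(cls, &tbl_params, &table_id);
+ *
+ *	int key_found;
+ *	struct rte_flow_error error;
+ *	struct rte_flow_classify_rule *rule =
+ *		rte_flow_classify_table_entry_add(cls, table_id, &key_found,
+ *			&attr, pattern, actions, &error);
+ *
+ *	struct rte_flow_classify_ipv4_5tuple_stats ntuple_stats;
+ *	struct rte_flow_classify_stats stats = { .stats = &ntuple_stats };
+ *	rte_flow_classifier_query(cls, table_id, pkts, nb_rx, rule, &stats);
+ */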
+
+#include <rte_ethdev.h>
+#include <rte_ether.h>
+#include <rte_flow.h>
+#include <rte_acl.h>
+#include <rte_table_acl.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+extern int librte_flow_classify_logtype;
+
+#define RTE_FLOW_CLASSIFY_LOG(level, fmt, args...) \
+rte_log(RTE_LOG_ ## level, librte_flow_classify_logtype, "%s(): " fmt, \
+ __func__, ## args)
+
+/** Opaque data type for flow classifier */
+struct rte_flow_classifier;
+
+/** Opaque data type for flow classify rule */
+struct rte_flow_classify_rule;
+
+/** Flow classify rule type */
+enum rte_flow_classify_rule_type {
+ /** no type */
+ RTE_FLOW_CLASSIFY_RULE_TYPE_NONE,
+ /** IPv4 5tuple type */
+ RTE_FLOW_CLASSIFY_RULE_TYPE_IPV4_5TUPLE,
+};
+
+/** Flow classify table type */
+enum rte_flow_classify_table_type {
+ /** no type */
+ RTE_FLOW_CLASSIFY_TABLE_TYPE_NONE,
+ /** ACL type */
+ RTE_FLOW_CLASSIFY_TABLE_TYPE_ACL,
+};
+
+/**
+ * Maximum number of tables allowed for any Flow Classifier instance.
+ * The value of this parameter cannot be changed.
+ */
+#define RTE_FLOW_CLASSIFY_TABLE_MAX 64
+
+/** Parameters for flow classifier creation */
+struct rte_flow_classifier_params {
+ /** flow classifier name */
+ const char *name;
+
+ /** CPU socket ID where memory for the flow classifier and its
+ * elements (tables) should be allocated */
+ int socket_id;
+
+ /** Table type */
+ enum rte_flow_classify_table_type type;
+};
+
+/** Parameters for table creation */
+struct rte_flow_classify_table_params {
+ /** Table operations (specific to each table type) */
+ struct rte_table_ops *ops;
+
+ /** Opaque param to be passed to the table create operation */
+ void *arg_create;
+};
+
+/** IPv4 5-tuple data */
+struct rte_flow_classify_ipv4_5tuple {
+ uint32_t dst_ip; /**< Destination IP address in big endian. */
+ uint32_t dst_ip_mask; /**< Mask of destination IP address. */
+ uint32_t src_ip; /**< Source IP address in big endian. */
+ uint32_t src_ip_mask; /**< Mask of source IP address. */
+ uint16_t dst_port; /**< Destination port in big endian. */
+ uint16_t dst_port_mask; /**< Mask of destination port. */
+ uint16_t src_port; /**< Source Port in big endian. */
+ uint16_t src_port_mask; /**< Mask of source port. */
+ uint8_t proto; /**< L4 protocol. */
+ uint8_t proto_mask; /**< Mask of L4 protocol. */
+};
+
+/**
+ * Flow stats
+ *
+ * For the count action, stats can be returned by the query API.
+ *
+ * Storage for stats is provided by application.
+ */
+struct rte_flow_classify_stats {
+ void *stats;
+};
+
+struct rte_flow_classify_ipv4_5tuple_stats {
+ /** count of packets that match IPv4 5tuple pattern */
+ uint64_t counter1;
+ /** IPv4 5tuple data */
+ struct rte_flow_classify_ipv4_5tuple ipv4_5tuple;
+};
+
+/**
+ * Flow classifier create
+ *
+ * @param params
+ * Parameters for flow classifier creation
+ * @return
+ * Handle to flow classifier instance on success or NULL otherwise
+ */
+struct rte_flow_classifier *
+rte_flow_classifier_create(struct rte_flow_classifier_params *params);
+
+/**
+ * Flow classifier free
+ *
+ * @param cls
+ * Handle to flow classifier instance
+ * @return
+ * 0 on success, error code otherwise
+ */
+int
+rte_flow_classifier_free(struct rte_flow_classifier *cls);
+
+/**
+ * Flow classify table create
+ *
+ * @param cls
+ * Handle to flow classifier instance
+ * @param params
+ * Parameters for flow_classify table creation
+ * @param table_id
+ * Table ID. Valid only within the scope of table IDs of the current
+ * classifier. Only returned after a successful invocation.
+ * @return
+ * 0 on success, error code otherwise
+ */
+int
+rte_flow_classify_table_create(struct rte_flow_classifier *cls,
+ struct rte_flow_classify_table_params *params,
+ uint32_t *table_id);
+
+/**
+ * Add a flow classify rule to the flow_classifer table.
+ *
+ * @param[in] cls
+ * Flow classifier handle
+ * @param[in] table_id
+ * id of table
+ * @param[out] key_found
+ *  Set to 1 if the key is already present, 0 otherwise.
+ * @param[in] attr
+ * Flow rule attributes
+ * @param[in] pattern
+ * Pattern specification (list terminated by the END pattern item).
+ * @param[in] actions
+ *   Associated actions (list terminated by the END action).
+ * @param[out] error
+ * Perform verbose error reporting if not NULL. Structure
+ * initialised in case of error only.
+ * @return
+ * A valid handle in case of success, NULL otherwise.
+ */
+struct rte_flow_classify_rule *
+rte_flow_classify_table_entry_add(struct rte_flow_classifier *cls,
+ uint32_t table_id,
+ int *key_found,
+ const struct rte_flow_attr *attr,
+ const struct rte_flow_item pattern[],
+ const struct rte_flow_action actions[],
+ struct rte_flow_error *error);
+
+/**
+ * Delete a flow classify rule from the flow_classifer table.
+ *
+ * @param[in] cls
+ * Flow classifier handle
+ * @param[in] table_id
+ * id of table
+ * @param[in] rule
+ * Flow classify rule
+ * @return
+ * 0 on success, error code otherwise.
+ */
+int
+rte_flow_classify_table_entry_delete(struct rte_flow_classifier *cls,
+ uint32_t table_id,
+ struct rte_flow_classify_rule *rule);
+
+/**
+ * Query flow classifier for given rule.
+ *
+ * @param[in] cls
+ * Flow classifier handle
+ * @param[in] table_id
+ * id of table
+ * @param[in] pkts
+ * Pointer to packets to process
+ * @param[in] nb_pkts
+ * Number of packets to process
+ * @param[in] rule
+ * Flow classify rule
+ * @param[out] stats
+ *   Flow classify stats for the matched rule; storage is provided by the
+ *   application.
+ *
+ * @return
+ * 0 on success, error code otherwise.
+ */
+int
+rte_flow_classifier_query(struct rte_flow_classifier *cls,
+ uint32_t table_id,
+ struct rte_mbuf **pkts,
+ const uint16_t nb_pkts,
+ struct rte_flow_classify_rule *rule,
+ struct rte_flow_classify_stats *stats);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _RTE_FLOW_CLASSIFY_H_ */
diff --git a/lib/librte_flow_classify/rte_flow_classify_parse.c b/lib/librte_flow_classify/rte_flow_classify_parse.c
new file mode 100644
index 00000000..dbfa1115
--- /dev/null
+++ b/lib/librte_flow_classify/rte_flow_classify_parse.c
@@ -0,0 +1,546 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright(c) 2017 Intel Corporation. All rights reserved.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <rte_flow_classify.h>
+#include "rte_flow_classify_parse.h"
+#include <rte_flow_driver.h>
+
+struct classify_valid_pattern {
+ enum rte_flow_item_type *items;
+ parse_filter_t parse_filter;
+};
+
+static struct rte_flow_action action;
+
+/* Pattern for IPv4 5-tuple UDP filter */
+static enum rte_flow_item_type pattern_ntuple_1[] = {
+ RTE_FLOW_ITEM_TYPE_ETH,
+ RTE_FLOW_ITEM_TYPE_IPV4,
+ RTE_FLOW_ITEM_TYPE_UDP,
+ RTE_FLOW_ITEM_TYPE_END,
+};
+
+/* Pattern for IPv4 5-tuple TCP filter */
+static enum rte_flow_item_type pattern_ntuple_2[] = {
+ RTE_FLOW_ITEM_TYPE_ETH,
+ RTE_FLOW_ITEM_TYPE_IPV4,
+ RTE_FLOW_ITEM_TYPE_TCP,
+ RTE_FLOW_ITEM_TYPE_END,
+};
+
+/* Pattern for IPv4 5-tuple SCTP filter */
+static enum rte_flow_item_type pattern_ntuple_3[] = {
+ RTE_FLOW_ITEM_TYPE_ETH,
+ RTE_FLOW_ITEM_TYPE_IPV4,
+ RTE_FLOW_ITEM_TYPE_SCTP,
+ RTE_FLOW_ITEM_TYPE_END,
+};
+
+static int
+classify_parse_ntuple_filter(const struct rte_flow_attr *attr,
+ const struct rte_flow_item pattern[],
+ const struct rte_flow_action actions[],
+ struct rte_eth_ntuple_filter *filter,
+ struct rte_flow_error *error);
+
+static struct classify_valid_pattern classify_supported_patterns[] = {
+ /* ntuple */
+ { pattern_ntuple_1, classify_parse_ntuple_filter },
+ { pattern_ntuple_2, classify_parse_ntuple_filter },
+ { pattern_ntuple_3, classify_parse_ntuple_filter },
+};
+
+struct rte_flow_action *
+classify_get_flow_action(void)
+{
+ return &action;
+}
+
+/* Find the first VOID or non-VOID item pointer */
+const struct rte_flow_item *
+classify_find_first_item(const struct rte_flow_item *item, bool is_void)
+{
+ bool is_find;
+
+ while (item->type != RTE_FLOW_ITEM_TYPE_END) {
+ if (is_void)
+ is_find = item->type == RTE_FLOW_ITEM_TYPE_VOID;
+ else
+ is_find = item->type != RTE_FLOW_ITEM_TYPE_VOID;
+ if (is_find)
+ break;
+ item++;
+ }
+ return item;
+}
+
+/* Skip all VOID items of the pattern */
+void
+classify_pattern_skip_void_item(struct rte_flow_item *items,
+ const struct rte_flow_item *pattern)
+{
+ uint32_t cpy_count = 0;
+ const struct rte_flow_item *pb = pattern, *pe = pattern;
+
+ for (;;) {
+ /* Find a non-void item first */
+ pb = classify_find_first_item(pb, false);
+ if (pb->type == RTE_FLOW_ITEM_TYPE_END) {
+ pe = pb;
+ break;
+ }
+
+ /* Find a void item */
+ pe = classify_find_first_item(pb + 1, true);
+
+ cpy_count = pe - pb;
+ rte_memcpy(items, pb, sizeof(struct rte_flow_item) * cpy_count);
+
+ items += cpy_count;
+
+ if (pe->type == RTE_FLOW_ITEM_TYPE_END) {
+ pb = pe;
+ break;
+ }
+
+ pb = pe + 1;
+ }
+ /* Copy the END item. */
+ rte_memcpy(items, pe, sizeof(struct rte_flow_item));
+}
+
+/* Check if the pattern matches a supported item type array */
+static bool
+classify_match_pattern(enum rte_flow_item_type *item_array,
+ struct rte_flow_item *pattern)
+{
+ struct rte_flow_item *item = pattern;
+
+ while ((*item_array == item->type) &&
+ (*item_array != RTE_FLOW_ITEM_TYPE_END)) {
+ item_array++;
+ item++;
+ }
+
+ return (*item_array == RTE_FLOW_ITEM_TYPE_END &&
+ item->type == RTE_FLOW_ITEM_TYPE_END);
+}
+
+/* Find if there's parse filter function matched */
+parse_filter_t
+classify_find_parse_filter_func(struct rte_flow_item *pattern)
+{
+ parse_filter_t parse_filter = NULL;
+ uint8_t i = 0;
+
+ for (; i < RTE_DIM(classify_supported_patterns); i++) {
+ if (classify_match_pattern(classify_supported_patterns[i].items,
+ pattern)) {
+ parse_filter =
+ classify_supported_patterns[i].parse_filter;
+ break;
+ }
+ }
+
+ return parse_filter;
+}
+
+#define FLOW_RULE_MIN_PRIORITY 8
+#define FLOW_RULE_MAX_PRIORITY 0
+
+#define NEXT_ITEM_OF_PATTERN(item, pattern, index)\
+ do {\
+ item = pattern + index;\
+ while (item->type == RTE_FLOW_ITEM_TYPE_VOID) {\
+ index++;\
+ item = pattern + index;\
+ } \
+ } while (0)
+
+#define NEXT_ITEM_OF_ACTION(act, actions, index)\
+ do {\
+ act = actions + index;\
+ while (act->type == RTE_FLOW_ACTION_TYPE_VOID) {\
+ index++;\
+ act = actions + index;\
+ } \
+ } while (0)
+
+/**
+ * Please be aware that there is an assumption for all the parsers:
+ * rte_flow_item uses big endian, while rte_flow_attr and
+ * rte_flow_action use CPU order.
+ * Because the pattern is used to describe the packets,
+ * the packets should normally use network byte order.
+ */
+
+/**
+ * Parse the rule to see if it is an n-tuple rule, and extract the n-tuple
+ * filter info if so.
+ * pattern:
+ * The first not void item can be ETH or IPV4.
+ * The second not void item must be IPV4 if the first one is ETH.
+ * The third not void item must be UDP, TCP or SCTP.
+ * The next not void item must be END.
+ * action:
+ * The first not void action must be COUNT.
+ * The next not void action must be END.
+ * pattern example:
+ * ITEM Spec Mask
+ * ETH NULL NULL
+ * IPV4 src_addr 192.168.1.20 0xFFFFFFFF
+ * dst_addr 192.167.3.50 0xFFFFFFFF
+ * next_proto_id 17 0xFF
+ * UDP/TCP/ src_port 80 0xFFFF
+ * SCTP dst_port 80 0xFFFF
+ * END
+ * other members in mask and spec should be set to 0x00.
+ * item->last should be NULL.
+ */
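+
+/*
+ * Illustrative pattern/actions layout accepted by this parser (a sketch only;
+ * the ipv4/udp spec and mask structures are assumed to be filled in by the
+ * application, in network byte order for the pattern items):
+ *
+ *	struct rte_flow_item pattern[] = {
+ *		{ .type = RTE_FLOW_ITEM_TYPE_ETH },
+ *		{ .type = RTE_FLOW_ITEM_TYPE_IPV4,
+ *		  .spec = &ipv4_spec, .mask = &ipv4_mask },
+ *		{ .type = RTE_FLOW_ITEM_TYPE_UDP,
+ *		  .spec = &udp_spec, .mask = &udp_mask },
+ *		{ .type = RTE_FLOW_ITEM_TYPE_END },
+ *	};
+ *	struct rte_flow_action actions[] = {
+ *		{ .type = RTE_FLOW_ACTION_TYPE_COUNT },
+ *		{ .type = RTE_FLOW_ACTION_TYPE_END },
+ *	};
+ *	struct rte_flow_attr attr = { .ingress = 1 };
+ */
+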
+static int
+classify_parse_ntuple_filter(const struct rte_flow_attr *attr,
+ const struct rte_flow_item pattern[],
+ const struct rte_flow_action actions[],
+ struct rte_eth_ntuple_filter *filter,
+ struct rte_flow_error *error)
+{
+ const struct rte_flow_item *item;
+ const struct rte_flow_action *act;
+ const struct rte_flow_item_ipv4 *ipv4_spec;
+ const struct rte_flow_item_ipv4 *ipv4_mask;
+ const struct rte_flow_item_tcp *tcp_spec;
+ const struct rte_flow_item_tcp *tcp_mask;
+ const struct rte_flow_item_udp *udp_spec;
+ const struct rte_flow_item_udp *udp_mask;
+ const struct rte_flow_item_sctp *sctp_spec;
+ const struct rte_flow_item_sctp *sctp_mask;
+ uint32_t index;
+
+ if (!pattern) {
+ rte_flow_error_set(error,
+ EINVAL, RTE_FLOW_ERROR_TYPE_ITEM_NUM,
+ NULL, "NULL pattern.");
+ return -EINVAL;
+ }
+
+ if (!actions) {
+ rte_flow_error_set(error, EINVAL,
+ RTE_FLOW_ERROR_TYPE_ACTION_NUM,
+ NULL, "NULL action.");
+ return -EINVAL;
+ }
+ if (!attr) {
+ rte_flow_error_set(error, EINVAL,
+ RTE_FLOW_ERROR_TYPE_ATTR,
+ NULL, "NULL attribute.");
+ return -EINVAL;
+ }
+
+ /* parse pattern */
+ index = 0;
+
+ /* the first not void item can be MAC or IPv4 */
+ NEXT_ITEM_OF_PATTERN(item, pattern, index);
+
+ if (item->type != RTE_FLOW_ITEM_TYPE_ETH &&
+ item->type != RTE_FLOW_ITEM_TYPE_IPV4) {
+ rte_flow_error_set(error, EINVAL,
+ RTE_FLOW_ERROR_TYPE_ITEM,
+ item, "Not supported by ntuple filter");
+ return -EINVAL;
+ }
+ /* Skip Ethernet */
+ if (item->type == RTE_FLOW_ITEM_TYPE_ETH) {
+ /*Not supported last point for range*/
+ if (item->last) {
+ rte_flow_error_set(error, EINVAL,
+ RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
+ item,
+ "Not supported last point for range");
+ return -EINVAL;
+
+ }
+ /* if the first item is MAC, the content should be NULL */
+ if (item->spec || item->mask) {
+ rte_flow_error_set(error, EINVAL,
+ RTE_FLOW_ERROR_TYPE_ITEM,
+ item,
+ "Not supported by ntuple filter");
+ return -EINVAL;
+ }
+ /* check if the next not void item is IPv4 */
+ index++;
+ NEXT_ITEM_OF_PATTERN(item, pattern, index);
+ if (item->type != RTE_FLOW_ITEM_TYPE_IPV4) {
+ rte_flow_error_set(error, EINVAL,
+ RTE_FLOW_ERROR_TYPE_ITEM,
+ item,
+ "Not supported by ntuple filter");
+ return -EINVAL;
+ }
+ }
+
+ /* get the IPv4 info */
+ if (!item->spec || !item->mask) {
+ rte_flow_error_set(error, EINVAL,
+ RTE_FLOW_ERROR_TYPE_ITEM,
+ item, "Invalid ntuple mask");
+ return -EINVAL;
+ }
+ /*Not supported last point for range*/
+ if (item->last) {
+ rte_flow_error_set(error, EINVAL,
+ RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
+ item, "Not supported last point for range");
+ return -EINVAL;
+
+ }
+
+ ipv4_mask = (const struct rte_flow_item_ipv4 *)item->mask;
+ /**
+ * Only support src & dst addresses, protocol,
+ * others should be masked.
+ */
+ if (ipv4_mask->hdr.version_ihl ||
+ ipv4_mask->hdr.type_of_service ||
+ ipv4_mask->hdr.total_length ||
+ ipv4_mask->hdr.packet_id ||
+ ipv4_mask->hdr.fragment_offset ||
+ ipv4_mask->hdr.time_to_live ||
+ ipv4_mask->hdr.hdr_checksum) {
+ rte_flow_error_set(error,
+ EINVAL, RTE_FLOW_ERROR_TYPE_ITEM,
+ item, "Not supported by ntuple filter");
+ return -EINVAL;
+ }
+
+ filter->dst_ip_mask = ipv4_mask->hdr.dst_addr;
+ filter->src_ip_mask = ipv4_mask->hdr.src_addr;
+ filter->proto_mask = ipv4_mask->hdr.next_proto_id;
+
+ ipv4_spec = (const struct rte_flow_item_ipv4 *)item->spec;
+ filter->dst_ip = ipv4_spec->hdr.dst_addr;
+ filter->src_ip = ipv4_spec->hdr.src_addr;
+ filter->proto = ipv4_spec->hdr.next_proto_id;
+
+ /* check if the next not void item is TCP or UDP or SCTP */
+ index++;
+ NEXT_ITEM_OF_PATTERN(item, pattern, index);
+ if (item->type != RTE_FLOW_ITEM_TYPE_TCP &&
+ item->type != RTE_FLOW_ITEM_TYPE_UDP &&
+ item->type != RTE_FLOW_ITEM_TYPE_SCTP) {
+ memset(filter, 0, sizeof(struct rte_eth_ntuple_filter));
+ rte_flow_error_set(error, EINVAL,
+ RTE_FLOW_ERROR_TYPE_ITEM,
+ item, "Not supported by ntuple filter");
+ return -EINVAL;
+ }
+
+ /* get the TCP/UDP info */
+ if (!item->spec || !item->mask) {
+ memset(filter, 0, sizeof(struct rte_eth_ntuple_filter));
+ rte_flow_error_set(error, EINVAL,
+ RTE_FLOW_ERROR_TYPE_ITEM,
+ item, "Invalid ntuple mask");
+ return -EINVAL;
+ }
+
+ /*Not supported last point for range*/
+ if (item->last) {
+ memset(filter, 0, sizeof(struct rte_eth_ntuple_filter));
+ rte_flow_error_set(error, EINVAL,
+ RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
+ item, "Not supported last point for range");
+ return -EINVAL;
+
+ }
+
+ if (item->type == RTE_FLOW_ITEM_TYPE_TCP) {
+ tcp_mask = (const struct rte_flow_item_tcp *)item->mask;
+
+ /**
+ * Only support src & dst ports, tcp flags,
+ * others should be masked.
+ */
+ if (tcp_mask->hdr.sent_seq ||
+ tcp_mask->hdr.recv_ack ||
+ tcp_mask->hdr.data_off ||
+ tcp_mask->hdr.rx_win ||
+ tcp_mask->hdr.cksum ||
+ tcp_mask->hdr.tcp_urp) {
+ memset(filter, 0,
+ sizeof(struct rte_eth_ntuple_filter));
+ rte_flow_error_set(error, EINVAL,
+ RTE_FLOW_ERROR_TYPE_ITEM,
+ item, "Not supported by ntuple filter");
+ return -EINVAL;
+ }
+
+ filter->dst_port_mask = tcp_mask->hdr.dst_port;
+ filter->src_port_mask = tcp_mask->hdr.src_port;
+ if (tcp_mask->hdr.tcp_flags == 0xFF) {
+ filter->flags |= RTE_NTUPLE_FLAGS_TCP_FLAG;
+ } else if (!tcp_mask->hdr.tcp_flags) {
+ filter->flags &= ~RTE_NTUPLE_FLAGS_TCP_FLAG;
+ } else {
+ memset(filter, 0, sizeof(struct rte_eth_ntuple_filter));
+ rte_flow_error_set(error, EINVAL,
+ RTE_FLOW_ERROR_TYPE_ITEM,
+ item, "Not supported by ntuple filter");
+ return -EINVAL;
+ }
+
+ tcp_spec = (const struct rte_flow_item_tcp *)item->spec;
+ filter->dst_port = tcp_spec->hdr.dst_port;
+ filter->src_port = tcp_spec->hdr.src_port;
+ filter->tcp_flags = tcp_spec->hdr.tcp_flags;
+ } else if (item->type == RTE_FLOW_ITEM_TYPE_UDP) {
+ udp_mask = (const struct rte_flow_item_udp *)item->mask;
+
+ /**
+ * Only support src & dst ports,
+ * others should be masked.
+ */
+ if (udp_mask->hdr.dgram_len ||
+ udp_mask->hdr.dgram_cksum) {
+ memset(filter, 0,
+ sizeof(struct rte_eth_ntuple_filter));
+ rte_flow_error_set(error, EINVAL,
+ RTE_FLOW_ERROR_TYPE_ITEM,
+ item, "Not supported by ntuple filter");
+ return -EINVAL;
+ }
+
+ filter->dst_port_mask = udp_mask->hdr.dst_port;
+ filter->src_port_mask = udp_mask->hdr.src_port;
+
+ udp_spec = (const struct rte_flow_item_udp *)item->spec;
+ filter->dst_port = udp_spec->hdr.dst_port;
+ filter->src_port = udp_spec->hdr.src_port;
+ } else {
+ sctp_mask = (const struct rte_flow_item_sctp *)item->mask;
+
+ /**
+ * Only support src & dst ports,
+ * others should be masked.
+ */
+ if (sctp_mask->hdr.tag ||
+ sctp_mask->hdr.cksum) {
+ memset(filter, 0,
+ sizeof(struct rte_eth_ntuple_filter));
+ rte_flow_error_set(error, EINVAL,
+ RTE_FLOW_ERROR_TYPE_ITEM,
+ item, "Not supported by ntuple filter");
+ return -EINVAL;
+ }
+
+ filter->dst_port_mask = sctp_mask->hdr.dst_port;
+ filter->src_port_mask = sctp_mask->hdr.src_port;
+
+ sctp_spec = (const struct rte_flow_item_sctp *)item->spec;
+ filter->dst_port = sctp_spec->hdr.dst_port;
+ filter->src_port = sctp_spec->hdr.src_port;
+ }
+
+ /* check if the next not void item is END */
+ index++;
+ NEXT_ITEM_OF_PATTERN(item, pattern, index);
+ if (item->type != RTE_FLOW_ITEM_TYPE_END) {
+ memset(filter, 0, sizeof(struct rte_eth_ntuple_filter));
+ rte_flow_error_set(error, EINVAL,
+ RTE_FLOW_ERROR_TYPE_ITEM,
+ item, "Not supported by ntuple filter");
+ return -EINVAL;
+ }
+
+ /* parse action */
+ index = 0;
+
+ /**
+ * n-tuple only supports count,
+ * check if the first not void action is COUNT.
+ */
+ memset(&action, 0, sizeof(action));
+ NEXT_ITEM_OF_ACTION(act, actions, index);
+ if (act->type != RTE_FLOW_ACTION_TYPE_COUNT) {
+ memset(filter, 0, sizeof(struct rte_eth_ntuple_filter));
+ rte_flow_error_set(error, EINVAL,
+ RTE_FLOW_ERROR_TYPE_ACTION,
+ act, "Not supported action.");
+ return -EINVAL;
+ }
+ action.type = RTE_FLOW_ACTION_TYPE_COUNT;
+
+ /* check if the next not void item is END */
+ index++;
+ NEXT_ITEM_OF_ACTION(act, actions, index);
+ if (act->type != RTE_FLOW_ACTION_TYPE_END) {
+ memset(filter, 0, sizeof(struct rte_eth_ntuple_filter));
+ rte_flow_error_set(error, EINVAL,
+ RTE_FLOW_ERROR_TYPE_ACTION,
+ act, "Not supported action.");
+ return -EINVAL;
+ }
+
+ /* parse attr */
+ /* must be input direction */
+ if (!attr->ingress) {
+ memset(filter, 0, sizeof(struct rte_eth_ntuple_filter));
+ rte_flow_error_set(error, EINVAL,
+ RTE_FLOW_ERROR_TYPE_ATTR_INGRESS,
+ attr, "Only support ingress.");
+ return -EINVAL;
+ }
+
+ /* not supported */
+ if (attr->egress) {
+ memset(filter, 0, sizeof(struct rte_eth_ntuple_filter));
+ rte_flow_error_set(error, EINVAL,
+ RTE_FLOW_ERROR_TYPE_ATTR_EGRESS,
+ attr, "Not support egress.");
+ return -EINVAL;
+ }
+
+ if (attr->priority > 0xFFFF) {
+ memset(filter, 0, sizeof(struct rte_eth_ntuple_filter));
+ rte_flow_error_set(error, EINVAL,
+ RTE_FLOW_ERROR_TYPE_ATTR_PRIORITY,
+ attr, "Error priority.");
+ return -EINVAL;
+ }
+ filter->priority = (uint16_t)attr->priority;
+ if (attr->priority > FLOW_RULE_MIN_PRIORITY)
+ filter->priority = FLOW_RULE_MAX_PRIORITY;
+
+ return 0;
+}
diff --git a/lib/librte_flow_classify/rte_flow_classify_parse.h b/lib/librte_flow_classify/rte_flow_classify_parse.h
new file mode 100644
index 00000000..1d4708a7
--- /dev/null
+++ b/lib/librte_flow_classify/rte_flow_classify_parse.h
@@ -0,0 +1,74 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright(c) 2017 Intel Corporation. All rights reserved.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef _RTE_FLOW_CLASSIFY_PARSE_H_
+#define _RTE_FLOW_CLASSIFY_PARSE_H_
+
+#include <rte_ethdev.h>
+#include <rte_ether.h>
+#include <rte_flow.h>
+#include <stdbool.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+typedef int (*parse_filter_t)(const struct rte_flow_attr *attr,
+ const struct rte_flow_item pattern[],
+ const struct rte_flow_action actions[],
+ struct rte_eth_ntuple_filter *filter,
+ struct rte_flow_error *error);
+
+/* Skip all VOID items of the pattern */
+void
+classify_pattern_skip_void_item(struct rte_flow_item *items,
+ const struct rte_flow_item *pattern);
+
+/* Find the first VOID or non-VOID item pointer */
+const struct rte_flow_item *
+classify_find_first_item(const struct rte_flow_item *item, bool is_void);
+
+
+/* Find if there's parse filter function matched */
+parse_filter_t
+classify_find_parse_filter_func(struct rte_flow_item *pattern);
+
+/* get action data */
+struct rte_flow_action *
+classify_get_flow_action(void);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _RTE_FLOW_CLASSIFY_PARSE_H_ */
diff --git a/lib/librte_flow_classify/rte_flow_classify_version.map b/lib/librte_flow_classify/rte_flow_classify_version.map
new file mode 100644
index 00000000..f7695cbf
--- /dev/null
+++ b/lib/librte_flow_classify/rte_flow_classify_version.map
@@ -0,0 +1,12 @@
+EXPERIMENTAL {
+ global:
+
+ rte_flow_classifier_create;
+ rte_flow_classifier_free;
+ rte_flow_classifier_query;
+ rte_flow_classify_table_create;
+ rte_flow_classify_table_entry_add;
+ rte_flow_classify_table_entry_delete;
+
+ local: *;
+};
diff --git a/lib/librte_gro/Makefile b/lib/librte_gro/Makefile
index 747eeec9..eb423ccb 100644
--- a/lib/librte_gro/Makefile
+++ b/lib/librte_gro/Makefile
@@ -36,6 +36,7 @@ LIB = librte_gro.a
CFLAGS += -O3
CFLAGS += $(WERROR_FLAGS) -I$(SRCDIR)
+LDLIBS += -lrte_eal -lrte_mbuf -lrte_ethdev -lrte_net
EXPORT_MAP := rte_gro_version.map
diff --git a/lib/librte_gro/rte_gro_version.map b/lib/librte_gro/rte_gro_version.map
index bb40bb41..1606b6dc 100644
--- a/lib/librte_gro/rte_gro_version.map
+++ b/lib/librte_gro/rte_gro_version.map
@@ -1,8 +1,8 @@
DPDK_17.08 {
global:
- rte_gro_ctrl_create;
- rte_gro_ctrl_destroy;
+ rte_gro_ctx_create;
+ rte_gro_ctx_destroy;
rte_gro_get_pkt_count;
rte_gro_reassemble;
rte_gro_reassemble_burst;
diff --git a/lib/librte_gso/Makefile b/lib/librte_gso/Makefile
new file mode 100644
index 00000000..ea5ad742
--- /dev/null
+++ b/lib/librte_gso/Makefile
@@ -0,0 +1,54 @@
+# BSD LICENSE
+#
+# Copyright(c) 2017 Intel Corporation. All rights reserved.
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions
+# are met:
+#
+# * Redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer.
+# * Redistributions in binary form must reproduce the above copyright
+# notice, this list of conditions and the following disclaimer in
+# the documentation and/or other materials provided with the
+# distribution.
+# * Neither the name of Intel Corporation nor the names of its
+# contributors may be used to endorse or promote products derived
+# from this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+include $(RTE_SDK)/mk/rte.vars.mk
+
+# library name
+LIB = librte_gso.a
+
+CFLAGS += $(WERROR_FLAGS) -I$(SRCDIR) -O3
+LDLIBS += -lrte_eal -lrte_mbuf -lrte_ethdev -lrte_net
+LDLIBS += -lrte_mempool
+
+EXPORT_MAP := rte_gso_version.map
+
+LIBABIVER := 1
+
+# source files
+SRCS-$(CONFIG_RTE_LIBRTE_GSO) += rte_gso.c
+SRCS-$(CONFIG_RTE_LIBRTE_GSO) += gso_common.c
+SRCS-$(CONFIG_RTE_LIBRTE_GSO) += gso_tcp4.c
+SRCS-$(CONFIG_RTE_LIBRTE_GSO) += gso_tunnel_tcp4.c
+
+# install this header file
+SYMLINK-$(CONFIG_RTE_LIBRTE_GSO)-include += rte_gso.h
+
+include $(RTE_SDK)/mk/rte.lib.mk
diff --git a/lib/librte_gso/gso_common.c b/lib/librte_gso/gso_common.c
new file mode 100644
index 00000000..ee75d4cd
--- /dev/null
+++ b/lib/librte_gso/gso_common.c
@@ -0,0 +1,153 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright(c) 2017 Intel Corporation. All rights reserved.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <stdbool.h>
+#include <errno.h>
+
+#include <rte_memcpy.h>
+#include <rte_mempool.h>
+
+#include "gso_common.h"
+
+static inline void
+hdr_segment_init(struct rte_mbuf *hdr_segment, struct rte_mbuf *pkt,
+ uint16_t pkt_hdr_offset)
+{
+ /* Copy MBUF metadata */
+ hdr_segment->nb_segs = 1;
+ hdr_segment->port = pkt->port;
+ hdr_segment->ol_flags = pkt->ol_flags;
+ hdr_segment->packet_type = pkt->packet_type;
+ hdr_segment->pkt_len = pkt_hdr_offset;
+ hdr_segment->data_len = pkt_hdr_offset;
+ hdr_segment->tx_offload = pkt->tx_offload;
+
+ /* Copy the packet header */
+ rte_memcpy(rte_pktmbuf_mtod(hdr_segment, char *),
+ rte_pktmbuf_mtod(pkt, char *),
+ pkt_hdr_offset);
+}
+
+static inline void
+free_gso_segment(struct rte_mbuf **pkts, uint16_t nb_pkts)
+{
+ uint16_t i;
+
+ for (i = 0; i < nb_pkts; i++)
+ rte_pktmbuf_free(pkts[i]);
+}
+
+int
+gso_do_segment(struct rte_mbuf *pkt,
+ uint16_t pkt_hdr_offset,
+ uint16_t pyld_unit_size,
+ struct rte_mempool *direct_pool,
+ struct rte_mempool *indirect_pool,
+ struct rte_mbuf **pkts_out,
+ uint16_t nb_pkts_out)
+{
+ struct rte_mbuf *pkt_in;
+ struct rte_mbuf *hdr_segment, *pyld_segment, *prev_segment;
+ uint16_t pkt_in_data_pos, segment_bytes_remaining;
+ uint16_t pyld_len, nb_segs;
+ bool more_in_pkt, more_out_segs;
+
+ pkt_in = pkt;
+ nb_segs = 0;
+ more_in_pkt = 1;
+ pkt_in_data_pos = pkt_hdr_offset;
+
+ while (more_in_pkt) {
+ if (unlikely(nb_segs >= nb_pkts_out)) {
+ free_gso_segment(pkts_out, nb_segs);
+ return -EINVAL;
+ }
+
+ /* Allocate a direct MBUF */
+ hdr_segment = rte_pktmbuf_alloc(direct_pool);
+ if (unlikely(hdr_segment == NULL)) {
+ free_gso_segment(pkts_out, nb_segs);
+ return -ENOMEM;
+ }
+ /* Fill the packet header */
+ hdr_segment_init(hdr_segment, pkt, pkt_hdr_offset);
+
+ prev_segment = hdr_segment;
+ segment_bytes_remaining = pyld_unit_size;
+ more_out_segs = 1;
+
+ while (more_out_segs && more_in_pkt) {
+ /* Allocate an indirect MBUF */
+ pyld_segment = rte_pktmbuf_alloc(indirect_pool);
+ if (unlikely(pyld_segment == NULL)) {
+ rte_pktmbuf_free(hdr_segment);
+ free_gso_segment(pkts_out, nb_segs);
+ return -ENOMEM;
+ }
+ /* Attach to current MBUF segment of pkt */
+ rte_pktmbuf_attach(pyld_segment, pkt_in);
+
+ prev_segment->next = pyld_segment;
+ prev_segment = pyld_segment;
+
+ pyld_len = segment_bytes_remaining;
+ if (pyld_len + pkt_in_data_pos > pkt_in->data_len)
+ pyld_len = pkt_in->data_len - pkt_in_data_pos;
+
+ pyld_segment->data_off = pkt_in_data_pos +
+ pkt_in->data_off;
+ pyld_segment->data_len = pyld_len;
+
+ /* Update header segment */
+ hdr_segment->pkt_len += pyld_len;
+ hdr_segment->nb_segs++;
+
+ pkt_in_data_pos += pyld_len;
+ segment_bytes_remaining -= pyld_len;
+
+ /* Finish processing a MBUF segment of pkt */
+ if (pkt_in_data_pos == pkt_in->data_len) {
+ pkt_in = pkt_in->next;
+ pkt_in_data_pos = 0;
+ if (pkt_in == NULL)
+ more_in_pkt = 0;
+ }
+
+ /* Finish generating a GSO segment */
+ if (segment_bytes_remaining == 0)
+ more_out_segs = 0;
+ }
+ pkts_out[nb_segs++] = hdr_segment;
+ }
+ return nb_segs;
+}
diff --git a/lib/librte_gso/gso_common.h b/lib/librte_gso/gso_common.h
new file mode 100644
index 00000000..145ea495
--- /dev/null
+++ b/lib/librte_gso/gso_common.h
@@ -0,0 +1,171 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright(c) 2017 Intel Corporation. All rights reserved.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef _GSO_COMMON_H_
+#define _GSO_COMMON_H_
+
+#include <stdint.h>
+
+#include <rte_mbuf.h>
+#include <rte_ip.h>
+#include <rte_tcp.h>
+#include <rte_udp.h>
+
+#define IS_FRAGMENTED(frag_off) (((frag_off) & IPV4_HDR_OFFSET_MASK) != 0 \
+ || ((frag_off) & IPV4_HDR_MF_FLAG) == IPV4_HDR_MF_FLAG)
+
+#define TCP_HDR_PSH_MASK ((uint8_t)0x08)
+#define TCP_HDR_FIN_MASK ((uint8_t)0x01)
+
+#define IS_IPV4_TCP(flag) (((flag) & (PKT_TX_TCP_SEG | PKT_TX_IPV4)) == \
+ (PKT_TX_TCP_SEG | PKT_TX_IPV4))
+
+#define IS_IPV4_VXLAN_TCP4(flag) (((flag) & (PKT_TX_TCP_SEG | PKT_TX_IPV4 | \
+ PKT_TX_OUTER_IPV4 | PKT_TX_TUNNEL_VXLAN)) == \
+ (PKT_TX_TCP_SEG | PKT_TX_IPV4 | PKT_TX_OUTER_IPV4 | \
+ PKT_TX_TUNNEL_VXLAN))
+
+#define IS_IPV4_GRE_TCP4(flag) (((flag) & (PKT_TX_TCP_SEG | PKT_TX_IPV4 | \
+ PKT_TX_OUTER_IPV4 | PKT_TX_TUNNEL_GRE)) == \
+ (PKT_TX_TCP_SEG | PKT_TX_IPV4 | PKT_TX_OUTER_IPV4 | \
+ PKT_TX_TUNNEL_GRE))
+
+/**
+ * Internal function which updates the UDP header of a packet, following
+ * segmentation. This is required to update the header's datagram length field.
+ *
+ * @param pkt
+ * The packet containing the UDP header.
+ * @param udp_offset
+ * The offset of the UDP header from the start of the packet.
+ */
+static inline void
+update_udp_header(struct rte_mbuf *pkt, uint16_t udp_offset)
+{
+ struct udp_hdr *udp_hdr;
+
+ udp_hdr = (struct udp_hdr *)(rte_pktmbuf_mtod(pkt, char *) +
+ udp_offset);
+ udp_hdr->dgram_len = rte_cpu_to_be_16(pkt->pkt_len - udp_offset);
+}
+
+/**
+ * Internal function which updates the TCP header of a packet, following
+ * segmentation. This is required to update the header's 'sent' sequence
+ * number, and also to clear 'PSH' and 'FIN' flags for non-tail segments.
+ *
+ * @param pkt
+ * The packet containing the TCP header.
+ * @param l4_offset
+ * The offset of the TCP header from the start of the packet.
+ * @param sent_seq
+ * The sent sequence number.
+ * @param non_tail
+ * Non-zero indicates that this is not the last (i.e. tail) segment.
+ */
+static inline void
+update_tcp_header(struct rte_mbuf *pkt, uint16_t l4_offset, uint32_t sent_seq,
+ uint8_t non_tail)
+{
+ struct tcp_hdr *tcp_hdr;
+
+ tcp_hdr = (struct tcp_hdr *)(rte_pktmbuf_mtod(pkt, char *) +
+ l4_offset);
+ tcp_hdr->sent_seq = rte_cpu_to_be_32(sent_seq);
+ if (likely(non_tail))
+ tcp_hdr->tcp_flags &= (~(TCP_HDR_PSH_MASK |
+ TCP_HDR_FIN_MASK));
+}
+
+/**
+ * Internal function which updates the IPv4 header of a packet, following
+ * segmentation. This is required to update the header's 'total_length' field,
+ * to reflect the reduced length of the now-segmented packet. Furthermore, the
+ * header's 'packet_id' field must be updated to reflect the new ID of the
+ * now-segmented packet.
+ *
+ * @param pkt
+ * The packet containing the IPv4 header.
+ * @param l3_offset
+ * The offset of the IPv4 header from the start of the packet.
+ * @param id
+ * The new ID of the packet.
+ */
+static inline void
+update_ipv4_header(struct rte_mbuf *pkt, uint16_t l3_offset, uint16_t id)
+{
+ struct ipv4_hdr *ipv4_hdr;
+
+ ipv4_hdr = (struct ipv4_hdr *)(rte_pktmbuf_mtod(pkt, char *) +
+ l3_offset);
+ ipv4_hdr->total_length = rte_cpu_to_be_16(pkt->pkt_len - l3_offset);
+ ipv4_hdr->packet_id = rte_cpu_to_be_16(id);
+}
+
+/**
+ * Internal function which divides the input packet into small segments.
+ * Each of the newly-created segments is organized as a two-segment MBUF,
+ * where the first segment is a standard mbuf, which stores a copy of
+ * the packet header, and the second is an indirect mbuf which points to
+ * section of data in the input packet.
+ *
+ * @param pkt
+ * Packet to segment.
+ * @param pkt_hdr_offset
+ * Packet header offset, measured in bytes.
+ * @param pyld_unit_size
+ * The max payload length of a GSO segment.
+ * @param direct_pool
+ * MBUF pool used for allocating direct buffers for output segments.
+ * @param indirect_pool
+ * MBUF pool used for allocating indirect buffers for output segments.
+ * @param pkts_out
+ * Pointer array used to keep the mbuf addresses of output segments. If
+ * the memory space in pkts_out is insufficient, gso_do_segment() fails
+ * and returns -EINVAL.
+ * @param nb_pkts_out
+ * The max number of items that pkts_out can keep.
+ *
+ * @return
+ * - The number of segments created in the event of success.
+ * - Return -ENOMEM if the MBUF pools run out of memory.
+ * - Return -EINVAL for invalid parameters.
+ */
+int gso_do_segment(struct rte_mbuf *pkt,
+ uint16_t pkt_hdr_offset,
+ uint16_t pyld_unit_size,
+ struct rte_mempool *direct_pool,
+ struct rte_mempool *indirect_pool,
+ struct rte_mbuf **pkts_out,
+ uint16_t nb_pkts_out);
+#endif
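As an illustration only (this sketch is not part of the library code), the comment on gso_do_segment() above describes each GSO segment as a two-part mbuf chain: a direct mbuf holding a copy of the headers and an indirect mbuf referencing a payload slice of the input packet. The sketch below, which assumes a single-segment input packet, omits error handling and uses an invented helper name, shows how such a segment could be assembled with the standard mbuf API:

	#include <rte_mbuf.h>
	#include <rte_memcpy.h>

	/* Illustrative sketch, not the librte_gso implementation. */
	static struct rte_mbuf *
	build_gso_segment_sketch(struct rte_mbuf *pkt, uint16_t hdr_offset,
			uint16_t pyld_off, uint16_t pyld_len,
			struct rte_mempool *direct_pool,
			struct rte_mempool *indirect_pool)
	{
		struct rte_mbuf *hdr, *pyld;

		/* Direct mbuf: holds a private copy of the packet headers. */
		hdr = rte_pktmbuf_alloc(direct_pool);
		rte_memcpy(rte_pktmbuf_mtod(hdr, char *),
				rte_pktmbuf_mtod(pkt, char *), hdr_offset);
		hdr->data_len = hdr_offset;

		/* Indirect mbuf: references a payload slice of the input packet. */
		pyld = rte_pktmbuf_alloc(indirect_pool);
		rte_pktmbuf_attach(pyld, pkt);
		pyld->data_off += hdr_offset + pyld_off; /* skip headers, seek slice */
		pyld->data_len = pyld_len;

		/* Chain header and payload into one two-segment output packet. */
		hdr->next = pyld;
		hdr->nb_segs = 2;
		hdr->pkt_len = hdr_offset + pyld_len;
		return hdr;
	}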
diff --git a/lib/librte_gso/gso_tcp4.c b/lib/librte_gso/gso_tcp4.c
new file mode 100644
index 00000000..0c628cb1
--- /dev/null
+++ b/lib/librte_gso/gso_tcp4.c
@@ -0,0 +1,102 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright(c) 2017 Intel Corporation. All rights reserved.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "gso_common.h"
+#include "gso_tcp4.h"
+
+static void
+update_ipv4_tcp_headers(struct rte_mbuf *pkt, uint8_t ipid_delta,
+ struct rte_mbuf **segs, uint16_t nb_segs)
+{
+ struct ipv4_hdr *ipv4_hdr;
+ struct tcp_hdr *tcp_hdr;
+ uint32_t sent_seq;
+ uint16_t id, tail_idx, i;
+ uint16_t l3_offset = pkt->l2_len;
+ uint16_t l4_offset = l3_offset + pkt->l3_len;
+
+ ipv4_hdr = (struct ipv4_hdr *)(rte_pktmbuf_mtod(pkt, char*) +
+ l3_offset);
+ tcp_hdr = (struct tcp_hdr *)((char *)ipv4_hdr + pkt->l3_len);
+ id = rte_be_to_cpu_16(ipv4_hdr->packet_id);
+ sent_seq = rte_be_to_cpu_32(tcp_hdr->sent_seq);
+ tail_idx = nb_segs - 1;
+
+ for (i = 0; i < nb_segs; i++) {
+ update_ipv4_header(segs[i], l3_offset, id);
+ update_tcp_header(segs[i], l4_offset, sent_seq, i < tail_idx);
+ id += ipid_delta;
+ sent_seq += (segs[i]->pkt_len - segs[i]->data_len);
+ }
+}
+
+int
+gso_tcp4_segment(struct rte_mbuf *pkt,
+ uint16_t gso_size,
+ uint8_t ipid_delta,
+ struct rte_mempool *direct_pool,
+ struct rte_mempool *indirect_pool,
+ struct rte_mbuf **pkts_out,
+ uint16_t nb_pkts_out)
+{
+ struct ipv4_hdr *ipv4_hdr;
+ uint16_t pyld_unit_size, hdr_offset;
+ uint16_t frag_off;
+ int ret;
+
+ /* Don't process the fragmented packet */
+ ipv4_hdr = (struct ipv4_hdr *)(rte_pktmbuf_mtod(pkt, char *) +
+ pkt->l2_len);
+ frag_off = rte_be_to_cpu_16(ipv4_hdr->fragment_offset);
+ if (unlikely(IS_FRAGMENTED(frag_off))) {
+ pkts_out[0] = pkt;
+ return 1;
+ }
+
+ /* Don't process the packet without data */
+ hdr_offset = pkt->l2_len + pkt->l3_len + pkt->l4_len;
+ if (unlikely(hdr_offset >= pkt->pkt_len)) {
+ pkts_out[0] = pkt;
+ return 1;
+ }
+
+ pyld_unit_size = gso_size - hdr_offset;
+
+ /* Segment the payload */
+ ret = gso_do_segment(pkt, hdr_offset, pyld_unit_size, direct_pool,
+ indirect_pool, pkts_out, nb_pkts_out);
+ if (ret > 1)
+ update_ipv4_tcp_headers(pkt, ipid_delta, pkts_out, ret);
+
+ return ret;
+}
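As illustrative arithmetic (not part of the patch): with the logic above, each output segment carries hdr_offset bytes of copied headers plus at most pyld_unit_size = gso_size - hdr_offset bytes of payload. For example, with gso_size = 1500 and a 54-byte Ethernet/IPv4/TCP header (14 + 20 + 20), pyld_unit_size is 1446, so an input packet carrying 8946 payload bytes is split into ceil(8946 / 1446) = 7 segments: six full segments and one final segment of 270 payload bytes.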
diff --git a/lib/librte_gso/gso_tcp4.h b/lib/librte_gso/gso_tcp4.h
new file mode 100644
index 00000000..1c574412
--- /dev/null
+++ b/lib/librte_gso/gso_tcp4.h
@@ -0,0 +1,74 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright(c) 2017 Intel Corporation. All rights reserved.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef _GSO_TCP4_H_
+#define _GSO_TCP4_H_
+
+#include <stdint.h>
+#include <rte_mbuf.h>
+
+/**
+ * Segment an IPv4/TCP packet. This function doesn't check if the input
+ * packet has correct checksums, and doesn't update checksums for output
+ * GSO segments. Furthermore, it doesn't process IP fragment packets.
+ *
+ * @param pkt
+ * The packet mbuf to segment.
+ * @param gso_size
+ * The max length of a GSO segment, measured in bytes.
+ * @param ipid_delta
+ * The amount by which the IP ID is increased for each successive segment.
+ * @param direct_pool
+ * MBUF pool used for allocating direct buffers for output segments.
+ * @param indirect_pool
+ * MBUF pool used for allocating indirect buffers for output segments.
+ * @param pkts_out
+ * Pointer array used to store the MBUF addresses of output GSO
+ * segments, when the function succeeds. If the memory space in
+ * pkts_out is insufficient, it fails and returns -EINVAL.
+ * @param nb_pkts_out
+ * The max number of items that 'pkts_out' can keep.
+ *
+ * @return
+ * - The number of GSO segments filled in pkts_out on success.
+ * - Return -ENOMEM if the MBUF pools run out of memory.
+ * - Return -EINVAL for invalid parameters.
+ */
+int gso_tcp4_segment(struct rte_mbuf *pkt,
+ uint16_t gso_size,
+ uint8_t ipid_delta,
+ struct rte_mempool *direct_pool,
+ struct rte_mempool *indirect_pool,
+ struct rte_mbuf **pkts_out,
+ uint16_t nb_pkts_out);
+#endif
diff --git a/lib/librte_gso/gso_tunnel_tcp4.c b/lib/librte_gso/gso_tunnel_tcp4.c
new file mode 100644
index 00000000..8d0cfd7a
--- /dev/null
+++ b/lib/librte_gso/gso_tunnel_tcp4.c
@@ -0,0 +1,126 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright(c) 2017 Intel Corporation. All rights reserved.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "gso_common.h"
+#include "gso_tunnel_tcp4.h"
+
+static void
+update_tunnel_ipv4_tcp_headers(struct rte_mbuf *pkt, uint8_t ipid_delta,
+ struct rte_mbuf **segs, uint16_t nb_segs)
+{
+ struct ipv4_hdr *ipv4_hdr;
+ struct tcp_hdr *tcp_hdr;
+ uint32_t sent_seq;
+ uint16_t outer_id, inner_id, tail_idx, i;
+ uint16_t outer_ipv4_offset, inner_ipv4_offset;
+ uint16_t udp_gre_offset, tcp_offset;
+ uint8_t update_udp_hdr;
+
+ outer_ipv4_offset = pkt->outer_l2_len;
+ udp_gre_offset = outer_ipv4_offset + pkt->outer_l3_len;
+ inner_ipv4_offset = udp_gre_offset + pkt->l2_len;
+ tcp_offset = inner_ipv4_offset + pkt->l3_len;
+
+ /* Outer IPv4 header. */
+ ipv4_hdr = (struct ipv4_hdr *)(rte_pktmbuf_mtod(pkt, char *) +
+ outer_ipv4_offset);
+ outer_id = rte_be_to_cpu_16(ipv4_hdr->packet_id);
+
+ /* Inner IPv4 header. */
+ ipv4_hdr = (struct ipv4_hdr *)(rte_pktmbuf_mtod(pkt, char *) +
+ inner_ipv4_offset);
+ inner_id = rte_be_to_cpu_16(ipv4_hdr->packet_id);
+
+ tcp_hdr = (struct tcp_hdr *)((char *)ipv4_hdr + pkt->l3_len);
+ sent_seq = rte_be_to_cpu_32(tcp_hdr->sent_seq);
+ tail_idx = nb_segs - 1;
+
+ /* Only update UDP header for VxLAN packets. */
+ update_udp_hdr = (pkt->ol_flags & PKT_TX_TUNNEL_VXLAN) ? 1 : 0;
+
+ for (i = 0; i < nb_segs; i++) {
+ update_ipv4_header(segs[i], outer_ipv4_offset, outer_id);
+ if (update_udp_hdr)
+ update_udp_header(segs[i], udp_gre_offset);
+ update_ipv4_header(segs[i], inner_ipv4_offset, inner_id);
+ update_tcp_header(segs[i], tcp_offset, sent_seq, i < tail_idx);
+ outer_id++;
+ inner_id += ipid_delta;
+ sent_seq += (segs[i]->pkt_len - segs[i]->data_len);
+ }
+}
+
+int
+gso_tunnel_tcp4_segment(struct rte_mbuf *pkt,
+ uint16_t gso_size,
+ uint8_t ipid_delta,
+ struct rte_mempool *direct_pool,
+ struct rte_mempool *indirect_pool,
+ struct rte_mbuf **pkts_out,
+ uint16_t nb_pkts_out)
+{
+ struct ipv4_hdr *inner_ipv4_hdr;
+ uint16_t pyld_unit_size, hdr_offset, frag_off;
+ int ret = 1;
+
+ hdr_offset = pkt->outer_l2_len + pkt->outer_l3_len + pkt->l2_len;
+ inner_ipv4_hdr = (struct ipv4_hdr *)(rte_pktmbuf_mtod(pkt, char *) +
+ hdr_offset);
+ /*
+ * Don't process the packet whose MF bit or offset in the inner
+ * IPv4 header is non-zero.
+ */
+ frag_off = rte_be_to_cpu_16(inner_ipv4_hdr->fragment_offset);
+ if (unlikely(IS_FRAGMENTED(frag_off))) {
+ pkts_out[0] = pkt;
+ return 1;
+ }
+
+ hdr_offset += pkt->l3_len + pkt->l4_len;
+ /* Don't process the packet without data */
+ if (hdr_offset >= pkt->pkt_len) {
+ pkts_out[0] = pkt;
+ return 1;
+ }
+ pyld_unit_size = gso_size - hdr_offset;
+
+ /* Segment the payload */
+ ret = gso_do_segment(pkt, hdr_offset, pyld_unit_size, direct_pool,
+ indirect_pool, pkts_out, nb_pkts_out);
+ if (ret <= 1)
+ return ret;
+
+ update_tunnel_ipv4_tcp_headers(pkt, ipid_delta, pkts_out, ret);
+
+ return ret;
+}
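As an illustrative example (not part of the patch): for tunnelled packets the mbuf length fields follow the usual DPDK tunnel-TSO convention, so for a typical VXLAN packet outer_l2_len = 14 (outer Ethernet), outer_l3_len = 20 (outer IPv4), l2_len = 30 (outer UDP 8 + VXLAN 8 + inner Ethernet 14), l3_len = 20 (inner IPv4) and l4_len = 20 (inner TCP). The code above then computes outer_ipv4_offset = 14, udp_gre_offset = 34, inner_ipv4_offset = 64, tcp_offset = 84 and hdr_offset = 104, giving pyld_unit_size = gso_size - 104 (1396 for a 1500-byte gso_size).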
diff --git a/lib/librte_gso/gso_tunnel_tcp4.h b/lib/librte_gso/gso_tunnel_tcp4.h
new file mode 100644
index 00000000..3c67f0cd
--- /dev/null
+++ b/lib/librte_gso/gso_tunnel_tcp4.h
@@ -0,0 +1,75 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright(c) 2017 Intel Corporation. All rights reserved.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef _GSO_TUNNEL_TCP4_H_
+#define _GSO_TUNNEL_TCP4_H_
+
+#include <stdint.h>
+#include <rte_mbuf.h>
+
+/**
+ * Segment a tunneling packet with inner TCP/IPv4 headers. This function
+ * doesn't check if the input packet has correct checksums, and doesn't
+ * update checksums for output GSO segments. Furthermore, it doesn't
+ * process IP fragment packets.
+ *
+ * @param pkt
+ * The packet mbuf to segment.
+ * @param gso_size
+ * The max length of a GSO segment, measured in bytes.
+ * @param ipid_delta
+ * The amount by which the IP ID is increased for each successive segment.
+ * @param direct_pool
+ * MBUF pool used for allocating direct buffers for output segments.
+ * @param indirect_pool
+ * MBUF pool used for allocating indirect buffers for output segments.
+ * @param pkts_out
+ * Pointer array used to store the MBUF addresses of output GSO
+ * segments, when it succeeds. If the memory space in pkts_out is
+ * insufficient, it fails and returns -EINVAL.
+ * @param nb_pkts_out
+ * The max number of items that 'pkts_out' can keep.
+ *
+ * @return
+ * - The number of GSO segments filled in pkts_out on success.
+ * - Return -ENOMEM if the MBUF pools run out of memory.
+ * - Return -EINVAL for invalid parameters.
+ */
+int gso_tunnel_tcp4_segment(struct rte_mbuf *pkt,
+ uint16_t gso_size,
+ uint8_t ipid_delta,
+ struct rte_mempool *direct_pool,
+ struct rte_mempool *indirect_pool,
+ struct rte_mbuf **pkts_out,
+ uint16_t nb_pkts_out);
+#endif
diff --git a/lib/librte_gso/rte_gso.c b/lib/librte_gso/rte_gso.c
new file mode 100644
index 00000000..f86e6541
--- /dev/null
+++ b/lib/librte_gso/rte_gso.c
@@ -0,0 +1,110 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright(c) 2017 Intel Corporation. All rights reserved.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <errno.h>
+
+#include <rte_log.h>
+#include <rte_ethdev.h>
+
+#include "rte_gso.h"
+#include "gso_common.h"
+#include "gso_tcp4.h"
+#include "gso_tunnel_tcp4.h"
+
+int
+rte_gso_segment(struct rte_mbuf *pkt,
+ const struct rte_gso_ctx *gso_ctx,
+ struct rte_mbuf **pkts_out,
+ uint16_t nb_pkts_out)
+{
+ struct rte_mempool *direct_pool, *indirect_pool;
+ struct rte_mbuf *pkt_seg;
+ uint64_t ol_flags;
+ uint16_t gso_size;
+ uint8_t ipid_delta;
+ int ret = 1;
+
+ if (pkt == NULL || pkts_out == NULL || gso_ctx == NULL ||
+ nb_pkts_out < 1 ||
+ gso_ctx->gso_size < RTE_GSO_SEG_SIZE_MIN ||
+ ((gso_ctx->gso_types & (DEV_TX_OFFLOAD_TCP_TSO |
+ DEV_TX_OFFLOAD_VXLAN_TNL_TSO |
+ DEV_TX_OFFLOAD_GRE_TNL_TSO)) == 0))
+ return -EINVAL;
+
+ if (gso_ctx->gso_size >= pkt->pkt_len) {
+ pkt->ol_flags &= (~PKT_TX_TCP_SEG);
+ pkts_out[0] = pkt;
+ return 1;
+ }
+
+ direct_pool = gso_ctx->direct_pool;
+ indirect_pool = gso_ctx->indirect_pool;
+ gso_size = gso_ctx->gso_size;
+ ipid_delta = (gso_ctx->flag != RTE_GSO_FLAG_IPID_FIXED);
+ ol_flags = pkt->ol_flags;
+
+ if ((IS_IPV4_VXLAN_TCP4(pkt->ol_flags) &&
+ (gso_ctx->gso_types & DEV_TX_OFFLOAD_VXLAN_TNL_TSO)) ||
+ ((IS_IPV4_GRE_TCP4(pkt->ol_flags) &&
+ (gso_ctx->gso_types & DEV_TX_OFFLOAD_GRE_TNL_TSO)))) {
+ pkt->ol_flags &= (~PKT_TX_TCP_SEG);
+ ret = gso_tunnel_tcp4_segment(pkt, gso_size, ipid_delta,
+ direct_pool, indirect_pool,
+ pkts_out, nb_pkts_out);
+ } else if (IS_IPV4_TCP(pkt->ol_flags) &&
+ (gso_ctx->gso_types & DEV_TX_OFFLOAD_TCP_TSO)) {
+ pkt->ol_flags &= (~PKT_TX_TCP_SEG);
+ ret = gso_tcp4_segment(pkt, gso_size, ipid_delta,
+ direct_pool, indirect_pool,
+ pkts_out, nb_pkts_out);
+ } else {
+ /* unsupported packet, skip */
+ pkts_out[0] = pkt;
+ RTE_LOG(DEBUG, GSO, "Unsupported packet type\n");
+ return 1;
+ }
+
+ if (ret > 1) {
+ pkt_seg = pkt;
+ while (pkt_seg) {
+ rte_mbuf_refcnt_update(pkt_seg, -1);
+ pkt_seg = pkt_seg->next;
+ }
+ } else if (ret < 0) {
+ /* Revert the ol_flags in the event of failure. */
+ pkt->ol_flags = ol_flags;
+ }
+
+ return ret;
+}
diff --git a/lib/librte_gso/rte_gso.h b/lib/librte_gso/rte_gso.h
new file mode 100644
index 00000000..4b77176f
--- /dev/null
+++ b/lib/librte_gso/rte_gso.h
@@ -0,0 +1,148 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright(c) 2017 Intel Corporation. All rights reserved.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef _RTE_GSO_H_
+#define _RTE_GSO_H_
+
+/**
+ * @file
+ * Interface to GSO library
+ */
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include <stdint.h>
+#include <rte_mbuf.h>
+
+/* Minimum GSO segment size. */
+#define RTE_GSO_SEG_SIZE_MIN (sizeof(struct ether_hdr) + \
+ sizeof(struct ipv4_hdr) + sizeof(struct tcp_hdr) + 1)
+
+/* GSO flags for rte_gso_ctx. */
+#define RTE_GSO_FLAG_IPID_FIXED (1ULL << 0)
+/**< Use fixed IP ids for output GSO segments. Setting
+ * 0 indicates using incremental IP ids.
+ */
+
+/**
+ * GSO context structure.
+ */
+struct rte_gso_ctx {
+ struct rte_mempool *direct_pool;
+ /**< MBUF pool for allocating direct buffers, which are used
+ * to store packet headers for GSO segments.
+ */
+ struct rte_mempool *indirect_pool;
+ /**< MBUF pool for allocating indirect buffers, which are used
+ * to locate packet payloads for GSO segments. The indirect
+ * buffer doesn't contain any data, but simply points to an
+ * offset within the packet to segment.
+ */
+ uint64_t flag;
+ /**< flag that controls specific attributes of output segments,
+ * such as the type of IP ID generated (i.e. fixed or incremental).
+ */
+ uint32_t gso_types;
+ /**< the bit mask of required GSO types. The GSO library
+ * uses the same macros as those used to describe device TX
+ * offloading capabilities (i.e. DEV_TX_OFFLOAD_*_TSO) for
+ * gso_types.
+ *
+ * For example, if applications want to segment TCP/IPv4
+ * packets, set DEV_TX_OFFLOAD_TCP_TSO in gso_types.
+ */
+ uint16_t gso_size;
+ /**< maximum size of an output GSO segment, including packet
+ * header and payload, measured in bytes. Must not be less than
+ * RTE_GSO_SEG_SIZE_MIN.
+ */
+};
+
+/**
+ * Segmentation function, which supports processing of both single- and
+ * multi-MBUF packets.
+ *
+ * Note that we refer to the packets that are segmented from the input
+ * packet as 'GSO segments'. rte_gso_segment() doesn't check if the
+ * input packet has correct checksums, and doesn't update checksums for
+ * output GSO segments. Additionally, it doesn't process IP fragment
+ * packets.
+ *
+ * Before calling rte_gso_segment(), applications must set proper ol_flags
+ * for the packet. The GSO library uses the same macros as TSO does.
+ * For example, set PKT_TX_TCP_SEG and PKT_TX_IPV4 in ol_flags to segment
+ * a TCP/IPv4 packet. If rte_gso_segment() succeeds, the PKT_TX_TCP_SEG
+ * flag is removed for all GSO segments and the input packet.
+ *
+ * Each of the newly-created GSO segments is organized as a two-segment
+ * MBUF, where the first segment is a standard MBUF, which stores a copy
+ * of the packet header, and the second is an indirect MBUF which points to
+ * a section of data in the input packet. Since each GSO segment has
+ * multiple MBUFs (i.e. typically 2 MBUFs), the driver of the interface which
+ * the GSO segments are sent to should support transmission of multi-segment
+ * packets.
+ *
+ * If the input packet is GSO'd, its mbuf refcnt is reduced by 1. Therefore,
+ * when all GSO segments are freed, the input packet is freed automatically.
+ *
+ * If the memory space in pkts_out or MBUF pools is insufficient, this
+ * function fails, and it returns (-1) * errno. Otherwise, GSO succeeds,
+ * and this function returns the number of output GSO segments filled in
+ * pkts_out.
+ *
+ * @param pkt
+ * The packet mbuf to segment.
+ * @param ctx
+ * GSO context object pointer.
+ * @param pkts_out
+ * Pointer array used to store the MBUF addresses of output GSO
+ * segments, when rte_gso_segment() succeeds.
+ * @param nb_pkts_out
+ * The max number of items that pkts_out can keep.
+ *
+ * @return
+ * - The number of GSO segments filled in pkts_out on success.
+ * - Return -ENOMEM if the MBUF pools run out of memory.
+ * - Return -EINVAL for invalid parameters.
+ */
+int rte_gso_segment(struct rte_mbuf *pkt,
+ const struct rte_gso_ctx *ctx,
+ struct rte_mbuf **pkts_out,
+ uint16_t nb_pkts_out);
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _RTE_GSO_H_ */
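A minimal usage sketch, not part of the patch: the pool arguments, the 64-entry output array and the 1500-byte gso_size are arbitrary values chosen for the example, and a real application would create the mempools once at initialization and check for allocation failures.

	#include <rte_common.h>
	#include <rte_ethdev.h>
	#include <rte_ether.h>
	#include <rte_ip.h>
	#include <rte_tcp.h>
	#include <rte_gso.h>

	/* Segment one TCP/IPv4 packet; returns the number of GSO segments or < 0. */
	static int
	gso_example(struct rte_mbuf *pkt, struct rte_mempool *direct_pool,
			struct rte_mempool *indirect_pool)
	{
		struct rte_gso_ctx ctx = {
			.direct_pool = direct_pool,
			.indirect_pool = indirect_pool, /* data room may be 0: payload is not copied */
			.flag = 0,                      /* incremental IP ids */
			.gso_types = DEV_TX_OFFLOAD_TCP_TSO,
			.gso_size = 1500,
		};
		struct rte_mbuf *segs[64];
		int nb;

		/* The caller sets the TSO-style flags and header lengths beforehand. */
		pkt->ol_flags |= PKT_TX_TCP_SEG | PKT_TX_IPV4;
		pkt->l2_len = sizeof(struct ether_hdr);
		pkt->l3_len = sizeof(struct ipv4_hdr);
		pkt->l4_len = sizeof(struct tcp_hdr);

		nb = rte_gso_segment(pkt, &ctx, segs, RTE_DIM(segs));
		if (nb < 0)
			return nb; /* -EINVAL or -ENOMEM; pkt is unchanged and still owned by the caller */

		/*
		 * segs[0..nb-1] are two-segment mbufs ready for a multi-segment
		 * capable TX path; freeing them also releases the input packet.
		 */
		return nb;
	}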
diff --git a/lib/librte_gso/rte_gso_version.map b/lib/librte_gso/rte_gso_version.map
new file mode 100644
index 00000000..e1fd453e
--- /dev/null
+++ b/lib/librte_gso/rte_gso_version.map
@@ -0,0 +1,7 @@
+DPDK_17.11 {
+ global:
+
+ rte_gso_segment;
+
+ local: *;
+};
diff --git a/lib/librte_hash/Makefile b/lib/librte_hash/Makefile
index 9cf13a04..1655b601 100644
--- a/lib/librte_hash/Makefile
+++ b/lib/librte_hash/Makefile
@@ -36,6 +36,7 @@ LIB = librte_hash.a
CFLAGS += -O3
CFLAGS += $(WERROR_FLAGS) -I$(SRCDIR)
+LDLIBS += -lrte_eal -lrte_ring
EXPORT_MAP := rte_hash_version.map
diff --git a/lib/librte_hash/rte_crc_arm64.h b/lib/librte_hash/rte_crc_arm64.h
index 774428be..a3c216bb 100644
--- a/lib/librte_hash/rte_crc_arm64.h
+++ b/lib/librte_hash/rte_crc_arm64.h
@@ -116,8 +116,7 @@ rte_hash_crc_set_alg(uint8_t alg)
}
/* Setting the best available algorithm */
-static inline void __attribute__((constructor))
-rte_hash_crc_init_alg(void)
+RTE_INIT(rte_hash_crc_init_alg)
{
rte_hash_crc_set_alg(CRC32_ARM64);
}
diff --git a/lib/librte_hash/rte_cuckoo_hash.c b/lib/librte_hash/rte_cuckoo_hash.c
index 87b25c01..55fd7bdc 100644
--- a/lib/librte_hash/rte_cuckoo_hash.c
+++ b/lib/librte_hash/rte_cuckoo_hash.c
@@ -44,7 +44,6 @@
#include <rte_memcpy.h>
#include <rte_prefetch.h>
#include <rte_branch_prediction.h>
-#include <rte_memzone.h>
#include <rte_malloc.h>
#include <rte_eal.h>
#include <rte_eal_memconfig.h>
@@ -417,9 +416,9 @@ rte_hash_reset(struct rte_hash *h)
/* Search for an entry that can be pushed to its alternative location */
static inline int
-make_space_bucket(const struct rte_hash *h, struct rte_hash_bucket *bkt)
+make_space_bucket(const struct rte_hash *h, struct rte_hash_bucket *bkt,
+ unsigned int *nr_pushes)
{
- static unsigned int nr_pushes;
unsigned i, j;
int ret;
uint32_t next_bucket_idx;
@@ -456,15 +455,14 @@ make_space_bucket(const struct rte_hash *h, struct rte_hash_bucket *bkt)
break;
/* All entries have been pushed, so entry cannot be added */
- if (i == RTE_HASH_BUCKET_ENTRIES || nr_pushes > RTE_HASH_MAX_PUSHES)
+ if (i == RTE_HASH_BUCKET_ENTRIES || ++(*nr_pushes) > RTE_HASH_MAX_PUSHES)
return -ENOSPC;
/* Set flag to indicate that this entry is going to be pushed */
bkt->flag[i] = 1;
- nr_pushes++;
/* Need room in alternative bucket to insert the pushed entry */
- ret = make_space_bucket(h, next_bkt[i]);
+ ret = make_space_bucket(h, next_bkt[i], nr_pushes);
/*
* After recursive function.
* Clear flags and insert the pushed entry
@@ -472,7 +470,6 @@ make_space_bucket(const struct rte_hash *h, struct rte_hash_bucket *bkt)
* or return error
*/
bkt->flag[i] = 0;
- nr_pushes = 0;
if (ret >= 0) {
next_bkt[i]->sig_alt[ret] = bkt->sig_current[i];
next_bkt[i]->sig_current[ret] = bkt->sig_alt[i];
@@ -515,6 +512,7 @@ __rte_hash_add_key_with_hash(const struct rte_hash *h, const void *key,
unsigned n_slots;
unsigned lcore_id;
struct lcore_cache *cached_free_slots = NULL;
+ unsigned int nr_pushes = 0;
if (h->add_key == ADD_KEY_MULTIWRITER)
rte_spinlock_lock(h->multiwriter_lock);
@@ -648,7 +646,7 @@ __rte_hash_add_key_with_hash(const struct rte_hash *h, const void *key,
* if successful or return error and
* store the new slot back in the ring
*/
- ret = make_space_bucket(h, prim_bkt);
+ ret = make_space_bucket(h, prim_bkt, &nr_pushes);
if (ret >= 0) {
prim_bkt->sig_current[ret] = sig;
prim_bkt->sig_alt[ret] = alt_hash;
diff --git a/lib/librte_hash/rte_fbk_hash.c b/lib/librte_hash/rte_fbk_hash.c
index 55c9f358..c87719fb 100644
--- a/lib/librte_hash/rte_fbk_hash.c
+++ b/lib/librte_hash/rte_fbk_hash.c
@@ -39,7 +39,6 @@
#include <sys/queue.h>
#include <rte_memory.h>
-#include <rte_memzone.h>
#include <rte_eal.h>
#include <rte_eal_memconfig.h>
#include <rte_malloc.h>
diff --git a/lib/librte_hash/rte_hash_crc.h b/lib/librte_hash/rte_hash_crc.h
index ea6be522..4f815aea 100644
--- a/lib/librte_hash/rte_hash_crc.h
+++ b/lib/librte_hash/rte_hash_crc.h
@@ -480,8 +480,7 @@ rte_hash_crc_set_alg(uint8_t alg)
}
/* Setting the best available algorithm */
-static inline void __attribute__((constructor))
-rte_hash_crc_init_alg(void)
+RTE_INIT(rte_hash_crc_init_alg)
{
rte_hash_crc_set_alg(CRC32_SSE42_x64);
}
diff --git a/lib/librte_hash/rte_jhash.h b/lib/librte_hash/rte_jhash.h
index 207478c2..3eca1385 100644
--- a/lib/librte_hash/rte_jhash.h
+++ b/lib/librte_hash/rte_jhash.h
@@ -290,7 +290,10 @@ rte_jhash_32b_2hashes(const uint32_t *k, uint32_t length, uint32_t *pc, uint32_t
/**
* The most generic version, hashes an arbitrary sequence
* of bytes. No alignment or length assumptions are made about
- * the input key.
+ * the input key. For keys not aligned to a four-byte boundary,
+ * or whose length is not a multiple of four bytes, the memory
+ * region just after the key may be read (but not used in the computation).
+ * This may cross a page boundary.
*
* @param key
* Key to calculate hash of.
diff --git a/lib/librte_hash/rte_thash.h b/lib/librte_hash/rte_thash.h
index 2fffd61d..4fa5e07a 100644
--- a/lib/librte_hash/rte_thash.h
+++ b/lib/librte_hash/rte_thash.h
@@ -207,15 +207,14 @@ static inline uint32_t
rte_softrss(uint32_t *input_tuple, uint32_t input_len,
const uint8_t *rss_key)
{
- uint32_t i, j, ret = 0;
+ uint32_t i, j, map, ret = 0;
for (j = 0; j < input_len; j++) {
- for (i = 0; i < 32; i++) {
- if (input_tuple[j] & (1 << (31 - i))) {
- ret ^= rte_cpu_to_be_32(((const uint32_t *)rss_key)[j]) << i |
+ for (map = input_tuple[j]; map; map &= (map - 1)) {
+ i = rte_bsf32(map);
+ ret ^= rte_cpu_to_be_32(((const uint32_t *)rss_key)[j]) << (31 - i) |
(uint32_t)((uint64_t)(rte_cpu_to_be_32(((const uint32_t *)rss_key)[j + 1])) >>
- (32 - i));
- }
+ (i + 1));
}
}
return ret;
@@ -238,14 +237,13 @@ static inline uint32_t
rte_softrss_be(uint32_t *input_tuple, uint32_t input_len,
const uint8_t *rss_key)
{
- uint32_t i, j, ret = 0;
+ uint32_t i, j, map, ret = 0;
for (j = 0; j < input_len; j++) {
- for (i = 0; i < 32; i++) {
- if (input_tuple[j] & (1 << (31 - i))) {
- ret ^= ((const uint32_t *)rss_key)[j] << i |
- (uint32_t)((uint64_t)(((const uint32_t *)rss_key)[j + 1]) >> (32 - i));
- }
+ for (map = input_tuple[j]; map; map &= (map - 1)) {
+ i = rte_bsf32(map);
+ ret ^= ((const uint32_t *)rss_key)[j] << (31 - i) |
+ (uint32_t)((uint64_t)(((const uint32_t *)rss_key)[j + 1]) >> (i + 1));
}
}
return ret;
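As an illustrative note on the change above (not part of the patch): the rewritten loops visit only the set bits of each input word instead of testing all 32 bit positions. rte_bsf32(map) returns the index of the lowest set bit and map &= (map - 1) clears that bit, so for input_tuple[j] = 0x14 (bits 2 and 4 set) the loop body runs exactly twice, with i = 2 and then i = 4, producing the same XOR contributions as the previous 32-iteration version.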
diff --git a/lib/librte_ip_frag/Makefile b/lib/librte_ip_frag/Makefile
index 4e693bf8..aff94b8c 100644
--- a/lib/librte_ip_frag/Makefile
+++ b/lib/librte_ip_frag/Makefile
@@ -36,8 +36,10 @@ LIB = librte_ip_frag.a
CFLAGS += -O3
CFLAGS += $(WERROR_FLAGS) -I$(SRCDIR)
+LDLIBS += -lrte_eal -lrte_mempool -lrte_mbuf -lrte_ethdev
+LDLIBS += -lrte_hash
-EXPORT_MAP := rte_ipfrag_version.map
+EXPORT_MAP := rte_ip_frag_version.map
LIBABIVER := 1
diff --git a/lib/librte_ip_frag/rte_ipfrag_version.map b/lib/librte_ip_frag/rte_ip_frag_version.map
index d1acf07c..d1acf07c 100644
--- a/lib/librte_ip_frag/rte_ipfrag_version.map
+++ b/lib/librte_ip_frag/rte_ip_frag_version.map
diff --git a/lib/librte_jobstats/Makefile b/lib/librte_jobstats/Makefile
index 561a0678..d0bddd12 100644
--- a/lib/librte_jobstats/Makefile
+++ b/lib/librte_jobstats/Makefile
@@ -36,6 +36,7 @@ LIB = librte_jobstats.a
CFLAGS += -O3
CFLAGS += $(WERROR_FLAGS) -I$(SRCDIR)
+LDLIBS += -lrte_eal
EXPORT_MAP := rte_jobstats_version.map
diff --git a/lib/librte_jobstats/rte_jobstats.h b/lib/librte_jobstats/rte_jobstats.h
index 7e76fd50..70e034ca 100644
--- a/lib/librte_jobstats/rte_jobstats.h
+++ b/lib/librte_jobstats/rte_jobstats.h
@@ -117,7 +117,7 @@ struct rte_jobstats_context {
/**< Minimum loop execute time. */
uint64_t max_exec_time;
- /**< Minimum loop execute time. */
+ /**< Maximum loop execute time. */
/**
* Sum of time that is not the execute time (ex: from job finish to next
diff --git a/lib/librte_kni/Makefile b/lib/librte_kni/Makefile
index 70f1ca8f..56b19760 100644
--- a/lib/librte_kni/Makefile
+++ b/lib/librte_kni/Makefile
@@ -35,6 +35,7 @@ include $(RTE_SDK)/mk/rte.vars.mk
LIB = librte_kni.a
CFLAGS += $(WERROR_FLAGS) -I$(SRCDIR) -O3 -fno-strict-aliasing
+LDLIBS += -lrte_eal -lrte_mempool -lrte_mbuf -lrte_ethdev
EXPORT_MAP := rte_kni_version.map
diff --git a/lib/librte_kni/rte_kni.c b/lib/librte_kni/rte_kni.c
index 8c483c1f..5ee38e9a 100644
--- a/lib/librte_kni/rte_kni.c
+++ b/lib/librte_kni/rte_kni.c
@@ -456,7 +456,7 @@ va2pa(struct rte_mbuf *m)
{
return (void *)((unsigned long)m -
((unsigned long)m->buf_addr -
- (unsigned long)m->buf_physaddr));
+ (unsigned long)m->buf_iova));
}
static void
diff --git a/lib/librte_kni/rte_kni.h b/lib/librte_kni/rte_kni.h
index 37deb472..d1950791 100644
--- a/lib/librte_kni/rte_kni.h
+++ b/lib/librte_kni/rte_kni.h
@@ -63,13 +63,13 @@ struct rte_mbuf;
* Structure which has the function pointers for KNI interface.
*/
struct rte_kni_ops {
- uint8_t port_id; /* Port ID */
+ uint16_t port_id; /* Port ID */
/* Pointer to function of changing MTU */
- int (*change_mtu)(uint8_t port_id, unsigned new_mtu);
+ int (*change_mtu)(uint16_t port_id, unsigned int new_mtu);
/* Pointer to function of configuring network interface */
- int (*config_network_if)(uint8_t port_id, uint8_t if_up);
+ int (*config_network_if)(uint16_t port_id, uint8_t if_up);
};
/**
@@ -118,7 +118,7 @@ void rte_kni_init(unsigned int max_kni_ifaces);
* elements for each KNI interface allocated.
*
* @param pktmbuf_pool
- * The mempool for allocting mbufs for packets.
+ * The mempool for allocating mbufs for packets.
* @param conf
* The pointer to the configurations of the KNI device.
* @param ops
diff --git a/lib/librte_kvargs/Makefile b/lib/librte_kvargs/Makefile
index 564dd310..4eaa9334 100644
--- a/lib/librte_kvargs/Makefile
+++ b/lib/librte_kvargs/Makefile
@@ -37,6 +37,7 @@ include $(RTE_SDK)/mk/rte.vars.mk
LIB = librte_kvargs.a
CFLAGS += $(WERROR_FLAGS) -I$(SRCDIR) -O3
+LDLIBS += -lrte_eal
EXPORT_MAP := rte_kvargs_version.map
diff --git a/lib/librte_latencystats/Makefile b/lib/librte_latencystats/Makefile
index eaacbb73..665c7b41 100644
--- a/lib/librte_latencystats/Makefile
+++ b/lib/librte_latencystats/Makefile
@@ -36,6 +36,7 @@ LIB = librte_latencystats.a
CFLAGS += $(WERROR_FLAGS) -I$(SRCDIR) -O3
LDLIBS += -lm
LDLIBS += -lpthread
+LDLIBS += -lrte_eal -lrte_metrics -lrte_ethdev -lrte_mbuf
EXPORT_MAP := rte_latencystats_version.map
diff --git a/lib/librte_latencystats/rte_latencystats.c b/lib/librte_latencystats/rte_latencystats.c
index ce029a12..d6ad13c4 100644
--- a/lib/librte_latencystats/rte_latencystats.c
+++ b/lib/librte_latencystats/rte_latencystats.c
@@ -135,7 +135,7 @@ rte_latencystats_fill_values(struct rte_metric_value *values)
}
static uint16_t
-add_time_stamps(uint8_t pid __rte_unused,
+add_time_stamps(uint16_t pid __rte_unused,
uint16_t qid __rte_unused,
struct rte_mbuf **pkts,
uint16_t nb_pkts,
@@ -165,7 +165,7 @@ add_time_stamps(uint8_t pid __rte_unused,
}
static uint16_t
-calc_latency(uint8_t pid __rte_unused,
+calc_latency(uint16_t pid __rte_unused,
uint16_t qid __rte_unused,
struct rte_mbuf **pkts,
uint16_t nb_pkts,
@@ -226,10 +226,10 @@ rte_latencystats_init(uint64_t app_samp_intvl,
rte_latency_stats_flow_type_fn user_cb)
{
unsigned int i;
- uint8_t pid;
+ uint16_t pid;
uint16_t qid;
struct rxtx_cbs *cbs = NULL;
- const uint8_t nb_ports = rte_eth_dev_count();
+ const uint16_t nb_ports = rte_eth_dev_count();
const char *ptr_strings[NUM_LATENCY_STATS] = {0};
const struct rte_memzone *mz = NULL;
const unsigned int flags = 0;
@@ -290,11 +290,11 @@ rte_latencystats_init(uint64_t app_samp_intvl,
int
rte_latencystats_uninit(void)
{
- uint8_t pid;
+ uint16_t pid;
uint16_t qid;
int ret = 0;
struct rxtx_cbs *cbs = NULL;
- const uint8_t nb_ports = rte_eth_dev_count();
+ const uint16_t nb_ports = rte_eth_dev_count();
/** De register Rx/Tx callbacks */
for (pid = 0; pid < nb_ports; pid++) {
diff --git a/lib/librte_lpm/Makefile b/lib/librte_lpm/Makefile
index 32be46b3..2e8749e8 100644
--- a/lib/librte_lpm/Makefile
+++ b/lib/librte_lpm/Makefile
@@ -36,6 +36,7 @@ LIB = librte_lpm.a
CFLAGS += -O3
CFLAGS += $(WERROR_FLAGS) -I$(SRCDIR)
+LDLIBS += -lrte_eal
EXPORT_MAP := rte_lpm_version.map
diff --git a/lib/librte_lpm/rte_lpm.c b/lib/librte_lpm/rte_lpm.c
index 64c074e9..e1f1fad5 100644
--- a/lib/librte_lpm/rte_lpm.c
+++ b/lib/librte_lpm/rte_lpm.c
@@ -43,7 +43,6 @@
#include <rte_common.h>
#include <rte_memory.h> /* for definition of RTE_CACHE_LINE_SIZE */
#include <rte_malloc.h>
-#include <rte_memzone.h>
#include <rte_eal.h>
#include <rte_eal_memconfig.h>
#include <rte_per_lcore.h>
@@ -218,6 +217,7 @@ rte_lpm_create_v20(const char *name, int socket_id, int max_rules,
te = rte_zmalloc("LPM_TAILQ_ENTRY", sizeof(*te), 0);
if (te == NULL) {
RTE_LOG(ERR, LPM, "Failed to allocate tailq entry\n");
+ rte_errno = ENOMEM;
goto exit;
}
@@ -227,6 +227,7 @@ rte_lpm_create_v20(const char *name, int socket_id, int max_rules,
if (lpm == NULL) {
RTE_LOG(ERR, LPM, "LPM memory allocation failed\n");
rte_free(te);
+ rte_errno = ENOMEM;
goto exit;
}
@@ -292,6 +293,7 @@ rte_lpm_create_v1604(const char *name, int socket_id,
te = rte_zmalloc("LPM_TAILQ_ENTRY", sizeof(*te), 0);
if (te == NULL) {
RTE_LOG(ERR, LPM, "Failed to allocate tailq entry\n");
+ rte_errno = ENOMEM;
goto exit;
}
@@ -301,6 +303,7 @@ rte_lpm_create_v1604(const char *name, int socket_id,
if (lpm == NULL) {
RTE_LOG(ERR, LPM, "LPM memory allocation failed\n");
rte_free(te);
+ rte_errno = ENOMEM;
goto exit;
}
@@ -312,6 +315,7 @@ rte_lpm_create_v1604(const char *name, int socket_id,
rte_free(lpm);
lpm = NULL;
rte_free(te);
+ rte_errno = ENOMEM;
goto exit;
}
@@ -324,6 +328,7 @@ rte_lpm_create_v1604(const char *name, int socket_id,
rte_free(lpm);
lpm = NULL;
rte_free(te);
+ rte_errno = ENOMEM;
goto exit;
}
diff --git a/lib/librte_lpm/rte_lpm6.c b/lib/librte_lpm/rte_lpm6.c
index b4a7df34..03668d9e 100644
--- a/lib/librte_lpm/rte_lpm6.c
+++ b/lib/librte_lpm/rte_lpm6.c
@@ -42,7 +42,6 @@
#include <rte_common.h>
#include <rte_memory.h>
#include <rte_malloc.h>
-#include <rte_memzone.h>
#include <rte_memcpy.h>
#include <rte_eal.h>
#include <rte_eal_memconfig.h>
@@ -191,6 +190,7 @@ rte_lpm6_create(const char *name, int socket_id,
te = rte_zmalloc("LPM6_TAILQ_ENTRY", sizeof(*te), 0);
if (te == NULL) {
RTE_LOG(ERR, LPM, "Failed to allocate tailq entry!\n");
+ rte_errno = ENOMEM;
goto exit;
}
@@ -201,6 +201,7 @@ rte_lpm6_create(const char *name, int socket_id,
if (lpm == NULL) {
RTE_LOG(ERR, LPM, "LPM memory allocation failed\n");
rte_free(te);
+ rte_errno = ENOMEM;
goto exit;
}
@@ -212,6 +213,7 @@ rte_lpm6_create(const char *name, int socket_id,
rte_free(lpm);
lpm = NULL;
rte_free(te);
+ rte_errno = ENOMEM;
goto exit;
}
@@ -518,7 +520,7 @@ rte_lpm6_add_v1705(struct rte_lpm6 *lpm, uint8_t *ip, uint8_t depth,
uint32_t next_hop)
{
struct rte_lpm6_tbl_entry *tbl;
- struct rte_lpm6_tbl_entry *tbl_next;
+ struct rte_lpm6_tbl_entry *tbl_next = NULL;
int32_t rule_index;
int status;
uint8_t masked_ip[RTE_LPM6_IPV6_ADDR_SIZE];
diff --git a/lib/librte_mbuf/Makefile b/lib/librte_mbuf/Makefile
index 54827305..f6be3536 100644
--- a/lib/librte_mbuf/Makefile
+++ b/lib/librte_mbuf/Makefile
@@ -35,6 +35,7 @@ include $(RTE_SDK)/mk/rte.vars.mk
LIB = librte_mbuf.a
CFLAGS += $(WERROR_FLAGS) -I$(SRCDIR) -O3
+LDLIBS += -lrte_eal -lrte_mempool
EXPORT_MAP := rte_mbuf_version.map
diff --git a/lib/librte_mbuf/rte_mbuf.c b/lib/librte_mbuf/rte_mbuf.c
index 26a62b8e..2e08b9e9 100644
--- a/lib/librte_mbuf/rte_mbuf.c
+++ b/lib/librte_mbuf/rte_mbuf.c
@@ -46,7 +46,6 @@
#include <rte_common.h>
#include <rte_log.h>
#include <rte_memory.h>
-#include <rte_memzone.h>
#include <rte_launch.h>
#include <rte_eal.h>
#include <rte_per_lcore.h>
@@ -135,7 +134,7 @@ rte_pktmbuf_init(struct rte_mempool *mp,
/* start of buffer is after mbuf structure and priv data */
m->priv_size = priv_size;
m->buf_addr = (char *)m + mbuf_size;
- m->buf_physaddr = rte_mempool_virt2phy(mp, m) + mbuf_size;
+ m->buf_iova = rte_mempool_virt2iova(m) + mbuf_size;
m->buf_len = (uint16_t)buf_len;
/* keep some headroom between start of buffer and data */
@@ -144,7 +143,7 @@ rte_pktmbuf_init(struct rte_mempool *mp,
/* init some constant fields */
m->pool = mp;
m->nb_segs = 1;
- m->port = 0xff;
+ m->port = MBUF_INVALID_PORT;
rte_mbuf_refcnt_set(m, 1);
m->next = NULL;
}
@@ -157,6 +156,7 @@ rte_pktmbuf_pool_create(const char *name, unsigned n,
{
struct rte_mempool *mp;
struct rte_pktmbuf_pool_private mbp_priv;
+ const char *mp_ops_name;
unsigned elt_size;
int ret;
@@ -176,8 +176,8 @@ rte_pktmbuf_pool_create(const char *name, unsigned n,
if (mp == NULL)
return NULL;
- ret = rte_mempool_set_ops_byname(mp,
- RTE_MBUF_DEFAULT_MEMPOOL_OPS, NULL);
+ mp_ops_name = rte_eal_mbuf_default_mempool_ops();
+ ret = rte_mempool_set_ops_byname(mp, mp_ops_name, NULL);
if (ret != 0) {
RTE_LOG(ERR, MBUF, "error setting mempool handler\n");
rte_mempool_free(mp);
@@ -211,8 +211,8 @@ rte_mbuf_sanity_check(const struct rte_mbuf *m, int is_header)
/* generic checks */
if (m->pool == NULL)
rte_panic("bad mbuf pool\n");
- if (m->buf_physaddr == 0)
- rte_panic("bad phys addr\n");
+ if (m->buf_iova == 0)
+ rte_panic("bad IO addr\n");
if (m->buf_addr == NULL)
rte_panic("bad virt addr\n");
@@ -243,8 +243,8 @@ rte_pktmbuf_dump(FILE *f, const struct rte_mbuf *m, unsigned dump_len)
__rte_mbuf_sanity_check(m, 1);
- fprintf(f, "dump mbuf at %p, phys=%"PRIx64", buf_len=%u\n",
- m, (uint64_t)m->buf_physaddr, (unsigned)m->buf_len);
+ fprintf(f, "dump mbuf at %p, iova=%"PRIx64", buf_len=%u\n",
+ m, (uint64_t)m->buf_iova, (unsigned)m->buf_len);
fprintf(f, " pkt_len=%"PRIu32", ol_flags=%"PRIx64", nb_segs=%u, "
"in_port=%u\n", m->pkt_len, m->ol_flags,
(unsigned)m->nb_segs, (unsigned)m->port);
@@ -307,7 +307,7 @@ const void *__rte_pktmbuf_read(const struct rte_mbuf *m, uint32_t off,
const char *rte_get_rx_ol_flag_name(uint64_t mask)
{
switch (mask) {
- case PKT_RX_VLAN_PKT: return "PKT_RX_VLAN_PKT";
+ case PKT_RX_VLAN: return "PKT_RX_VLAN";
case PKT_RX_RSS_HASH: return "PKT_RX_RSS_HASH";
case PKT_RX_FDIR: return "PKT_RX_FDIR";
case PKT_RX_L4_CKSUM_BAD: return "PKT_RX_L4_CKSUM_BAD";
@@ -323,6 +323,8 @@ const char *rte_get_rx_ol_flag_name(uint64_t mask)
case PKT_RX_QINQ_STRIPPED: return "PKT_RX_QINQ_STRIPPED";
case PKT_RX_LRO: return "PKT_RX_LRO";
case PKT_RX_TIMESTAMP: return "PKT_RX_TIMESTAMP";
+ case PKT_RX_SEC_OFFLOAD: return "PKT_RX_SEC_OFFLOAD";
+ case PKT_RX_SEC_OFFLOAD_FAILED: return "PKT_RX_SEC_OFFLOAD_FAILED";
default: return NULL;
}
}
@@ -338,7 +340,7 @@ int
rte_get_rx_ol_flag_list(uint64_t mask, char *buf, size_t buflen)
{
const struct flag_mask rx_flags[] = {
- { PKT_RX_VLAN_PKT, PKT_RX_VLAN_PKT, NULL },
+ { PKT_RX_VLAN, PKT_RX_VLAN, NULL },
{ PKT_RX_RSS_HASH, PKT_RX_RSS_HASH, NULL },
{ PKT_RX_FDIR, PKT_RX_FDIR, NULL },
{ PKT_RX_L4_CKSUM_BAD, PKT_RX_L4_CKSUM_MASK, NULL },
@@ -358,6 +360,9 @@ rte_get_rx_ol_flag_list(uint64_t mask, char *buf, size_t buflen)
{ PKT_RX_QINQ_STRIPPED, PKT_RX_QINQ_STRIPPED, NULL },
{ PKT_RX_LRO, PKT_RX_LRO, NULL },
{ PKT_RX_TIMESTAMP, PKT_RX_TIMESTAMP, NULL },
+ { PKT_RX_SEC_OFFLOAD, PKT_RX_SEC_OFFLOAD, NULL },
+ { PKT_RX_SEC_OFFLOAD_FAILED, PKT_RX_SEC_OFFLOAD_FAILED, NULL },
+ { PKT_RX_QINQ, PKT_RX_QINQ, NULL },
};
const char *name;
unsigned int i;
@@ -410,6 +415,7 @@ const char *rte_get_tx_ol_flag_name(uint64_t mask)
case PKT_TX_TUNNEL_GENEVE: return "PKT_TX_TUNNEL_GENEVE";
case PKT_TX_TUNNEL_MPLSINUDP: return "PKT_TX_TUNNEL_MPLSINUDP";
case PKT_TX_MACSEC: return "PKT_TX_MACSEC";
+ case PKT_TX_SEC_OFFLOAD: return "PKT_TX_SEC_OFFLOAD";
default: return NULL;
}
}
@@ -443,6 +449,7 @@ rte_get_tx_ol_flag_list(uint64_t mask, char *buf, size_t buflen)
{ PKT_TX_TUNNEL_MPLSINUDP, PKT_TX_TUNNEL_MASK,
"PKT_TX_TUNNEL_NONE" },
{ PKT_TX_MACSEC, PKT_TX_MACSEC, NULL },
+ { PKT_TX_SEC_OFFLOAD, PKT_TX_SEC_OFFLOAD, NULL },
};
const char *name;
unsigned int i;
diff --git a/lib/librte_mbuf/rte_mbuf.h b/lib/librte_mbuf/rte_mbuf.h
index eaed7eee..6d91f7d3 100644
--- a/lib/librte_mbuf/rte_mbuf.h
+++ b/lib/librte_mbuf/rte_mbuf.h
@@ -89,12 +89,13 @@ extern "C" {
*/
/**
- * RX packet is a 802.1q VLAN packet. This flag was set by PMDs when
- * the packet is recognized as a VLAN, but the behavior between PMDs
- * was not the same. This flag is kept for some time to avoid breaking
- * applications and should be replaced by PKT_RX_VLAN_STRIPPED.
+ * The RX packet is a 802.1q VLAN packet, and the tci has been
+ * saved in mbuf->vlan_tci.
+ * If the flag PKT_RX_VLAN_STRIPPED is also present, the VLAN
+ * header has been stripped from mbuf data, else it is still
+ * present.
*/
-#define PKT_RX_VLAN_PKT (1ULL << 0)
+#define PKT_RX_VLAN (1ULL << 0)
#define PKT_RX_RSS_HASH (1ULL << 1) /**< RX packet with RSS hash result. */
#define PKT_RX_FDIR (1ULL << 2) /**< RX packet with FDIR match indicate. */
@@ -123,6 +124,7 @@ extern "C" {
* A vlan has been stripped by the hardware and its tci is saved in
* mbuf->vlan_tci. This can only happen if vlan stripping is enabled
* in the RX configuration of the PMD.
+ * When PKT_RX_VLAN_STRIPPED is set, PKT_RX_VLAN must also be set.
*/
#define PKT_RX_VLAN_STRIPPED (1ULL << 6)
@@ -165,19 +167,13 @@ extern "C" {
* The 2 vlans have been stripped by the hardware and their tci are
* saved in mbuf->vlan_tci (inner) and mbuf->vlan_tci_outer (outer).
* This can only happen if vlan stripping is enabled in the RX
- * configuration of the PMD. If this flag is set, PKT_RX_VLAN_STRIPPED
- * must also be set.
+ * configuration of the PMD.
+ * When PKT_RX_QINQ_STRIPPED is set, the flags (PKT_RX_VLAN |
+ * PKT_RX_VLAN_STRIPPED | PKT_RX_QINQ) must also be set.
*/
#define PKT_RX_QINQ_STRIPPED (1ULL << 15)
/**
- * Deprecated.
- * RX packet with double VLAN stripped.
- * This flag is replaced by PKT_RX_QINQ_STRIPPED.
- */
-#define PKT_RX_QINQ_PKT PKT_RX_QINQ_STRIPPED
-
-/**
* When packets are coalesced by a hardware or virtual driver, this flag
* can be set in the RX mbuf, meaning that the m->tso_segsz field is
* valid and is set to the segment size of original packets.
@@ -189,11 +185,35 @@ extern "C" {
*/
#define PKT_RX_TIMESTAMP (1ULL << 17)
+/**
+ * Indicate that security offload processing was applied on the RX packet.
+ */
+#define PKT_RX_SEC_OFFLOAD (1ULL << 18)
+
+/**
+ * Indicate that security offload processing failed on the RX packet.
+ */
+#define PKT_RX_SEC_OFFLOAD_FAILED (1ULL << 19)
+
+/**
+ * The RX packet is a double VLAN packet, and the outer tci has been
+ * saved in mbuf->vlan_tci_outer.
+ * If the flag PKT_RX_QINQ_STRIPPED is also present, both VLAN
+ * headers have been stripped from mbuf data, else they are still
+ * present.
+ */
+#define PKT_RX_QINQ (1ULL << 20)
+
/* add new RX flags here */
/* add new TX flags here */
/**
+ * Request security offload processing on the TX packet.
+ */
+#define PKT_TX_SEC_OFFLOAD (1ULL << 43)
+
+/**
* Offload the MACsec. This flag must be set by the application to enable
* this offload feature for a packet to be transmitted.
*/
@@ -316,7 +336,8 @@ extern "C" {
PKT_TX_QINQ_PKT | \
PKT_TX_VLAN_PKT | \
PKT_TX_TUNNEL_MASK | \
- PKT_TX_MACSEC)
+ PKT_TX_MACSEC | \
+ PKT_TX_SEC_OFFLOAD)
#define __RESERVED (1ULL << 61) /**< reserved for future mbuf use */
@@ -411,7 +432,11 @@ struct rte_mbuf {
* same mbuf cacheline0 layout for 32-bit and 64-bit. This makes
* working on vector drivers easier.
*/
- phys_addr_t buf_physaddr __rte_aligned(sizeof(phys_addr_t));
+ RTE_STD_C11
+ union {
+ rte_iova_t buf_iova;
+ rte_iova_t buf_physaddr; /**< deprecated */
+ } __rte_aligned(sizeof(rte_iova_t));
/* next 8 bytes are initialised on RX descriptor rearm */
MARKER64 rearm_data;
@@ -456,8 +481,21 @@ struct rte_mbuf {
uint32_t l3_type:4; /**< (Outer) L3 type. */
uint32_t l4_type:4; /**< (Outer) L4 type. */
uint32_t tun_type:4; /**< Tunnel type. */
- uint32_t inner_l2_type:4; /**< Inner L2 type. */
- uint32_t inner_l3_type:4; /**< Inner L3 type. */
+ RTE_STD_C11
+ union {
+ uint8_t inner_esp_next_proto;
+ /**< ESP next protocol type, valid if
+ * RTE_PTYPE_TUNNEL_ESP tunnel type is set
+ * on both Tx and Rx.
+ */
+ __extension__
+ struct {
+ uint8_t inner_l2_type:4;
+ /**< Inner L2 type. */
+ uint8_t inner_l3_type:4;
+ /**< Inner L3 type. */
+ };
+ };
uint32_t inner_l4_type:4; /**< Inner L4 type. */
};
};
@@ -587,21 +625,28 @@ rte_mbuf_prefetch_part2(struct rte_mbuf *m)
static inline uint16_t rte_pktmbuf_priv_size(struct rte_mempool *mp);
/**
- * Return the DMA address of the beginning of the mbuf data
+ * Return the IO address of the beginning of the mbuf data
*
* @param mb
* The pointer to the mbuf.
* @return
- * The physical address of the beginning of the mbuf data
+ * The IO address of the beginning of the mbuf data
*/
+static inline rte_iova_t
+rte_mbuf_data_iova(const struct rte_mbuf *mb)
+{
+ return mb->buf_iova + mb->data_off;
+}
+
+__rte_deprecated
static inline phys_addr_t
rte_mbuf_data_dma_addr(const struct rte_mbuf *mb)
{
- return mb->buf_physaddr + mb->data_off;
+ return rte_mbuf_data_iova(mb);
}
/**
- * Return the default DMA address of the beginning of the mbuf data
+ * Return the default IO address of the beginning of the mbuf data
*
* This function is used by drivers in their receive function, as it
* returns the location where data should be written by the NIC, taking
@@ -610,12 +655,19 @@ rte_mbuf_data_dma_addr(const struct rte_mbuf *mb)
* @param mb
* The pointer to the mbuf.
* @return
- * The physical address of the beginning of the mbuf data
+ * The IO address of the beginning of the mbuf data
*/
+static inline rte_iova_t
+rte_mbuf_data_iova_default(const struct rte_mbuf *mb)
+{
+ return mb->buf_iova + RTE_PKTMBUF_HEADROOM;
+}
+
+__rte_deprecated
static inline phys_addr_t
rte_mbuf_data_dma_addr_default(const struct rte_mbuf *mb)
{
- return mb->buf_physaddr + RTE_PKTMBUF_HEADROOM;
+ return rte_mbuf_data_iova_default(mb);
}
/**
@@ -806,7 +858,7 @@ rte_mbuf_sanity_check(const struct rte_mbuf *m, int is_header);
* For standard needs, prefer rte_pktmbuf_alloc().
*
* The caller can expect that the following fields of the mbuf structure
- * are initialized: buf_addr, buf_physaddr, buf_len, refcnt=1, nb_segs=1,
+ * are initialized: buf_addr, buf_iova, buf_len, refcnt=1, nb_segs=1,
* next=NULL, pool, priv_size. The other fields must be initialized
* by the caller.
*
@@ -1087,6 +1139,8 @@ static inline void rte_pktmbuf_reset_headroom(struct rte_mbuf *m)
* @param m
* The packet mbuf to be reset.
*/
+#define MBUF_INVALID_PORT UINT16_MAX
+
static inline void rte_pktmbuf_reset(struct rte_mbuf *m)
{
m->next = NULL;
@@ -1095,7 +1149,7 @@ static inline void rte_pktmbuf_reset(struct rte_mbuf *m)
m->vlan_tci = 0;
m->vlan_tci_outer = 0;
m->nb_segs = 1;
- m->port = 0xff;
+ m->port = MBUF_INVALID_PORT;
m->ol_flags = 0;
m->packet_type = 0;
@@ -1214,7 +1268,7 @@ static inline void rte_pktmbuf_attach(struct rte_mbuf *mi, struct rte_mbuf *m)
rte_mbuf_refcnt_update(md, 1);
mi->priv_size = m->priv_size;
- mi->buf_physaddr = m->buf_physaddr;
+ mi->buf_iova = m->buf_iova;
mi->buf_addr = m->buf_addr;
mi->buf_len = m->buf_len;
@@ -1262,7 +1316,7 @@ static inline void rte_pktmbuf_detach(struct rte_mbuf *m)
m->priv_size = priv_size;
m->buf_addr = (char *)m + mbuf_size;
- m->buf_physaddr = rte_mempool_virt2phy(mp, m) + mbuf_size;
+ m->buf_iova = rte_mempool_virt2iova(m) + mbuf_size;
m->buf_len = (uint16_t)buf_len;
rte_pktmbuf_reset_headroom(m);
m->data_len = 0;
@@ -1524,7 +1578,7 @@ static inline struct rte_mbuf *rte_pktmbuf_lastseg(struct rte_mbuf *m)
#define rte_pktmbuf_mtod(m, t) rte_pktmbuf_mtod_offset(m, t, 0)
/**
- * A macro that returns the physical address that points to an offset of the
+ * A macro that returns the IO address that points to an offset of the
* start of the data in the mbuf
*
* @param m
@@ -1532,17 +1586,24 @@ static inline struct rte_mbuf *rte_pktmbuf_lastseg(struct rte_mbuf *m)
* @param o
* The offset into the data to calculate address from.
*/
+#define rte_pktmbuf_iova_offset(m, o) \
+ (rte_iova_t)((m)->buf_iova + (m)->data_off + (o))
+
+/* deprecated */
#define rte_pktmbuf_mtophys_offset(m, o) \
- (phys_addr_t)((m)->buf_physaddr + (m)->data_off + (o))
+ rte_pktmbuf_iova_offset(m, o)
/**
- * A macro that returns the physical address that points to the start of the
+ * A macro that returns the IO address that points to the start of the
* data in the mbuf
*
* @param m
* The packet mbuf.
*/
-#define rte_pktmbuf_mtophys(m) rte_pktmbuf_mtophys_offset(m, 0)
+#define rte_pktmbuf_iova(m) rte_pktmbuf_iova_offset(m, 0)
+
+/* deprecated */
+#define rte_pktmbuf_mtophys(m) rte_pktmbuf_iova(m)
/**
* A macro that returns the length of the packet.
diff --git a/lib/librte_mbuf/rte_mbuf_ptype.c b/lib/librte_mbuf/rte_mbuf_ptype.c
index e5c4fae3..a623226c 100644
--- a/lib/librte_mbuf/rte_mbuf_ptype.c
+++ b/lib/librte_mbuf/rte_mbuf_ptype.c
@@ -89,6 +89,9 @@ const char *rte_get_ptype_tunnel_name(uint32_t ptype)
case RTE_PTYPE_TUNNEL_NVGRE: return "TUNNEL_NVGRE";
case RTE_PTYPE_TUNNEL_GENEVE: return "TUNNEL_GENEVE";
case RTE_PTYPE_TUNNEL_GRENAT: return "TUNNEL_GRENAT";
+ case RTE_PTYPE_TUNNEL_GTPC: return "TUNNEL_GTPC";
+ case RTE_PTYPE_TUNNEL_GTPU: return "TUNNEL_GTPU";
+ case RTE_PTYPE_TUNNEL_ESP: return "TUNNEL_ESP";
default: return "TUNNEL_UNKNOWN";
}
}
diff --git a/lib/librte_mbuf/rte_mbuf_ptype.h b/lib/librte_mbuf/rte_mbuf_ptype.h
index acd70bb6..5c62435c 100644
--- a/lib/librte_mbuf/rte_mbuf_ptype.h
+++ b/lib/librte_mbuf/rte_mbuf_ptype.h
@@ -383,6 +383,49 @@ extern "C" {
*/
#define RTE_PTYPE_TUNNEL_GRENAT 0x00006000
/**
+ * GTP-C (GPRS Tunnelling Protocol) control tunneling packet type.
+ * Packet format:
+ * <'ether type'=0x0800
+ * | 'version'=4, 'protocol'=17
+ * | 'destination port'=2123>
+ * or,
+ * <'ether type'=0x86DD
+ * | 'version'=6, 'next header'=17
+ * | 'destination port'=2123>
+ * or,
+ * <'ether type'=0x0800
+ * | 'version'=4, 'protocol'=17
+ * | 'source port'=2123>
+ * or,
+ * <'ether type'=0x86DD
+ * | 'version'=6, 'next header'=17
+ * | 'source port'=2123>
+ */
+#define RTE_PTYPE_TUNNEL_GTPC 0x00007000
+/**
+ * GTP-U (GPRS Tunnelling Protocol) user data tunneling packet type.
+ * Packet format:
+ * <'ether type'=0x0800
+ * | 'version'=4, 'protocol'=17
+ * | 'destination port'=2152>
+ * or,
+ * <'ether type'=0x86DD
+ * | 'version'=6, 'next header'=17
+ * | 'destination port'=2152>
+ */
+#define RTE_PTYPE_TUNNEL_GTPU 0x00008000
+/**
+ * ESP (IP Encapsulating Security Payload) tunneling packet type.
+ *
+ * Packet format:
+ * <'ether type'=0x0800
+ * | 'version'=4, 'protocol'=51>
+ * or,
+ * <'ether type'=0x86DD
+ * | 'version'=6, 'next header'=51>
+ */
+#define RTE_PTYPE_TUNNEL_ESP 0x00009000
+/**
* Mask of tunneling packet types.
*/
#define RTE_PTYPE_TUNNEL_MASK 0x0000f000
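The new tunnel values are consumed like any other RTE_PTYPE_TUNNEL_* constant; a minimal, illustrative check on a received mbuf:

/* Illustration only: classify the tunnel part of a received mbuf's packet
 * type (assumes <rte_mbuf.h> is included for struct rte_mbuf). */
static inline int
example_is_gtpu_or_esp(const struct rte_mbuf *m)
{
	uint32_t tun = m->packet_type & RTE_PTYPE_TUNNEL_MASK;

	return tun == RTE_PTYPE_TUNNEL_GTPU || tun == RTE_PTYPE_TUNNEL_ESP;
}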
diff --git a/lib/librte_eal/linuxapp/xen_dom0/Makefile b/lib/librte_member/Makefile
index be51a82a..f4cf101e 100644
--- a/lib/librte_eal/linuxapp/xen_dom0/Makefile
+++ b/lib/librte_member/Makefile
@@ -1,6 +1,6 @@
# BSD LICENSE
#
-# Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+# Copyright(c) 2017 Intel Corporation. All rights reserved.
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
@@ -31,23 +31,22 @@
include $(RTE_SDK)/mk/rte.vars.mk
-#
-# module name and path
-#
-MODULE = rte_dom0_mm
+# library name
+LIB = librte_member.a
-#
-# CFLAGS
-#
-MODULE_CFLAGS += -I$(SRCDIR) --param max-inline-insns-single=50
-MODULE_CFLAGS += -I$(RTE_OUTPUT)/include
-MODULE_CFLAGS += -include $(RTE_OUTPUT)/include/rte_config.h
-MODULE_CFLAGS += -Wall -Werror
+CFLAGS := -I$(SRCDIR) $(CFLAGS)
+CFLAGS += $(WERROR_FLAGS) -O3
-#
-# all source are stored in SRCS-y
-#
+LDLIBS += -lm
+LDLIBS += -lrte_eal -lrte_hash
+
+EXPORT_MAP := rte_member_version.map
-SRCS-y += dom0_mm_misc.c
+LIBABIVER := 1
+
+# all source are stored in SRCS-y
+SRCS-$(CONFIG_RTE_LIBRTE_MEMBER) += rte_member.c rte_member_ht.c rte_member_vbf.c
+# install includes
+SYMLINK-$(CONFIG_RTE_LIBRTE_MEMBER)-include := rte_member.h
-include $(RTE_SDK)/mk/rte.module.mk
+include $(RTE_SDK)/mk/rte.lib.mk
diff --git a/lib/librte_member/rte_member.c b/lib/librte_member/rte_member.c
new file mode 100644
index 00000000..cc9ea84a
--- /dev/null
+++ b/lib/librte_member/rte_member.c
@@ -0,0 +1,336 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright(c) 2017 Intel Corporation. All rights reserved.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <string.h>
+
+#include <rte_eal.h>
+#include <rte_eal_memconfig.h>
+#include <rte_memory.h>
+#include <rte_malloc.h>
+#include <rte_errno.h>
+
+#include "rte_member.h"
+#include "rte_member_ht.h"
+#include "rte_member_vbf.h"
+
+int librte_member_logtype;
+
+TAILQ_HEAD(rte_member_list, rte_tailq_entry);
+static struct rte_tailq_elem rte_member_tailq = {
+ .name = "RTE_MEMBER",
+};
+EAL_REGISTER_TAILQ(rte_member_tailq)
+
+struct rte_member_setsum *
+rte_member_find_existing(const char *name)
+{
+ struct rte_member_setsum *setsum = NULL;
+ struct rte_tailq_entry *te;
+ struct rte_member_list *member_list;
+
+ member_list = RTE_TAILQ_CAST(rte_member_tailq.head, rte_member_list);
+
+ rte_rwlock_read_lock(RTE_EAL_TAILQ_RWLOCK);
+ TAILQ_FOREACH(te, member_list, next) {
+ setsum = (struct rte_member_setsum *) te->data;
+ if (strncmp(name, setsum->name, RTE_MEMBER_NAMESIZE) == 0)
+ break;
+ }
+ rte_rwlock_read_unlock(RTE_EAL_TAILQ_RWLOCK);
+
+ if (te == NULL) {
+ rte_errno = ENOENT;
+ return NULL;
+ }
+ return setsum;
+}
+
+void
+rte_member_free(struct rte_member_setsum *setsum)
+{
+ struct rte_member_list *member_list;
+ struct rte_tailq_entry *te;
+
+ if (setsum == NULL)
+ return;
+ member_list = RTE_TAILQ_CAST(rte_member_tailq.head, rte_member_list);
+ rte_rwlock_write_lock(RTE_EAL_TAILQ_RWLOCK);
+ TAILQ_FOREACH(te, member_list, next) {
+ if (te->data == (void *)setsum)
+ break;
+ }
+ if (te == NULL) {
+ rte_rwlock_write_unlock(RTE_EAL_TAILQ_RWLOCK);
+ return;
+ }
+ TAILQ_REMOVE(member_list, te, next);
+ rte_rwlock_write_unlock(RTE_EAL_TAILQ_RWLOCK);
+
+ switch (setsum->type) {
+ case RTE_MEMBER_TYPE_HT:
+ rte_member_free_ht(setsum);
+ break;
+ case RTE_MEMBER_TYPE_VBF:
+ rte_member_free_vbf(setsum);
+ break;
+ default:
+ break;
+ }
+ rte_free(setsum);
+ rte_free(te);
+}
+
+struct rte_member_setsum *
+rte_member_create(const struct rte_member_parameters *params)
+{
+ struct rte_tailq_entry *te;
+ struct rte_member_list *member_list;
+ struct rte_member_setsum *setsum;
+ int ret;
+
+ if (params == NULL) {
+ rte_errno = EINVAL;
+ return NULL;
+ }
+
+ if (params->key_len == 0 ||
+ params->prim_hash_seed == params->sec_hash_seed) {
+ rte_errno = EINVAL;
+ RTE_MEMBER_LOG(ERR, "Create setsummary with "
+ "invalid parameters\n");
+ return NULL;
+ }
+
+ member_list = RTE_TAILQ_CAST(rte_member_tailq.head, rte_member_list);
+
+ rte_rwlock_write_lock(RTE_EAL_TAILQ_RWLOCK);
+
+ TAILQ_FOREACH(te, member_list, next) {
+ setsum = (struct rte_member_setsum *) te->data;
+ if (strncmp(params->name, setsum->name,
+ RTE_MEMBER_NAMESIZE) == 0)
+ break;
+ }
+ setsum = NULL;
+ if (te != NULL) {
+ rte_errno = EEXIST;
+ te = NULL;
+ goto error_unlock_exit;
+ }
+ te = rte_zmalloc("MEMBER_TAILQ_ENTRY", sizeof(*te), 0);
+ if (te == NULL) {
+ RTE_MEMBER_LOG(ERR, "tailq entry allocation failed\n");
+ goto error_unlock_exit;
+ }
+
+ /* Create a new setsum structure */
+ setsum = (struct rte_member_setsum *) rte_zmalloc_socket(params->name,
+ sizeof(struct rte_member_setsum), RTE_CACHE_LINE_SIZE,
+ params->socket_id);
+ if (setsum == NULL) {
+ RTE_MEMBER_LOG(ERR, "Create setsummary failed\n");
+ goto error_unlock_exit;
+ }
+ snprintf(setsum->name, sizeof(setsum->name), "%s", params->name);
+ setsum->type = params->type;
+ setsum->socket_id = params->socket_id;
+ setsum->key_len = params->key_len;
+ setsum->num_set = params->num_set;
+ setsum->prim_hash_seed = params->prim_hash_seed;
+ setsum->sec_hash_seed = params->sec_hash_seed;
+
+ switch (setsum->type) {
+ case RTE_MEMBER_TYPE_HT:
+ ret = rte_member_create_ht(setsum, params);
+ break;
+ case RTE_MEMBER_TYPE_VBF:
+ ret = rte_member_create_vbf(setsum, params);
+ break;
+ default:
+ goto error_unlock_exit;
+ }
+ if (ret < 0)
+ goto error_unlock_exit;
+
+ RTE_MEMBER_LOG(DEBUG, "Creating a setsummary table with "
+ "mode %u\n", setsum->type);
+
+ te->data = (void *)setsum;
+ TAILQ_INSERT_TAIL(member_list, te, next);
+ rte_rwlock_write_unlock(RTE_EAL_TAILQ_RWLOCK);
+ return setsum;
+
+error_unlock_exit:
+ rte_rwlock_write_unlock(RTE_EAL_TAILQ_RWLOCK);
+ rte_member_free(setsum);
+ return NULL;
+}
+
+int
+rte_member_add(const struct rte_member_setsum *setsum, const void *key,
+ member_set_t set_id)
+{
+ if (setsum == NULL || key == NULL)
+ return -EINVAL;
+
+ switch (setsum->type) {
+ case RTE_MEMBER_TYPE_HT:
+ return rte_member_add_ht(setsum, key, set_id);
+ case RTE_MEMBER_TYPE_VBF:
+ return rte_member_add_vbf(setsum, key, set_id);
+ default:
+ return -EINVAL;
+ }
+}
+
+int
+rte_member_lookup(const struct rte_member_setsum *setsum, const void *key,
+ member_set_t *set_id)
+{
+ if (setsum == NULL || key == NULL || set_id == NULL)
+ return -EINVAL;
+
+ switch (setsum->type) {
+ case RTE_MEMBER_TYPE_HT:
+ return rte_member_lookup_ht(setsum, key, set_id);
+ case RTE_MEMBER_TYPE_VBF:
+ return rte_member_lookup_vbf(setsum, key, set_id);
+ default:
+ return -EINVAL;
+ }
+}
+
+int
+rte_member_lookup_bulk(const struct rte_member_setsum *setsum,
+ const void **keys, uint32_t num_keys,
+ member_set_t *set_ids)
+{
+ if (setsum == NULL || keys == NULL || set_ids == NULL)
+ return -EINVAL;
+
+ switch (setsum->type) {
+ case RTE_MEMBER_TYPE_HT:
+ return rte_member_lookup_bulk_ht(setsum, keys, num_keys,
+ set_ids);
+ case RTE_MEMBER_TYPE_VBF:
+ return rte_member_lookup_bulk_vbf(setsum, keys, num_keys,
+ set_ids);
+ default:
+ return -EINVAL;
+ }
+}
+
+int
+rte_member_lookup_multi(const struct rte_member_setsum *setsum, const void *key,
+ uint32_t match_per_key, member_set_t *set_id)
+{
+ if (setsum == NULL || key == NULL || set_id == NULL)
+ return -EINVAL;
+
+ switch (setsum->type) {
+ case RTE_MEMBER_TYPE_HT:
+ return rte_member_lookup_multi_ht(setsum, key, match_per_key,
+ set_id);
+ case RTE_MEMBER_TYPE_VBF:
+ return rte_member_lookup_multi_vbf(setsum, key, match_per_key,
+ set_id);
+ default:
+ return -EINVAL;
+ }
+}
+
+int
+rte_member_lookup_multi_bulk(const struct rte_member_setsum *setsum,
+ const void **keys, uint32_t num_keys,
+ uint32_t max_match_per_key, uint32_t *match_count,
+ member_set_t *set_ids)
+{
+ if (setsum == NULL || keys == NULL || set_ids == NULL ||
+ match_count == NULL)
+ return -EINVAL;
+
+ switch (setsum->type) {
+ case RTE_MEMBER_TYPE_HT:
+ return rte_member_lookup_multi_bulk_ht(setsum, keys, num_keys,
+ max_match_per_key, match_count, set_ids);
+ case RTE_MEMBER_TYPE_VBF:
+ return rte_member_lookup_multi_bulk_vbf(setsum, keys, num_keys,
+ max_match_per_key, match_count, set_ids);
+ default:
+ return -EINVAL;
+ }
+}
+
+int
+rte_member_delete(const struct rte_member_setsum *setsum, const void *key,
+ member_set_t set_id)
+{
+ if (setsum == NULL || key == NULL)
+ return -EINVAL;
+
+ switch (setsum->type) {
+ case RTE_MEMBER_TYPE_HT:
+ return rte_member_delete_ht(setsum, key, set_id);
+ /* current vBF implementation does not support delete function */
+ case RTE_MEMBER_TYPE_VBF:
+ default:
+ return -EINVAL;
+ }
+}
+
+void
+rte_member_reset(const struct rte_member_setsum *setsum)
+{
+ if (setsum == NULL)
+ return;
+ switch (setsum->type) {
+ case RTE_MEMBER_TYPE_HT:
+ rte_member_reset_ht(setsum);
+ return;
+ case RTE_MEMBER_TYPE_VBF:
+ rte_member_reset_vbf(setsum);
+ return;
+ default:
+ return;
+ }
+}
+
+RTE_INIT(librte_member_init_log);
+
+static void
+librte_member_init_log(void)
+{
+ librte_member_logtype = rte_log_register("librte.member");
+ if (librte_member_logtype >= 0)
+ rte_log_set_level(librte_member_logtype, RTE_LOG_DEBUG);
+}
diff --git a/lib/librte_member/rte_member.h b/lib/librte_member/rte_member.h
new file mode 100644
index 00000000..9b0c8f99
--- /dev/null
+++ b/lib/librte_member/rte_member.h
@@ -0,0 +1,513 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright(c) 2017 Intel Corporation. All rights reserved.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/**
+ * @file
+ *
+ * RTE Membership Library
+ *
+ * The Membership Library is an extension and generalization of a traditional
+ * filter (for example Bloom Filter and cuckoo filter) structure that has
+ * multiple usages in a variety of workloads and applications. The library is
+ * used to test if a key belongs to certain sets. Two types of such
+ * "set-summary" structures are implemented: hash-table based (HT) and vector
+ * bloom filter (vBF). For HT setsummary, two subtypes or modes are available,
+ * cache and non-cache modes. The table below summarizes some properties of
+ * the different implementations.
+ *
+ * @warning
+ * @b EXPERIMENTAL: this API may change without prior notice
+ */
+
+/**
+ * <!--
+ * +==========+=====================+================+=========================+
+ * | type | vbf | HT-cache | HT-non-cache |
+ * +==========+=====================+==========================================+
+ * |structure | bloom-filter array | hash-table like without storing key |
+ * +----------+---------------------+------------------------------------------+
+ * |set id | limited by bf count | [1, 0x7fff] |
+ * | | up to 32. | |
+ * +----------+---------------------+------------------------------------------+
+ * |usages & | small set range, | can delete, | cache most recent keys, |
+ * |properties| user-specified | big set range, | have both false-positive|
+ * | | false-positive rate,| small false | and false-negative |
+ * | | no deletion support.| positive depend| depend on table size, |
+ * | | | on table size, | automatic overwritten. |
+ * | | | new key does | |
+ * | | | not overwrite | |
+ * | | | existing key. | |
+ * +----------+---------------------+----------------+-------------------------+
+ * -->
+ */
+
+#ifndef _RTE_MEMBER_H_
+#define _RTE_MEMBER_H_
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include <stdint.h>
+
+/** The set ID type that is stored internally in the hash table based set summary. */
+typedef uint16_t member_set_t;
+/** Invalid set ID used to mean no match found. */
+#define RTE_MEMBER_NO_MATCH 0
+/** Maximum size of hash table that can be created. */
+#define RTE_MEMBER_ENTRIES_MAX (1 << 30)
+/** Maximum number of keys that can be searched as a bulk */
+#define RTE_MEMBER_LOOKUP_BULK_MAX 64
+/** Entry count per bucket in hash table based mode. */
+#define RTE_MEMBER_BUCKET_ENTRIES 16
+/** Maximum number of characters in setsum name. */
+#define RTE_MEMBER_NAMESIZE 32
+
+/** @internal Hash function used by membership library. */
+#if defined(RTE_ARCH_X86) || defined(RTE_MACHINE_CPUFLAG_CRC32)
+#include <rte_hash_crc.h>
+#define MEMBER_HASH_FUNC rte_hash_crc
+#else
+#include <rte_jhash.h>
+#define MEMBER_HASH_FUNC rte_jhash
+#endif
+
+extern int librte_member_logtype;
+
+#define RTE_MEMBER_LOG(level, fmt, args...) \
+rte_log(RTE_LOG_ ## level, librte_member_logtype, "%s(): " fmt, \
+ __func__, ## args)
+
+/** @internal setsummary structure. */
+struct rte_member_setsum;
+
+/**
+ * @warning
+ * @b EXPERIMENTAL: this API may change without prior notice
+ *
+ * Parameter struct used to create set summary
+ */
+struct rte_member_parameters;
+
+/**
+ * @warning
+ * @b EXPERIMENTAL: this API may change without prior notice
+ *
+ * Define different set summary types
+ */
+enum rte_member_setsum_type {
+ RTE_MEMBER_TYPE_HT = 0, /**< Hash table based set summary. */
+ RTE_MEMBER_TYPE_VBF, /**< Vector of bloom filters. */
+ RTE_MEMBER_NUM_TYPE
+};
+
+/** @internal compare function for different arch. */
+enum rte_member_sig_compare_function {
+ RTE_MEMBER_COMPARE_SCALAR = 0,
+ RTE_MEMBER_COMPARE_AVX2,
+ RTE_MEMBER_COMPARE_NUM
+};
+
+/** @internal setsummary structure. */
+struct rte_member_setsum {
+ enum rte_member_setsum_type type; /* Type of the set summary. */
+ uint32_t key_len; /* Length of key. */
+ uint32_t prim_hash_seed; /* Primary hash function seed. */
+ uint32_t sec_hash_seed; /* Secondary hash function seed. */
+
+ /* Hash table based. */
+ uint32_t bucket_cnt; /* Number of buckets. */
+ uint32_t bucket_mask; /* Bit mask to get bucket index. */
+ /* For runtime selecting AVX, scalar, etc for signature comparison. */
+ enum rte_member_sig_compare_function sig_cmp_fn;
+ uint8_t cache; /* If it is cache mode for ht based. */
+
+ /* Vector bloom filter. */
+ uint32_t num_set; /* Number of set (bf) in vbf. */
+ uint32_t bits; /* Number of bits in each bf. */
+ uint32_t bit_mask; /* Bit mask to get bit location in bf. */
+ uint32_t num_hashes; /* Number of hash values to index bf. */
+
+ uint32_t mul_shift; /* vbf internal variable used during bit test. */
+ uint32_t div_shift; /* vbf internal variable used during bit test. */
+
+ void *table; /* Handle of the hash table or vBF array. */
+
+
+ /* Second cache line should start here. */
+ uint32_t socket_id; /* NUMA Socket ID for memory. */
+ char name[RTE_MEMBER_NAMESIZE]; /* Name of this set summary. */
+} __rte_cache_aligned;
+
+/**
+ * @warning
+ * @b EXPERIMENTAL: this API may change without prior notice
+ *
+ * Parameters used when creating the set-summary table. Currently the user can
+ * specify two types of setsummary: HT based and vBF. For HT based, the user can
+ * specify cache or non-cache mode. The table in the file-level comment above
+ * describes some of the differences.
+ *
+ */
+struct rte_member_parameters {
+ const char *name; /**< Name of the hash. */
+
+ /**
+ * The type of the setsummary, one of rte_member_setsum_type, specified
+ * by the user.
+ *
+ * An HT based setsummary is implemented like a hash table. Use this
+ * type when there are many sets.
+ *
+ * A vBF setsummary is a vector of bloom filters. Use it when the number
+ * of sets is small (less than 32 in the current implementation).
+ */
+ enum rte_member_setsum_type type;
+
+ /**
+ * is_cache is only used for HT based setsummary.
+ *
+ * For an HT based setsummary, the user specifies the subtype or mode
+ * of the setsummary: cache or non-cache mode.
+ * Set is_cache to 1 to use cache mode.
+ *
+ * In cache mode, keys can be evicted from the HT setsummary. Keys
+ * with the same signature that map to the same bucket
+ * will overwrite each other in the setsummary table.
+ * This mode is useful when the set-summary only
+ * needs to keep a record of the most recently inserted keys. Both
+ * false-negative and false-positive results can occur.
+ *
+ * In non-cache mode, keys cannot be evicted from the table. So in
+ * this mode the setsummary will eventually become full. Keys with the
+ * same signature that map to the same bucket still occupy separate
+ * entries. This mode does not give false-negative results.
+ */
+ uint8_t is_cache;
+
+ /**
+ * For an HT setsummary, num_keys equals the number of entries of the
+ * table. When the number of keys inserted in the HT setsummary
+ * approaches this number, eviction can happen. In cache mode,
+ * keys can be evicted out of the table. In non-cache mode, keys are
+ * displaced to other buckets as in cuckoo hashing. The table is also
+ * likely to become full before the number of inserted keys equals the
+ * total number of entries.
+ *
+ * For vBF, num_keys equals the expected number of keys that will
+ * be inserted into the vBF. The implementation assumes the keys are
+ * evenly distributed across the BFs in the vBF. This is used to calculate
+ * the number of bits needed for each BF. The user does not specify the
+ * size of each BF directly because the optimal size depends on num_keys
+ * and the false positive rate.
+ */
+ uint32_t num_keys;
+
+ /**
+ * The key length is used for hash calculation. Since keys are not
+ * stored in the set-summary, a large key does not require more memory.
+ */
+ uint32_t key_len;
+
+ /**
+ * num_set is only used for vBF, but not used for HT setsummary.
+ *
+ * num_set is equal to the number of BFs in the vBF. The current
+ * implementation only supports 1, 2, 4, 8, 16, or 32 BFs in one vBF set
+ * summary. If another number of sets is needed, for example 5, the user
+ * should use the smallest supported value larger than 5,
+ * which is 8.
+ */
+ uint32_t num_set;
+
+ /**
+ * false_positive_rate is only used for vBF, but not used for HT
+ * setsummary.
+ *
+ * For vBF, false_positive_rate is the user-defined false positive rate
+ * given the expected number of inserted keys (num_keys). It is used to
+ * calculate the total number of bits for each BF, and the number of
+ * hash values used during lookup and insertion. For details please
+ * refer to the vBF implementation and the membership library documentation.
+ *
+ * For HT, this parameter is not directly set by users.
+ * The HT setsummary's false positive rate is on the order of:
+ * false_pos = (1/bucket_count)*(1/2^16), since a 16-bit signature is used.
+ * This is because two keys need to map to the same bucket and the same
+ * signature to produce a collision (false positive). bucket_count is equal
+ * to the number of entries (num_keys) divided by the entry count per bucket
+ * (RTE_MEMBER_BUCKET_ENTRIES).
+ */
+ float false_positive_rate;
+
+ /**
+ * We use two seeds to calculate two independent hashes for each key.
+ *
+ * For HT type, one hash is used as signature, and the other is used
+ * for bucket location.
+ * For vBF type, these two hashes and their combinations are used as
+ * hash locations to index the bit array.
+ */
+ uint32_t prim_hash_seed;
+
+ /**
+ * The secondary seed should be a different value from the primary seed.
+ */
+ uint32_t sec_hash_seed;
+
+ int socket_id; /**< NUMA Socket ID for memory. */
+};
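To make the parameter block above concrete, a minimal configuration sketch; every field exists in rte_member_parameters as defined above, but the name and values are illustrative only:

/* Illustration only: a vBF set-summary with 8 sets sized for 100000 keys. */
struct rte_member_parameters params = {
	.name = "example_vbf",
	.type = RTE_MEMBER_TYPE_VBF,
	.is_cache = 0,			/* ignored for vBF */
	.num_keys = 100000,		/* expected keys across the whole vBF */
	.key_len = 16,			/* bytes hashed per key */
	.num_set = 8,			/* number of bloom filters */
	.false_positive_rate = 0.01,
	.prim_hash_seed = 0x12345678,
	.sec_hash_seed = 0x87654321,	/* must differ from the primary seed */
	.socket_id = 0,			/* NUMA socket to allocate on */
};
struct rte_member_setsum *ss = rte_member_create(&params);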
+
+/**
+ * @warning
+ * @b EXPERIMENTAL: this API may change without prior notice
+ *
+ * Find an existing set-summary and return a pointer to it.
+ *
+ * @param name
+ * Name of the set-summary.
+ * @return
+ * Pointer to the set-summary, or NULL if no set-summary with the given
+ * name exists, with rte_errno set appropriately. Possible rte_errno values:
+ * - ENOENT - no set-summary with a matching name was found
+ */
+struct rte_member_setsum *
+rte_member_find_existing(const char *name);
+
+/**
+ * @warning
+ * @b EXPERIMENTAL: this API may change without prior notice
+ *
+ * Create set-summary (SS).
+ *
+ * @param params
+ * Parameters to initialize the setsummary.
+ * @return
+ * Return the pointer to the setsummary.
+ * Return value is NULL if the creation failed.
+ */
+struct rte_member_setsum *
+rte_member_create(const struct rte_member_parameters *params);
+
+/**
+ * @warning
+ * @b EXPERIMENTAL: this API may change without prior notice
+ *
+ * Lookup key in set-summary (SS).
+ * Single key lookup; returns as soon as the first match is found.
+ *
+ * @param setsum
+ * Pointer of a setsummary.
+ * @param key
+ * Pointer of the key to be looked up.
+ * @param set_id
+ * Output: the set id that matches the key.
+ * @return
+ * Return 1 if a match is found, 0 otherwise.
+ */
+int
+rte_member_lookup(const struct rte_member_setsum *setsum, const void *key,
+ member_set_t *set_id);
+
+/**
+ * @warning
+ * @b EXPERIMENTAL: this API may change without prior notice
+ *
+ * Lookup bulk of keys in set-summary (SS).
+ * Each key lookup returns as soon as the first match is found.
+ *
+ * @param setsum
+ * Pointer of a setsummary.
+ * @param keys
+ * Pointer of the bulk of keys to be looked up.
+ * @param num_keys
+ * Number of keys to look up.
+ * @param set_ids
+ * Output set ids for all the keys, written to this array.
+ * The user should preallocate an array large enough to hold all results,
+ * i.e. of size num_keys.
+ * @return
+ * The number of keys for which a match was found.
+ */
+int
+rte_member_lookup_bulk(const struct rte_member_setsum *setsum,
+ const void **keys, uint32_t num_keys,
+ member_set_t *set_ids);
+
+/**
+ * @warning
+ * @b EXPERIMENTAL: this API may change without prior notice
+ *
+ * Lookup a key in set-summary (SS) for multiple matches.
+ * The key lookup will find all matched entries (multiple match).
+ * Note that for cache mode HT, each key can have at most one match. This is
+ * because keys with the same signature that map to the same bucket overwrite
+ * each other. So multi-match lookup is mainly useful for vBF and non-cache HT.
+ *
+ * @param setsum
+ * Pointer of a set-summary.
+ * @param key
+ * Pointer of the key to be looked up.
+ * @param max_match_per_key
+ * User specified maximum number of matches for each key. The function returns
+ * as soon as this number of matches is found for the key.
+ * @param set_id
+ * Output set ids for all the matches of the key. User needs to preallocate
+ * the array that can contain max_match_per_key number of results.
+ * @return
+ * The number of matches found for the key.
+ * For cache mode HT set-summary, the number should be at most 1.
+ */
+int
+rte_member_lookup_multi(const struct rte_member_setsum *setsum,
+ const void *key, uint32_t max_match_per_key,
+ member_set_t *set_id);
+
+/**
+ * @warning
+ * @b EXPERIMENTAL: this API may change without prior notice
+ *
+ * Lookup a bulk of keys in set-summary (SS) for multiple matches each key.
+ * Each key lookup will find all matched entries (multiple match).
+ * Note that for cache mode HT, each key can have at most one match. So
+ * multi-match function is mainly used for vBF and non-cache mode HT.
+ *
+ * @param setsum
+ * Pointer of a setsummary.
+ * @param keys
+ * Pointer of the keys to be looked up.
+ * @param num_keys
+ * The number of keys to look up.
+ * @param max_match_per_key
+ * The possible maximum number of matches for each key.
+ * @param match_count
+ * Output the number of matches for each key in an array.
+ * @param set_ids
+ * Return set ids for all the matches of all keys. Users pass in a
+ * preallocated 2D array with first dimension as key index and second
+ * dimension as match index. For example set_ids[bulk_size][max_match_per_key]
+ * @return
+ * The number of keys for which one or more matches were found in the
+ * set-summary.
+ */
+int
+rte_member_lookup_multi_bulk(const struct rte_member_setsum *setsum,
+ const void **keys, uint32_t num_keys,
+ uint32_t max_match_per_key,
+ uint32_t *match_count,
+ member_set_t *set_ids);
+
+/**
+ * @warning
+ * @b EXPERIMENTAL: this API may change without prior notice
+ *
+ * Insert key into set-summary (SS).
+ *
+ * @param setsum
+ * Pointer of a set-summary.
+ * @param key
+ * Pointer of the key to be added.
+ * @param set_id
+ * The set id associated with the key that needs to be added. Different modes
+ * support different set_id ranges. 0 cannot be used as a set_id since
+ * RTE_MEMBER_NO_MATCH is defined as 0.
+ * For HT mode, the set_id range is [1, 0x7FFF]; the MSB is reserved.
+ * For vBF mode the set id is limited by the num_set parameter specified when
+ * creating the set-summary.
+ * @return
+ * HT (cache mode) and vBF should never fail unless the set_id is not in the
+ * valid range. In that case -EINVAL is returned.
+ * For HT (non-cache mode) it can fail with -ENOSPC when the table is
+ * full.
+ * On success, different values are returned for different modes to provide
+ * extra information to the user.
+ * Return 0 for HT (cache mode) if the add does not cause
+ * eviction, 1 otherwise. Return 0 for non-cache mode on success,
+ * -ENOSPC if the table is full, and 1 if a cuckoo eviction happens.
+ * Always returns 0 for vBF mode.
+ */
+int
+rte_member_add(const struct rte_member_setsum *setsum, const void *key,
+ member_set_t set_id);
+
+/**
+ * @warning
+ * @b EXPERIMENTAL: this API may change without prior notice
+ *
+ * De-allocate memory used by set-summary.
+ *
+ * @param setsum
+ * Pointer to the set summary.
+ */
+void
+rte_member_free(struct rte_member_setsum *setsum);
+
+/**
+ * @warning
+ * @b EXPERIMENTAL: this API may change without prior notice
+ *
+ * Reset the set-summary table: clear all bits in a vBF, and
+ * reset the set_id in each entry to RTE_MEMBER_NO_MATCH in an HT based SS.
+ *
+ * @param setsum
+ * Pointer to the set-summary.
+ */
+void
+rte_member_reset(const struct rte_member_setsum *setsum);
+
+/**
+ * @warning
+ * @b EXPERIMENTAL: this API may change without prior notice
+ *
+ * Delete an item from the set-summary. Note that vBF does not support deletion
+ * in the current implementation. For vBF, an error code of -EINVAL is returned.
+ *
+ * @param setsum
+ * Pointer to the set-summary.
+ * @param key
+ * Pointer of the key to be deleted.
+ * @param set_id
+ * For HT mode, we need both key and its corresponding set_id to
+ * properly delete the key. Without set_id, we may delete other keys with the
+ * same signature.
+ * @return
+ * 0 on success; -ENOENT if no matching entry is found to delete.
+ */
+int
+rte_member_delete(const struct rte_member_setsum *setsum, const void *key,
+ member_set_t set_id);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _RTE_MEMBER_H_ */
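A short, hedged usage sketch of the API declared above (error handling trimmed; the set id 3 is an arbitrary example and key is assumed to point to key_len bytes):

static int
example_add_and_lookup(const struct rte_member_setsum *ss, const void *key)
{
	member_set_t set_id = RTE_MEMBER_NO_MATCH;

	/* returns < 0 on error, e.g. -ENOSPC for a full non-cache HT */
	if (rte_member_add(ss, key, 3) < 0)
		return -1;

	/* returns 1 when a match is found and writes the matching set id */
	if (rte_member_lookup(ss, key, &set_id) == 1)
		return set_id;

	return 0; /* set_id stays RTE_MEMBER_NO_MATCH */
}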
diff --git a/lib/librte_member/rte_member_ht.c b/lib/librte_member/rte_member_ht.c
new file mode 100644
index 00000000..59332d56
--- /dev/null
+++ b/lib/librte_member/rte_member_ht.c
@@ -0,0 +1,586 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright(c) 2017 Intel Corporation. All rights reserved.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <rte_errno.h>
+#include <rte_malloc.h>
+#include <rte_prefetch.h>
+#include <rte_random.h>
+#include <rte_log.h>
+
+#include "rte_member.h"
+#include "rte_member_ht.h"
+
+#if defined(RTE_ARCH_X86)
+#include "rte_member_x86.h"
+#endif
+
+/* Search bucket for entry with tmp_sig and update set_id */
+static inline int
+update_entry_search(uint32_t bucket_id, member_sig_t tmp_sig,
+ struct member_ht_bucket *buckets,
+ member_set_t set_id)
+{
+ uint32_t i;
+
+ for (i = 0; i < RTE_MEMBER_BUCKET_ENTRIES; i++) {
+ if (buckets[bucket_id].sigs[i] == tmp_sig) {
+ buckets[bucket_id].sets[i] = set_id;
+ return 1;
+ }
+ }
+ return 0;
+}
+
+static inline int
+search_bucket_single(uint32_t bucket_id, member_sig_t tmp_sig,
+ struct member_ht_bucket *buckets,
+ member_set_t *set_id)
+{
+ uint32_t iter;
+
+ for (iter = 0; iter < RTE_MEMBER_BUCKET_ENTRIES; iter++) {
+ if (tmp_sig == buckets[bucket_id].sigs[iter] &&
+ buckets[bucket_id].sets[iter] !=
+ RTE_MEMBER_NO_MATCH) {
+ *set_id = buckets[bucket_id].sets[iter];
+ return 1;
+ }
+ }
+ return 0;
+}
+
+static inline void
+search_bucket_multi(uint32_t bucket_id, member_sig_t tmp_sig,
+ struct member_ht_bucket *buckets,
+ uint32_t *counter,
+ uint32_t matches_per_key,
+ member_set_t *set_id)
+{
+ uint32_t iter;
+
+ for (iter = 0; iter < RTE_MEMBER_BUCKET_ENTRIES; iter++) {
+ if (tmp_sig == buckets[bucket_id].sigs[iter] &&
+ buckets[bucket_id].sets[iter] !=
+ RTE_MEMBER_NO_MATCH) {
+ set_id[*counter] = buckets[bucket_id].sets[iter];
+ (*counter)++;
+ if (*counter >= matches_per_key)
+ return;
+ }
+ }
+}
+
+int
+rte_member_create_ht(struct rte_member_setsum *ss,
+ const struct rte_member_parameters *params)
+{
+ uint32_t i, j;
+ uint32_t size_bucket_t;
+ uint32_t num_entries = rte_align32pow2(params->num_keys);
+
+ if ((num_entries > RTE_MEMBER_ENTRIES_MAX) ||
+ !rte_is_power_of_2(RTE_MEMBER_BUCKET_ENTRIES) ||
+ num_entries < RTE_MEMBER_BUCKET_ENTRIES) {
+ rte_errno = EINVAL;
+ RTE_MEMBER_LOG(ERR,
+ "Membership HT create with invalid parameters\n");
+ return -EINVAL;
+ }
+
+ uint32_t num_buckets = num_entries / RTE_MEMBER_BUCKET_ENTRIES;
+
+ size_bucket_t = sizeof(struct member_ht_bucket);
+
+ struct member_ht_bucket *buckets = rte_zmalloc_socket(NULL,
+ num_buckets * size_bucket_t,
+ RTE_CACHE_LINE_SIZE, ss->socket_id);
+
+ if (buckets == NULL) {
+ RTE_MEMBER_LOG(ERR, "memory allocation failed for HT "
+ "setsummary\n");
+ return -ENOMEM;
+ }
+
+ ss->table = buckets;
+ ss->bucket_cnt = num_buckets;
+ ss->bucket_mask = num_buckets - 1;
+ ss->cache = params->is_cache;
+
+ for (i = 0; i < num_buckets; i++) {
+ for (j = 0; j < RTE_MEMBER_BUCKET_ENTRIES; j++)
+ buckets[i].sets[j] = RTE_MEMBER_NO_MATCH;
+ }
+#if defined(RTE_ARCH_X86)
+ if (rte_cpu_get_flag_enabled(RTE_CPUFLAG_AVX2) &&
+ RTE_MEMBER_BUCKET_ENTRIES == 16)
+ ss->sig_cmp_fn = RTE_MEMBER_COMPARE_AVX2;
+ else
+#endif
+ ss->sig_cmp_fn = RTE_MEMBER_COMPARE_SCALAR;
+
+ RTE_MEMBER_LOG(DEBUG, "Hash table based filter created, "
+ "the table has %u entries, %u buckets\n",
+ num_entries, num_buckets);
+ return 0;
+}
+
+static inline void
+get_buckets_index(const struct rte_member_setsum *ss, const void *key,
+ uint32_t *prim_bkt, uint32_t *sec_bkt, member_sig_t *sig)
+{
+ uint32_t first_hash = MEMBER_HASH_FUNC(key, ss->key_len,
+ ss->prim_hash_seed);
+ uint32_t sec_hash = MEMBER_HASH_FUNC(&first_hash, sizeof(uint32_t),
+ ss->sec_hash_seed);
+ /*
+ * We use the first hash value for the signature, and the second hash
+ * value to derive the primary and secondary bucket locations.
+ *
+ * For non-cache mode, we use the lower bits for the primary bucket
+ * location. Then we xor primary bucket location and the signature
+ * to get the secondary bucket location. This is called "partial-key
+ * cuckoo hashing" proposed by B. Fan, et al's paper
+ * "Cuckoo Filter: Practically Better Than Bloom". The benefit to use
+ * xor is that one could derive the alternative bucket location
+ * by only using the current bucket location and the signature. This is
+ * generally required by non-cache mode's eviction and deletion
+ * process without the need to store the alternative hash value or the full
+ * key.
+ *
+ * For cache mode, we use the lower bits for the primary bucket
+ * location and the higher bits for the secondary bucket location. In
+ * cache mode, keys are simply overwritten if the bucket is full. We do not
+ * use xor since the lower/higher bits are more independent hash values and
+ * thus should provide a slightly better table load.
+ */
+ *sig = first_hash;
+ if (ss->cache) {
+ *prim_bkt = sec_hash & ss->bucket_mask;
+ *sec_bkt = (sec_hash >> 16) & ss->bucket_mask;
+ } else {
+ *prim_bkt = sec_hash & ss->bucket_mask;
+ *sec_bkt = (*prim_bkt ^ *sig) & ss->bucket_mask;
+ }
+}
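The xor trick described above means either bucket's alternative can be recomputed from the bucket index and the signature alone; a small helper (an illustration, not part of the library) showing the property that the eviction and deletion paths rely on:

static inline uint32_t
example_alt_bucket(uint32_t bkt, member_sig_t sig, uint32_t bucket_mask)
{
	/* applying the xor a second time yields the original bucket again */
	return (bkt ^ sig) & bucket_mask;
}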
+
+int
+rte_member_lookup_ht(const struct rte_member_setsum *ss,
+ const void *key, member_set_t *set_id)
+{
+ uint32_t prim_bucket, sec_bucket;
+ member_sig_t tmp_sig;
+ struct member_ht_bucket *buckets = ss->table;
+
+ *set_id = RTE_MEMBER_NO_MATCH;
+ get_buckets_index(ss, key, &prim_bucket, &sec_bucket, &tmp_sig);
+
+ switch (ss->sig_cmp_fn) {
+#if defined(RTE_ARCH_X86) && defined(RTE_MACHINE_CPUFLAG_AVX2)
+ case RTE_MEMBER_COMPARE_AVX2:
+ if (search_bucket_single_avx(prim_bucket, tmp_sig, buckets,
+ set_id) ||
+ search_bucket_single_avx(sec_bucket, tmp_sig,
+ buckets, set_id))
+ return 1;
+ break;
+#endif
+ default:
+ if (search_bucket_single(prim_bucket, tmp_sig, buckets,
+ set_id) ||
+ search_bucket_single(sec_bucket, tmp_sig,
+ buckets, set_id))
+ return 1;
+ }
+
+ return 0;
+}
+
+uint32_t
+rte_member_lookup_bulk_ht(const struct rte_member_setsum *ss,
+ const void **keys, uint32_t num_keys, member_set_t *set_id)
+{
+ uint32_t i;
+ uint32_t num_matches = 0;
+ struct member_ht_bucket *buckets = ss->table;
+ member_sig_t tmp_sig[RTE_MEMBER_LOOKUP_BULK_MAX];
+ uint32_t prim_buckets[RTE_MEMBER_LOOKUP_BULK_MAX];
+ uint32_t sec_buckets[RTE_MEMBER_LOOKUP_BULK_MAX];
+
+ for (i = 0; i < num_keys; i++) {
+ get_buckets_index(ss, keys[i], &prim_buckets[i],
+ &sec_buckets[i], &tmp_sig[i]);
+ rte_prefetch0(&buckets[prim_buckets[i]]);
+ rte_prefetch0(&buckets[sec_buckets[i]]);
+ }
+
+ for (i = 0; i < num_keys; i++) {
+ switch (ss->sig_cmp_fn) {
+#if defined(RTE_ARCH_X86) && defined(RTE_MACHINE_CPUFLAG_AVX2)
+ case RTE_MEMBER_COMPARE_AVX2:
+ if (search_bucket_single_avx(prim_buckets[i],
+ tmp_sig[i], buckets, &set_id[i]) ||
+ search_bucket_single_avx(sec_buckets[i],
+ tmp_sig[i], buckets, &set_id[i]))
+ num_matches++;
+ else
+ set_id[i] = RTE_MEMBER_NO_MATCH;
+ break;
+#endif
+ default:
+ if (search_bucket_single(prim_buckets[i], tmp_sig[i],
+ buckets, &set_id[i]) ||
+ search_bucket_single(sec_buckets[i],
+ tmp_sig[i], buckets, &set_id[i]))
+ num_matches++;
+ else
+ set_id[i] = RTE_MEMBER_NO_MATCH;
+ }
+ }
+ return num_matches;
+}
+
+uint32_t
+rte_member_lookup_multi_ht(const struct rte_member_setsum *ss,
+ const void *key, uint32_t match_per_key,
+ member_set_t *set_id)
+{
+ uint32_t num_matches = 0;
+ uint32_t prim_bucket, sec_bucket;
+ member_sig_t tmp_sig;
+ struct member_ht_bucket *buckets = ss->table;
+
+ get_buckets_index(ss, key, &prim_bucket, &sec_bucket, &tmp_sig);
+
+ switch (ss->sig_cmp_fn) {
+#if defined(RTE_ARCH_X86) && defined(RTE_MACHINE_CPUFLAG_AVX2)
+ case RTE_MEMBER_COMPARE_AVX2:
+ search_bucket_multi_avx(prim_bucket, tmp_sig, buckets,
+ &num_matches, match_per_key, set_id);
+ if (num_matches < match_per_key)
+ search_bucket_multi_avx(sec_bucket, tmp_sig,
+ buckets, &num_matches, match_per_key, set_id);
+ return num_matches;
+#endif
+ default:
+ search_bucket_multi(prim_bucket, tmp_sig, buckets, &num_matches,
+ match_per_key, set_id);
+ if (num_matches < match_per_key)
+ search_bucket_multi(sec_bucket, tmp_sig,
+ buckets, &num_matches, match_per_key, set_id);
+ return num_matches;
+ }
+}
+
+uint32_t
+rte_member_lookup_multi_bulk_ht(const struct rte_member_setsum *ss,
+ const void **keys, uint32_t num_keys, uint32_t match_per_key,
+ uint32_t *match_count,
+ member_set_t *set_ids)
+{
+ uint32_t i;
+ uint32_t num_matches = 0;
+ struct member_ht_bucket *buckets = ss->table;
+ uint32_t match_cnt_tmp;
+ member_sig_t tmp_sig[RTE_MEMBER_LOOKUP_BULK_MAX];
+ uint32_t prim_buckets[RTE_MEMBER_LOOKUP_BULK_MAX];
+ uint32_t sec_buckets[RTE_MEMBER_LOOKUP_BULK_MAX];
+
+ for (i = 0; i < num_keys; i++) {
+ get_buckets_index(ss, keys[i], &prim_buckets[i],
+ &sec_buckets[i], &tmp_sig[i]);
+ rte_prefetch0(&buckets[prim_buckets[i]]);
+ rte_prefetch0(&buckets[sec_buckets[i]]);
+ }
+ for (i = 0; i < num_keys; i++) {
+ match_cnt_tmp = 0;
+
+ switch (ss->sig_cmp_fn) {
+#if defined(RTE_ARCH_X86) && defined(RTE_MACHINE_CPUFLAG_AVX2)
+ case RTE_MEMBER_COMPARE_AVX2:
+ search_bucket_multi_avx(prim_buckets[i], tmp_sig[i],
+ buckets, &match_cnt_tmp, match_per_key,
+ &set_ids[i*match_per_key]);
+ if (match_cnt_tmp < match_per_key)
+ search_bucket_multi_avx(sec_buckets[i],
+ tmp_sig[i], buckets, &match_cnt_tmp,
+ match_per_key,
+ &set_ids[i*match_per_key]);
+ match_count[i] = match_cnt_tmp;
+ if (match_cnt_tmp != 0)
+ num_matches++;
+ break;
+#endif
+ default:
+ search_bucket_multi(prim_buckets[i], tmp_sig[i],
+ buckets, &match_cnt_tmp, match_per_key,
+ &set_ids[i*match_per_key]);
+ if (match_cnt_tmp < match_per_key)
+ search_bucket_multi(sec_buckets[i], tmp_sig[i],
+ buckets, &match_cnt_tmp, match_per_key,
+ &set_ids[i*match_per_key]);
+ match_count[i] = match_cnt_tmp;
+ if (match_cnt_tmp != 0)
+ num_matches++;
+ }
+ }
+ return num_matches;
+}
+
+static inline int
+try_insert(struct member_ht_bucket *buckets, uint32_t prim, uint32_t sec,
+ member_sig_t sig, member_set_t set_id)
+{
+ int i;
+ /* If not full then insert into one slot */
+ for (i = 0; i < RTE_MEMBER_BUCKET_ENTRIES; i++) {
+ if (buckets[prim].sets[i] == RTE_MEMBER_NO_MATCH) {
+ buckets[prim].sigs[i] = sig;
+ buckets[prim].sets[i] = set_id;
+ return 0;
+ }
+ }
+ /* If prim failed, we need to access second bucket */
+ for (i = 0; i < RTE_MEMBER_BUCKET_ENTRIES; i++) {
+ if (buckets[sec].sets[i] == RTE_MEMBER_NO_MATCH) {
+ buckets[sec].sigs[i] = sig;
+ buckets[sec].sets[i] = set_id;
+ return 0;
+ }
+ }
+ return -1;
+}
+
+static inline int
+try_update(struct member_ht_bucket *buckets, uint32_t prim, uint32_t sec,
+ member_sig_t sig, member_set_t set_id,
+ enum rte_member_sig_compare_function cmp_fn)
+{
+ switch (cmp_fn) {
+#if defined(RTE_ARCH_X86) && defined(RTE_MACHINE_CPUFLAG_AVX2)
+ case RTE_MEMBER_COMPARE_AVX2:
+ if (update_entry_search_avx(prim, sig, buckets, set_id) ||
+ update_entry_search_avx(sec, sig, buckets,
+ set_id))
+ return 0;
+ break;
+#endif
+ default:
+ if (update_entry_search(prim, sig, buckets, set_id) ||
+ update_entry_search(sec, sig, buckets,
+ set_id))
+ return 0;
+ }
+ return -1;
+}
+
+static inline int
+evict_from_bucket(void)
+{
+ /* For now, we randomly pick one entry to evict */
+ return rte_rand() & (RTE_MEMBER_BUCKET_ENTRIES - 1);
+}
+
+/*
+ * This function is similar to the cuckoo hash make_space function in hash
+ * library
+ */
+static inline int
+make_space_bucket(const struct rte_member_setsum *ss, uint32_t bkt_idx,
+ unsigned int *nr_pushes)
+{
+ unsigned int i, j;
+ int ret;
+ struct member_ht_bucket *buckets = ss->table;
+ uint32_t next_bucket_idx;
+ struct member_ht_bucket *next_bkt[RTE_MEMBER_BUCKET_ENTRIES];
+ struct member_ht_bucket *bkt = &buckets[bkt_idx];
+ /* MSB is set to indicate if an entry has already been pushed */
+ member_set_t flag_mask = 1U << (sizeof(member_set_t) * 8 - 1);
+
+ /*
+ * Push existing item (search for bucket with space in
+ * alternative locations) to its alternative location
+ */
+ for (i = 0; i < RTE_MEMBER_BUCKET_ENTRIES; i++) {
+ /* Search for space in alternative locations */
+ next_bucket_idx = (bkt->sigs[i] ^ bkt_idx) & ss->bucket_mask;
+ next_bkt[i] = &buckets[next_bucket_idx];
+ for (j = 0; j < RTE_MEMBER_BUCKET_ENTRIES; j++) {
+ if (next_bkt[i]->sets[j] == RTE_MEMBER_NO_MATCH)
+ break;
+ }
+
+ if (j != RTE_MEMBER_BUCKET_ENTRIES)
+ break;
+ }
+
+ /* Alternative location has spare room (end of recursive function) */
+ if (i != RTE_MEMBER_BUCKET_ENTRIES) {
+ next_bkt[i]->sigs[j] = bkt->sigs[i];
+ next_bkt[i]->sets[j] = bkt->sets[i];
+ return i;
+ }
+
+ /* Pick entry that has not been pushed yet */
+ for (i = 0; i < RTE_MEMBER_BUCKET_ENTRIES; i++)
+ if ((bkt->sets[i] & flag_mask) == 0)
+ break;
+
+ /* All entries have been pushed, so entry cannot be added */
+ if (i == RTE_MEMBER_BUCKET_ENTRIES ||
+ ++(*nr_pushes) > RTE_MEMBER_MAX_PUSHES)
+ return -ENOSPC;
+
+ next_bucket_idx = (bkt->sigs[i] ^ bkt_idx) & ss->bucket_mask;
+ /* Set flag to indicate that this entry is going to be pushed */
+ bkt->sets[i] |= flag_mask;
+
+ /* Need room in alternative bucket to insert the pushed entry */
+ ret = make_space_bucket(ss, next_bucket_idx, nr_pushes);
+ /*
+ * After recursive function.
+ * Clear flags and insert the pushed entry
+ * in its alternative location if successful,
+ * or return error
+ */
+ bkt->sets[i] &= ~flag_mask;
+ if (ret >= 0) {
+ next_bkt[i]->sigs[ret] = bkt->sigs[i];
+ next_bkt[i]->sets[ret] = bkt->sets[i];
+ return i;
+ } else
+ return ret;
+}
+
+int
+rte_member_add_ht(const struct rte_member_setsum *ss,
+ const void *key, member_set_t set_id)
+{
+ int ret;
+ unsigned int nr_pushes = 0;
+ uint32_t prim_bucket, sec_bucket;
+ member_sig_t tmp_sig;
+ struct member_ht_bucket *buckets = ss->table;
+ member_set_t flag_mask = 1U << (sizeof(member_set_t) * 8 - 1);
+
+ if (set_id == RTE_MEMBER_NO_MATCH || (set_id & flag_mask) != 0)
+ return -EINVAL;
+
+ get_buckets_index(ss, key, &prim_bucket, &sec_bucket, &tmp_sig);
+
+ /*
+ * If it is cache based setsummary, we try overwriting (updating)
+ * existing entry with the same signature first. In cache mode, we allow
+ * false negatives and only cache the most recent keys.
+ *
+ * For non-cache mode, we do not update existing entry with the same
+ * signature. This is because if two keys with same signature update
+ * each other, false negative may happen, which is not the expected
+ * behavior for non-cache setsummary.
+ */
+ if (ss->cache) {
+ ret = try_update(buckets, prim_bucket, sec_bucket, tmp_sig,
+ set_id, ss->sig_cmp_fn);
+ if (ret != -1)
+ return ret;
+ }
+ /* If not full then insert into one slot */
+ ret = try_insert(buckets, prim_bucket, sec_bucket, tmp_sig, set_id);
+ if (ret != -1)
+ return ret;
+
+ /* Randomly pick prim or sec bucket for recursive displacement */
+ uint32_t select_bucket = (tmp_sig & 1U) ? prim_bucket : sec_bucket;
+ if (ss->cache) {
+ ret = evict_from_bucket();
+ buckets[select_bucket].sigs[ret] = tmp_sig;
+ buckets[select_bucket].sets[ret] = set_id;
+ return 1;
+ }
+
+ ret = make_space_bucket(ss, select_bucket, &nr_pushes);
+ if (ret >= 0) {
+ buckets[select_bucket].sigs[ret] = tmp_sig;
+ buckets[select_bucket].sets[ret] = set_id;
+ ret = 1;
+ }
+
+ return ret;
+}
+
+void
+rte_member_free_ht(struct rte_member_setsum *ss)
+{
+ rte_free(ss->table);
+}
+
+int
+rte_member_delete_ht(const struct rte_member_setsum *ss, const void *key,
+ member_set_t set_id)
+{
+ int i;
+ uint32_t prim_bucket, sec_bucket;
+ member_sig_t tmp_sig;
+ struct member_ht_bucket *buckets = ss->table;
+
+ get_buckets_index(ss, key, &prim_bucket, &sec_bucket, &tmp_sig);
+
+ for (i = 0; i < RTE_MEMBER_BUCKET_ENTRIES; i++) {
+ if (tmp_sig == buckets[prim_bucket].sigs[i] &&
+ set_id == buckets[prim_bucket].sets[i]) {
+ buckets[prim_bucket].sets[i] = RTE_MEMBER_NO_MATCH;
+ return 0;
+ }
+ }
+
+ for (i = 0; i < RTE_MEMBER_BUCKET_ENTRIES; i++) {
+ if (tmp_sig == buckets[sec_bucket].sigs[i] &&
+ set_id == buckets[sec_bucket].sets[i]) {
+ buckets[sec_bucket].sets[i] = RTE_MEMBER_NO_MATCH;
+ return 0;
+ }
+ }
+ return -ENOENT;
+}
+
+void
+rte_member_reset_ht(const struct rte_member_setsum *ss)
+{
+ uint32_t i, j;
+ struct member_ht_bucket *buckets = ss->table;
+
+ for (i = 0; i < ss->bucket_cnt; i++) {
+ for (j = 0; j < RTE_MEMBER_BUCKET_ENTRIES; j++)
+ buckets[i].sets[j] = RTE_MEMBER_NO_MATCH;
+ }
+}
diff --git a/lib/librte_member/rte_member_ht.h b/lib/librte_member/rte_member_ht.h
new file mode 100644
index 00000000..3148a492
--- /dev/null
+++ b/lib/librte_member/rte_member_ht.h
@@ -0,0 +1,94 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright(c) 2017 Intel Corporation. All rights reserved.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef _RTE_MEMBER_HT_H_
+#define _RTE_MEMBER_HT_H_
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/* Maximum number of pushes for cuckoo path in HT mode. */
+#define RTE_MEMBER_MAX_PUSHES 50
+
+typedef uint16_t member_sig_t; /* signature size is 16 bit */
+
+/* The bucket struct for ht setsum */
+struct member_ht_bucket {
+ member_sig_t sigs[RTE_MEMBER_BUCKET_ENTRIES]; /* 2-byte signature */
+ member_set_t sets[RTE_MEMBER_BUCKET_ENTRIES]; /* 2-byte set */
+} __rte_cache_aligned;
+
+int
+rte_member_create_ht(struct rte_member_setsum *ss,
+ const struct rte_member_parameters *params);
+
+int
+rte_member_lookup_ht(const struct rte_member_setsum *setsum,
+ const void *key, member_set_t *set_id);
+
+uint32_t
+rte_member_lookup_bulk_ht(const struct rte_member_setsum *setsum,
+ const void **keys, uint32_t num_keys,
+ member_set_t *set_ids);
+
+uint32_t
+rte_member_lookup_multi_ht(const struct rte_member_setsum *setsum,
+ const void *key, uint32_t match_per_key,
+ member_set_t *set_id);
+
+uint32_t
+rte_member_lookup_multi_bulk_ht(const struct rte_member_setsum *setsum,
+ const void **keys, uint32_t num_keys, uint32_t match_per_key,
+ uint32_t *match_count,
+ member_set_t *set_ids);
+
+int
+rte_member_add_ht(const struct rte_member_setsum *setsum,
+ const void *key, member_set_t set_id);
+
+void
+rte_member_free_ht(struct rte_member_setsum *setsum);
+
+int
+rte_member_delete_ht(const struct rte_member_setsum *ss, const void *key,
+ member_set_t set_id);
+
+void
+rte_member_reset_ht(const struct rte_member_setsum *setsum);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _RTE_MEMBER_HT_H_ */
diff --git a/lib/librte_member/rte_member_vbf.c b/lib/librte_member/rte_member_vbf.c
new file mode 100644
index 00000000..1a98ac84
--- /dev/null
+++ b/lib/librte_member/rte_member_vbf.c
@@ -0,0 +1,350 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright(c) 2017 Intel Corporation. All rights reserved.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <math.h>
+#include <string.h>
+
+#include <rte_malloc.h>
+#include <rte_memory.h>
+#include <rte_errno.h>
+#include <rte_log.h>
+
+#include "rte_member.h"
+#include "rte_member_vbf.h"
+
+/*
+ * vBF is currently implemented as a big array.
+ * The BFs have a vertical layout: bits at the same location of all BFs stay
+ * in the same cache line.
+ * For example, if we have 32 bloom filters, we use a uint32_t array to
+ * represent all of them. array[0] represents the first location of all the
+ * bloom filters, array[1] represents the second location of all the
+ * bloom filters, etc. The advantage of this layout is to minimize the average
+ * number of memory accesses to test all bloom filters.
+ *
+ * Currently the implementation supports vBF containing 1,2,4,8,16,32 BFs.
+ */
+int
+rte_member_create_vbf(struct rte_member_setsum *ss,
+ const struct rte_member_parameters *params)
+{
+
+ if (params->num_set > RTE_MEMBER_MAX_BF ||
+ !rte_is_power_of_2(params->num_set) ||
+ params->num_keys == 0 ||
+ params->false_positive_rate == 0 ||
+ params->false_positive_rate > 1) {
+ rte_errno = EINVAL;
+ RTE_MEMBER_LOG(ERR, "Membership vBF create with invalid parameters\n");
+ return -EINVAL;
+ }
+
+ /* We assume the expected keys are evenly distributed across all BFs */
+ uint32_t num_keys_per_bf = 1 + (params->num_keys - 1) / ss->num_set;
+
+ /*
+ * Note that the false positive rate is for all BFs in the vBF
+ * such that the single BF's false positive rate needs to be
+ * calculated.
+ * Assume each BF's False positive rate is fp_one_bf. The total false
+ * positive rate is fp = 1-(1-fp_one_bf)^n.
+ * => fp_one_bf = 1 - (1-fp)^(1/n)
+ */
+
+ float fp_one_bf = 1 - pow((1 - params->false_positive_rate),
+ 1.0 / ss->num_set);
+
+ if (fp_one_bf == 0) {
+ rte_errno = EINVAL;
+ RTE_MEMBER_LOG(ERR, "Membership BF false positive rate is too small\n");
+ return -EINVAL;
+ }
+
+ uint32_t bits = ceil((num_keys_per_bf *
+ log(fp_one_bf)) /
+ log(1.0 / (pow(2.0, log(2.0)))));
+
+ /* We round to power of 2 for performance during lookup */
+ ss->bits = rte_align32pow2(bits);
+
+ ss->num_hashes = (uint32_t)(log(2.0) * bits / num_keys_per_bf);
+ ss->bit_mask = ss->bits - 1;
+
+ /*
+ * Since we round the bits to power of 2, the final false positive
+ * rate will probably not be the same as the user specified. We log the
+ * new value as a debug message.
+ */
+ float new_fp = pow((1 - pow((1 - 1.0 / ss->bits), num_keys_per_bf *
+ ss->num_hashes)), ss->num_hashes);
+ new_fp = 1 - pow((1 - new_fp), ss->num_set);
+
+ /*
+ * Reduce hash function count, until we approach the user specified
+ * false-positive rate. Otherwise it is too conservative
+ */
+ int tmp_num_hash = ss->num_hashes;
+
+ while (tmp_num_hash > 1) {
+ float tmp_fp = new_fp;
+
+ tmp_num_hash--;
+ new_fp = pow((1 - pow((1 - 1.0 / ss->bits), num_keys_per_bf *
+ tmp_num_hash)), tmp_num_hash);
+ new_fp = 1 - pow((1 - new_fp), ss->num_set);
+
+ if (new_fp > params->false_positive_rate) {
+ new_fp = tmp_fp;
+ tmp_num_hash++;
+ break;
+ }
+ }
+
+ ss->num_hashes = tmp_num_hash;
+
+ /*
+ * To avoid multiplication and division:
+ * mul_shift is used for multiplication shift during bit test
+ * div_shift is used for division shift, to be divided by number of bits
+ * represented by a uint32_t variable
+ */
+ ss->mul_shift = __builtin_ctzl(ss->num_set);
+ ss->div_shift = __builtin_ctzl(32 >> ss->mul_shift);
+
+ RTE_MEMBER_LOG(DEBUG, "vector bloom filter created, "
+ "each bloom filter expects %u keys, needs %u bits, %u hashes, "
+ "with per-BF false positive rate set as %.5f; "
+ "the new calculated vBF false positive rate is %.5f\n",
+ num_keys_per_bf, ss->bits, ss->num_hashes, fp_one_bf, new_fp);
+
+ ss->table = rte_zmalloc_socket(NULL, ss->num_set * (ss->bits >> 3),
+ RTE_CACHE_LINE_SIZE, ss->socket_id);
+ if (ss->table == NULL)
+ return -ENOMEM;
+
+ return 0;
+}
+
+static inline uint32_t
+test_bit(uint32_t bit_loc, const struct rte_member_setsum *ss)
+{
+ uint32_t *vbf = ss->table;
+ uint32_t n = ss->num_set;
+ uint32_t div_shift = ss->div_shift;
+ uint32_t mul_shift = ss->mul_shift;
+ /*
+ * a is how many bits in one BF are represented by one 32bit
+ * variable.
+ */
+ uint32_t a = 32 >> mul_shift;
+ /*
+ * x >> div_shift is the division, x & (a - 1) is the modulo, and
+ * & ((1 << n) - 1) masks out the bits we do not need.
+ */
+ return (vbf[bit_loc >> div_shift] >>
+ ((bit_loc & (a - 1)) << mul_shift)) & ((1ULL << n) - 1);
+}
+
+static inline void
+set_bit(uint32_t bit_loc, const struct rte_member_setsum *ss, int32_t set)
+{
+ uint32_t *vbf = ss->table;
+ uint32_t div_shift = ss->div_shift;
+ uint32_t mul_shift = ss->mul_shift;
+ uint32_t a = 32 >> mul_shift;
+
+ vbf[bit_loc >> div_shift] |=
+ 1UL << (((bit_loc & (a - 1)) << mul_shift) + set - 1);
+}
+
+int
+rte_member_lookup_vbf(const struct rte_member_setsum *ss, const void *key,
+ member_set_t *set_id)
+{
+ uint32_t j;
+ uint32_t h1 = MEMBER_HASH_FUNC(key, ss->key_len, ss->prim_hash_seed);
+ uint32_t h2 = MEMBER_HASH_FUNC(&h1, sizeof(uint32_t),
+ ss->sec_hash_seed);
+ uint32_t mask = ~0;
+ uint32_t bit_loc;
+
+ for (j = 0; j < ss->num_hashes; j++) {
+ bit_loc = (h1 + j * h2) & ss->bit_mask;
+ mask &= test_bit(bit_loc, ss);
+ }
+
+ if (mask) {
+ *set_id = __builtin_ctzl(mask) + 1;
+ return 1;
+ }
+
+ *set_id = RTE_MEMBER_NO_MATCH;
+ return 0;
+}
+
+uint32_t
+rte_member_lookup_bulk_vbf(const struct rte_member_setsum *ss,
+ const void **keys, uint32_t num_keys, member_set_t *set_ids)
+{
+ uint32_t i, k;
+ uint32_t num_matches = 0;
+ uint32_t mask[RTE_MEMBER_LOOKUP_BULK_MAX];
+ uint32_t h1[RTE_MEMBER_LOOKUP_BULK_MAX], h2[RTE_MEMBER_LOOKUP_BULK_MAX];
+ uint32_t bit_loc;
+
+ for (i = 0; i < num_keys; i++)
+ h1[i] = MEMBER_HASH_FUNC(keys[i], ss->key_len,
+ ss->prim_hash_seed);
+ for (i = 0; i < num_keys; i++)
+ h2[i] = MEMBER_HASH_FUNC(&h1[i], sizeof(uint32_t),
+ ss->sec_hash_seed);
+ for (i = 0; i < num_keys; i++) {
+ mask[i] = ~0;
+ for (k = 0; k < ss->num_hashes; k++) {
+ bit_loc = (h1[i] + k * h2[i]) & ss->bit_mask;
+ mask[i] &= test_bit(bit_loc, ss);
+ }
+ }
+ for (i = 0; i < num_keys; i++) {
+ if (mask[i]) {
+ set_ids[i] = __builtin_ctzl(mask[i]) + 1;
+ num_matches++;
+ } else
+ set_ids[i] = RTE_MEMBER_NO_MATCH;
+ }
+ return num_matches;
+}
+
+uint32_t
+rte_member_lookup_multi_vbf(const struct rte_member_setsum *ss,
+ const void *key, uint32_t match_per_key,
+ member_set_t *set_id)
+{
+ uint32_t num_matches = 0;
+ uint32_t j;
+ uint32_t h1 = MEMBER_HASH_FUNC(key, ss->key_len, ss->prim_hash_seed);
+ uint32_t h2 = MEMBER_HASH_FUNC(&h1, sizeof(uint32_t),
+ ss->sec_hash_seed);
+ uint32_t mask = ~0;
+ uint32_t bit_loc;
+
+ for (j = 0; j < ss->num_hashes; j++) {
+ bit_loc = (h1 + j * h2) & ss->bit_mask;
+ mask &= test_bit(bit_loc, ss);
+ }
+ while (mask) {
+ uint32_t loc = __builtin_ctzl(mask);
+ set_id[num_matches] = loc + 1;
+ num_matches++;
+ if (num_matches >= match_per_key)
+ return num_matches;
+ mask &= ~(1UL << loc);
+ }
+ return num_matches;
+}
+
+uint32_t
+rte_member_lookup_multi_bulk_vbf(const struct rte_member_setsum *ss,
+ const void **keys, uint32_t num_keys, uint32_t match_per_key,
+ uint32_t *match_count,
+ member_set_t *set_ids)
+{
+ uint32_t i, k;
+ uint32_t num_matches = 0;
+ uint32_t match_cnt_t;
+ uint32_t mask[RTE_MEMBER_LOOKUP_BULK_MAX];
+ uint32_t h1[RTE_MEMBER_LOOKUP_BULK_MAX], h2[RTE_MEMBER_LOOKUP_BULK_MAX];
+ uint32_t bit_loc;
+
+ for (i = 0; i < num_keys; i++)
+ h1[i] = MEMBER_HASH_FUNC(keys[i], ss->key_len,
+ ss->prim_hash_seed);
+ for (i = 0; i < num_keys; i++)
+ h2[i] = MEMBER_HASH_FUNC(&h1[i], sizeof(uint32_t),
+ ss->sec_hash_seed);
+ for (i = 0; i < num_keys; i++) {
+ mask[i] = ~0;
+ for (k = 0; k < ss->num_hashes; k++) {
+ bit_loc = (h1[i] + k * h2[i]) & ss->bit_mask;
+ mask[i] &= test_bit(bit_loc, ss);
+ }
+ }
+ for (i = 0; i < num_keys; i++) {
+ match_cnt_t = 0;
+ while (mask[i]) {
+ uint32_t loc = __builtin_ctzl(mask[i]);
+ set_ids[i * match_per_key + match_cnt_t] = loc + 1;
+ match_cnt_t++;
+ if (match_cnt_t >= match_per_key)
+ break;
+ mask[i] &= ~(1UL << loc);
+ }
+ match_count[i] = match_cnt_t;
+ if (match_cnt_t != 0)
+ num_matches++;
+ }
+ return num_matches;
+}
+
+int
+rte_member_add_vbf(const struct rte_member_setsum *ss,
+ const void *key, member_set_t set_id)
+{
+ uint32_t i, h1, h2;
+ uint32_t bit_loc;
+
+ if (set_id > ss->num_set || set_id == RTE_MEMBER_NO_MATCH)
+ return -EINVAL;
+
+ h1 = MEMBER_HASH_FUNC(key, ss->key_len, ss->prim_hash_seed);
+ h2 = MEMBER_HASH_FUNC(&h1, sizeof(uint32_t), ss->sec_hash_seed);
+
+ for (i = 0; i < ss->num_hashes; i++) {
+ bit_loc = (h1 + i * h2) & ss->bit_mask;
+ set_bit(bit_loc, ss, set_id);
+ }
+ return 0;
+}
+
+void
+rte_member_free_vbf(struct rte_member_setsum *ss)
+{
+ rte_free(ss->table);
+}
+
+void
+rte_member_reset_vbf(const struct rte_member_setsum *ss)
+{
+ uint32_t *vbf = ss->table;
+ memset(vbf, 0, (ss->num_set * ss->bits) >> 3);
+}
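
The lookup path above probes the filter with double hashing (bit_loc = h1 + j * h2, masked to the filter size) and ANDs the per-location masks, so a key is reported for set i only when every probed location has bit i set. As a worked example of the per-BF rate derivation in the comments, a requested rate of 0.01 spread over 4 BFs gives fp_one_bf = 1 - 0.99^(1/4), roughly 0.0025. Below is a minimal standalone sketch, not part of the patch, of how the vertical layout maps a bit location to a 32-bit word and an in-word offset; the fixed parameters (8 BFs, 1024 bits per BF) and all names are made up for illustration.

/*
 * Standalone sketch of the vertical vBF bit layout, assuming num_set = 8
 * and 1024 bits per BF. Mirrors the set_bit()/test_bit() arithmetic above.
 */
#include <stdint.h>
#include <stdio.h>

#define NUM_SET   8u    /* BFs packed side by side */
#define BITS      1024u /* bits per BF, power of two */
#define MUL_SHIFT 3u    /* log2(NUM_SET) */
#define DIV_SHIFT 2u    /* log2(32 >> MUL_SHIFT) */

static uint32_t table[NUM_SET * BITS / 32]; /* one flat array for all BFs */

/* Set the bit for 'set' (1-based) at BF location 'bit_loc'. */
static void vbf_set(uint32_t bit_loc, uint32_t set)
{
	uint32_t a = 32 >> MUL_SHIFT; /* BF locations per uint32_t word: 4 */

	table[bit_loc >> DIV_SHIFT] |=
		1u << (((bit_loc & (a - 1)) << MUL_SHIFT) + set - 1);
}

/* Return a NUM_SET-bit mask: bit i-1 is set if BF i has 'bit_loc' set. */
static uint32_t vbf_test(uint32_t bit_loc)
{
	uint32_t a = 32 >> MUL_SHIFT;

	return (table[bit_loc >> DIV_SHIFT] >>
		((bit_loc & (a - 1)) << MUL_SHIFT)) & ((1u << NUM_SET) - 1);
}

int main(void)
{
	vbf_set(5, 3);                                /* mark location 5 in BF #3 */
	printf("mask at loc 5: 0x%x\n", vbf_test(5)); /* prints 0x4 */
	return 0;
}
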
diff --git a/lib/librte_member/rte_member_vbf.h b/lib/librte_member/rte_member_vbf.h
new file mode 100644
index 00000000..5bc158b9
--- /dev/null
+++ b/lib/librte_member/rte_member_vbf.h
@@ -0,0 +1,82 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright(c) 2017 Intel Corporation. All rights reserved.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef _RTE_MEMBER_VBF_H_
+#define _RTE_MEMBER_VBF_H_
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/* Currently we only support up to 32 sets in vBF */
+#define RTE_MEMBER_MAX_BF 32
+
+int
+rte_member_create_vbf(struct rte_member_setsum *ss,
+ const struct rte_member_parameters *params);
+
+int
+rte_member_lookup_vbf(const struct rte_member_setsum *setsum,
+ const void *key, member_set_t *set_id);
+
+uint32_t
+rte_member_lookup_bulk_vbf(const struct rte_member_setsum *setsum,
+ const void **keys, uint32_t num_keys,
+ member_set_t *set_ids);
+
+uint32_t
+rte_member_lookup_multi_vbf(const struct rte_member_setsum *setsum,
+ const void *key, uint32_t match_per_key,
+ member_set_t *set_id);
+
+uint32_t
+rte_member_lookup_multi_bulk_vbf(const struct rte_member_setsum *setsum,
+ const void **keys, uint32_t num_keys, uint32_t match_per_key,
+ uint32_t *match_count,
+ member_set_t *set_ids);
+
+int
+rte_member_add_vbf(const struct rte_member_setsum *setsum,
+ const void *key, member_set_t set_id);
+
+void
+rte_member_free_vbf(struct rte_member_setsum *ss);
+
+void
+rte_member_reset_vbf(const struct rte_member_setsum *setsum);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _RTE_MEMBER_VBF_H_ */
diff --git a/lib/librte_member/rte_member_version.map b/lib/librte_member/rte_member_version.map
new file mode 100644
index 00000000..019e4cd9
--- /dev/null
+++ b/lib/librte_member/rte_member_version.map
@@ -0,0 +1,16 @@
+DPDK_17.11 {
+ global:
+
+ rte_member_add;
+ rte_member_create;
+ rte_member_delete;
+ rte_member_find_existing;
+ rte_member_free;
+ rte_member_lookup;
+ rte_member_lookup_bulk;
+ rte_member_lookup_multi;
+ rte_member_lookup_multi_bulk;
+ rte_member_reset;
+
+ local: *;
+};
diff --git a/lib/librte_member/rte_member_x86.h b/lib/librte_member/rte_member_x86.h
new file mode 100644
index 00000000..d29dd3fe
--- /dev/null
+++ b/lib/librte_member/rte_member_x86.h
@@ -0,0 +1,107 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright(c) 2017 Intel Corporation. All rights reserved.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef _RTE_MEMBER_X86_H_
+#define _RTE_MEMBER_X86_H_
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include <x86intrin.h>
+
+#if defined(RTE_MACHINE_CPUFLAG_AVX2)
+
+static inline int
+update_entry_search_avx(uint32_t bucket_id, member_sig_t tmp_sig,
+ struct member_ht_bucket *buckets,
+ member_set_t set_id)
+{
+ uint32_t hitmask = _mm256_movemask_epi8((__m256i)_mm256_cmpeq_epi16(
+ _mm256_load_si256((__m256i const *)buckets[bucket_id].sigs),
+ _mm256_set1_epi16(tmp_sig)));
+ if (hitmask) {
+ uint32_t hit_idx = __builtin_ctzl(hitmask) >> 1;
+ buckets[bucket_id].sets[hit_idx] = set_id;
+ return 1;
+ }
+ return 0;
+}
+
+static inline int
+search_bucket_single_avx(uint32_t bucket_id, member_sig_t tmp_sig,
+ struct member_ht_bucket *buckets,
+ member_set_t *set_id)
+{
+ uint32_t hitmask = _mm256_movemask_epi8((__m256i)_mm256_cmpeq_epi16(
+ _mm256_load_si256((__m256i const *)buckets[bucket_id].sigs),
+ _mm256_set1_epi16(tmp_sig)));
+ while (hitmask) {
+ uint32_t hit_idx = __builtin_ctzl(hitmask) >> 1;
+ if (buckets[bucket_id].sets[hit_idx] != RTE_MEMBER_NO_MATCH) {
+ *set_id = buckets[bucket_id].sets[hit_idx];
+ return 1;
+ }
+ hitmask &= ~(3U << ((hit_idx) << 1));
+ }
+ return 0;
+}
+
+static inline void
+search_bucket_multi_avx(uint32_t bucket_id, member_sig_t tmp_sig,
+ struct member_ht_bucket *buckets,
+ uint32_t *counter,
+ uint32_t match_per_key,
+ member_set_t *set_id)
+{
+ uint32_t hitmask = _mm256_movemask_epi8((__m256i)_mm256_cmpeq_epi16(
+ _mm256_load_si256((__m256i const *)buckets[bucket_id].sigs),
+ _mm256_set1_epi16(tmp_sig)));
+ while (hitmask) {
+ uint32_t hit_idx = __builtin_ctzl(hitmask) >> 1;
+ if (buckets[bucket_id].sets[hit_idx] != RTE_MEMBER_NO_MATCH) {
+ set_id[*counter] = buckets[bucket_id].sets[hit_idx];
+ (*counter)++;
+ if (*counter >= match_per_key)
+ return;
+ }
+ hitmask &= ~(3U << ((hit_idx) << 1));
+ }
+}
+#endif
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _RTE_MEMBER_X86_H_ */
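
The AVX2 helpers above compare all 16 16-bit signatures of a bucket at once; _mm256_movemask_epi8() returns one bit per byte, so every matching signature contributes a pair of adjacent bits, which is why the code halves the count-trailing-zeros result and clears two bits per hit. A small scalar sketch of that mask bookkeeping, not part of the patch and using made-up values:

/*
 * Scalar sketch of the byte-granularity hitmask handling used by the AVX2
 * helpers above. The lane numbers below are arbitrary example values.
 */
#include <stdint.h>
#include <stdio.h>

int main(void)
{
	/* Suppose 16-bit lanes 2 and 9 matched: each sets a pair of bits. */
	uint32_t hitmask = (3u << (2 * 2)) | (3u << (9 * 2));

	while (hitmask) {
		uint32_t hit_idx = __builtin_ctzl(hitmask) >> 1; /* byte pair -> lane */

		printf("matched lane %u\n", hit_idx);
		hitmask &= ~(3u << (hit_idx << 1));              /* clear that pair */
	}
	return 0;
}
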
diff --git a/lib/librte_mempool/Makefile b/lib/librte_mempool/Makefile
index 7b5bdfee..46654e32 100644
--- a/lib/librte_mempool/Makefile
+++ b/lib/librte_mempool/Makefile
@@ -35,10 +35,11 @@ include $(RTE_SDK)/mk/rte.vars.mk
LIB = librte_mempool.a
CFLAGS += $(WERROR_FLAGS) -I$(SRCDIR) -O3
+LDLIBS += -lrte_eal -lrte_ring
EXPORT_MAP := rte_mempool_version.map
-LIBABIVER := 2
+LIBABIVER := 3
# all source are stored in SRCS-y
SRCS-$(CONFIG_RTE_LIBRTE_MEMPOOL) += rte_mempool.c
diff --git a/lib/librte_mempool/rte_mempool.c b/lib/librte_mempool/rte_mempool.c
index 6fc3c9c7..d50dba49 100644
--- a/lib/librte_mempool/rte_mempool.c
+++ b/lib/librte_mempool/rte_mempool.c
@@ -128,7 +128,7 @@ static unsigned optimize_object_size(unsigned obj_size)
}
static void
-mempool_add_elem(struct rte_mempool *mp, void *obj, phys_addr_t physaddr)
+mempool_add_elem(struct rte_mempool *mp, void *obj, rte_iova_t iova)
{
struct rte_mempool_objhdr *hdr;
struct rte_mempool_objtlr *tlr __rte_unused;
@@ -136,7 +136,7 @@ mempool_add_elem(struct rte_mempool *mp, void *obj, phys_addr_t physaddr)
/* set mempool ptr in header */
hdr = RTE_PTR_SUB(obj, sizeof(*hdr));
hdr->mp = mp;
- hdr->physaddr = physaddr;
+ hdr->iova = iova;
STAILQ_INSERT_TAIL(&mp->elt_list, hdr, next);
mp->populated_size++;
@@ -238,9 +238,16 @@ rte_mempool_calc_obj_size(uint32_t elt_size, uint32_t flags,
* Calculate maximum amount of memory required to store given number of objects.
*/
size_t
-rte_mempool_xmem_size(uint32_t elt_num, size_t total_elt_sz, uint32_t pg_shift)
+rte_mempool_xmem_size(uint32_t elt_num, size_t total_elt_sz, uint32_t pg_shift,
+ unsigned int flags)
{
size_t obj_per_page, pg_num, pg_sz;
+ unsigned int mask;
+
+ mask = MEMPOOL_F_CAPA_BLK_ALIGNED_OBJECTS | MEMPOOL_F_CAPA_PHYS_CONTIG;
+ if ((flags & mask) == mask)
+ /* alignment needs one additional object */
+ elt_num += 1;
if (total_elt_sz == 0)
return 0;
@@ -263,23 +270,29 @@ rte_mempool_xmem_size(uint32_t elt_num, size_t total_elt_sz, uint32_t pg_shift)
*/
ssize_t
rte_mempool_xmem_usage(__rte_unused void *vaddr, uint32_t elt_num,
- size_t total_elt_sz, const phys_addr_t paddr[], uint32_t pg_num,
- uint32_t pg_shift)
+ size_t total_elt_sz, const rte_iova_t iova[], uint32_t pg_num,
+ uint32_t pg_shift, unsigned int flags)
{
uint32_t elt_cnt = 0;
- phys_addr_t start, end;
- uint32_t paddr_idx;
+ rte_iova_t start, end;
+ uint32_t iova_idx;
size_t pg_sz = (size_t)1 << pg_shift;
+ unsigned int mask;
+
+ mask = MEMPOOL_F_CAPA_BLK_ALIGNED_OBJECTS | MEMPOOL_F_CAPA_PHYS_CONTIG;
+ if ((flags & mask) == mask)
+ /* alignment needs one additional object */
+ elt_num += 1;
- /* if paddr is NULL, assume contiguous memory */
- if (paddr == NULL) {
+ /* if iova is NULL, assume contiguous memory */
+ if (iova == NULL) {
start = 0;
end = pg_sz * pg_num;
- paddr_idx = pg_num;
+ iova_idx = pg_num;
} else {
- start = paddr[0];
- end = paddr[0] + pg_sz;
- paddr_idx = 1;
+ start = iova[0];
+ end = iova[0] + pg_sz;
+ iova_idx = 1;
}
while (elt_cnt < elt_num) {
@@ -287,15 +300,15 @@ rte_mempool_xmem_usage(__rte_unused void *vaddr, uint32_t elt_num,
/* enough contiguous memory, add an object */
start += total_elt_sz;
elt_cnt++;
- } else if (paddr_idx < pg_num) {
+ } else if (iova_idx < pg_num) {
/* no room to store one obj, add a page */
- if (end == paddr[paddr_idx]) {
+ if (end == iova[iova_idx]) {
end += pg_sz;
} else {
- start = paddr[paddr_idx];
- end = paddr[paddr_idx] + pg_sz;
+ start = iova[iova_idx];
+ end = iova[iova_idx] + pg_sz;
}
- paddr_idx++;
+ iova_idx++;
} else {
/* no more page, return how many elements fit */
@@ -303,7 +316,7 @@ rte_mempool_xmem_usage(__rte_unused void *vaddr, uint32_t elt_num,
}
}
- return (size_t)paddr_idx << pg_shift;
+ return (size_t)iova_idx << pg_shift;
}
/* free a memchunk allocated with rte_memzone_reserve() */
@@ -344,8 +357,8 @@ rte_mempool_free_memchunks(struct rte_mempool *mp)
* on error.
*/
int
-rte_mempool_populate_phys(struct rte_mempool *mp, char *vaddr,
- phys_addr_t paddr, size_t len, rte_mempool_memchunk_free_cb_t *free_cb,
+rte_mempool_populate_iova(struct rte_mempool *mp, char *vaddr,
+ rte_iova_t iova, size_t len, rte_mempool_memchunk_free_cb_t *free_cb,
void *opaque)
{
unsigned total_elt_sz;
@@ -354,6 +367,11 @@ rte_mempool_populate_phys(struct rte_mempool *mp, char *vaddr,
struct rte_mempool_memhdr *memhdr;
int ret;
+ /* Notify memory area to mempool */
+ ret = rte_mempool_ops_register_memory_area(mp, vaddr, iova, len);
+ if (ret != -ENOTSUP && ret < 0)
+ return ret;
+
/* create the internal ring if not already done */
if ((mp->flags & MEMPOOL_F_POOL_CREATED) == 0) {
ret = rte_mempool_ops_alloc(mp);
@@ -368,29 +386,42 @@ rte_mempool_populate_phys(struct rte_mempool *mp, char *vaddr,
total_elt_sz = mp->header_size + mp->elt_size + mp->trailer_size;
+ /* Detect whether the pool area has sufficient space for the elements */
+ if (mp->flags & MEMPOOL_F_CAPA_PHYS_CONTIG) {
+ if (len < total_elt_sz * mp->size) {
+ RTE_LOG(ERR, MEMPOOL,
+ "pool area %" PRIx64 " not enough\n",
+ (uint64_t)len);
+ return -ENOSPC;
+ }
+ }
+
memhdr = rte_zmalloc("MEMPOOL_MEMHDR", sizeof(*memhdr), 0);
if (memhdr == NULL)
return -ENOMEM;
memhdr->mp = mp;
memhdr->addr = vaddr;
- memhdr->phys_addr = paddr;
+ memhdr->iova = iova;
memhdr->len = len;
memhdr->free_cb = free_cb;
memhdr->opaque = opaque;
- if (mp->flags & MEMPOOL_F_NO_CACHE_ALIGN)
+ if (mp->flags & MEMPOOL_F_CAPA_BLK_ALIGNED_OBJECTS)
+ /* align object start address to a multiple of total_elt_sz */
+ off = total_elt_sz - ((uintptr_t)vaddr % total_elt_sz);
+ else if (mp->flags & MEMPOOL_F_NO_CACHE_ALIGN)
off = RTE_PTR_ALIGN_CEIL(vaddr, 8) - vaddr;
else
off = RTE_PTR_ALIGN_CEIL(vaddr, RTE_CACHE_LINE_SIZE) - vaddr;
while (off + total_elt_sz <= len && mp->populated_size < mp->size) {
off += mp->header_size;
- if (paddr == RTE_BAD_PHYS_ADDR)
+ if (iova == RTE_BAD_IOVA)
mempool_add_elem(mp, (char *)vaddr + off,
- RTE_BAD_PHYS_ADDR);
+ RTE_BAD_IOVA);
else
- mempool_add_elem(mp, (char *)vaddr + off, paddr + off);
+ mempool_add_elem(mp, (char *)vaddr + off, iova + off);
off += mp->elt_size + mp->trailer_size;
i++;
}
@@ -404,12 +435,20 @@ rte_mempool_populate_phys(struct rte_mempool *mp, char *vaddr,
return i;
}
+int
+rte_mempool_populate_phys(struct rte_mempool *mp, char *vaddr,
+ phys_addr_t paddr, size_t len, rte_mempool_memchunk_free_cb_t *free_cb,
+ void *opaque)
+{
+ return rte_mempool_populate_iova(mp, vaddr, paddr, len, free_cb, opaque);
+}
+
/* Add objects in the pool, using a table of physical pages. Return the
* number of objects added, or a negative value on error.
*/
int
-rte_mempool_populate_phys_tab(struct rte_mempool *mp, char *vaddr,
- const phys_addr_t paddr[], uint32_t pg_num, uint32_t pg_shift,
+rte_mempool_populate_iova_tab(struct rte_mempool *mp, char *vaddr,
+ const rte_iova_t iova[], uint32_t pg_num, uint32_t pg_shift,
rte_mempool_memchunk_free_cb_t *free_cb, void *opaque)
{
uint32_t i, n;
@@ -421,18 +460,18 @@ rte_mempool_populate_phys_tab(struct rte_mempool *mp, char *vaddr,
return -EEXIST;
if (mp->flags & MEMPOOL_F_NO_PHYS_CONTIG)
- return rte_mempool_populate_phys(mp, vaddr, RTE_BAD_PHYS_ADDR,
+ return rte_mempool_populate_iova(mp, vaddr, RTE_BAD_IOVA,
pg_num * pg_sz, free_cb, opaque);
for (i = 0; i < pg_num && mp->populated_size < mp->size; i += n) {
/* populate with the largest group of contiguous pages */
for (n = 1; (i + n) < pg_num &&
- paddr[i + n - 1] + pg_sz == paddr[i + n]; n++)
+ iova[i + n - 1] + pg_sz == iova[i + n]; n++)
;
- ret = rte_mempool_populate_phys(mp, vaddr + i * pg_sz,
- paddr[i], n * pg_sz, free_cb, opaque);
+ ret = rte_mempool_populate_iova(mp, vaddr + i * pg_sz,
+ iova[i], n * pg_sz, free_cb, opaque);
if (ret < 0) {
rte_mempool_free_memchunks(mp);
return ret;
@@ -444,6 +483,15 @@ rte_mempool_populate_phys_tab(struct rte_mempool *mp, char *vaddr,
return cnt;
}
+int
+rte_mempool_populate_phys_tab(struct rte_mempool *mp, char *vaddr,
+ const phys_addr_t paddr[], uint32_t pg_num, uint32_t pg_shift,
+ rte_mempool_memchunk_free_cb_t *free_cb, void *opaque)
+{
+ return rte_mempool_populate_iova_tab(mp, vaddr, paddr, pg_num, pg_shift,
+ free_cb, opaque);
+}
+
/* Populate the mempool with a virtual area. Return the number of
* objects added, or a negative value on error.
*/
@@ -452,7 +500,7 @@ rte_mempool_populate_virt(struct rte_mempool *mp, char *addr,
size_t len, size_t pg_sz, rte_mempool_memchunk_free_cb_t *free_cb,
void *opaque)
{
- phys_addr_t paddr;
+ rte_iova_t iova;
size_t off, phys_len;
int ret, cnt = 0;
@@ -466,33 +514,30 @@ rte_mempool_populate_virt(struct rte_mempool *mp, char *addr,
return -EINVAL;
if (mp->flags & MEMPOOL_F_NO_PHYS_CONTIG)
- return rte_mempool_populate_phys(mp, addr, RTE_BAD_PHYS_ADDR,
+ return rte_mempool_populate_iova(mp, addr, RTE_BAD_IOVA,
len, free_cb, opaque);
for (off = 0; off + pg_sz <= len &&
mp->populated_size < mp->size; off += phys_len) {
- paddr = rte_mem_virt2phy(addr + off);
- /* required for xen_dom0 to get the machine address */
- paddr = rte_mem_phy2mch(-1, paddr);
+ iova = rte_mem_virt2iova(addr + off);
- if (paddr == RTE_BAD_PHYS_ADDR && rte_eal_has_hugepages()) {
+ if (iova == RTE_BAD_IOVA && rte_eal_has_hugepages()) {
ret = -EINVAL;
goto fail;
}
/* populate with the largest group of contiguous pages */
for (phys_len = pg_sz; off + phys_len < len; phys_len += pg_sz) {
- phys_addr_t paddr_tmp;
+ rte_iova_t iova_tmp;
- paddr_tmp = rte_mem_virt2phy(addr + off + phys_len);
- paddr_tmp = rte_mem_phy2mch(-1, paddr_tmp);
+ iova_tmp = rte_mem_virt2iova(addr + off + phys_len);
- if (paddr_tmp != paddr + phys_len)
+ if (iova_tmp != iova + phys_len)
break;
}
- ret = rte_mempool_populate_phys(mp, addr + off, paddr,
+ ret = rte_mempool_populate_iova(mp, addr + off, iova,
phys_len, free_cb, opaque);
if (ret < 0)
goto fail;
@@ -515,23 +560,29 @@ rte_mempool_populate_virt(struct rte_mempool *mp, char *addr,
int
rte_mempool_populate_default(struct rte_mempool *mp)
{
- int mz_flags = RTE_MEMZONE_1GB|RTE_MEMZONE_SIZE_HINT_ONLY;
+ unsigned int mz_flags = RTE_MEMZONE_1GB|RTE_MEMZONE_SIZE_HINT_ONLY;
char mz_name[RTE_MEMZONE_NAMESIZE];
const struct rte_memzone *mz;
size_t size, total_elt_sz, align, pg_sz, pg_shift;
- phys_addr_t paddr;
+ rte_iova_t iova;
unsigned mz_id, n;
+ unsigned int mp_flags;
int ret;
/* mempool must not be populated */
if (mp->nb_mem_chunks != 0)
return -EEXIST;
- if (rte_xen_dom0_supported()) {
- pg_sz = RTE_PGSIZE_2M;
- pg_shift = rte_bsf32(pg_sz);
- align = pg_sz;
- } else if (rte_eal_has_hugepages()) {
+ /* Get mempool capabilities */
+ mp_flags = 0;
+ ret = rte_mempool_ops_get_capabilities(mp, &mp_flags);
+ if ((ret < 0) && (ret != -ENOTSUP))
+ return ret;
+
+ /* update mempool capabilities */
+ mp->flags |= mp_flags;
+
+ if (rte_eal_has_hugepages()) {
pg_shift = 0; /* not needed, zone is physically contiguous */
pg_sz = 0;
align = RTE_CACHE_LINE_SIZE;
@@ -543,7 +594,8 @@ rte_mempool_populate_default(struct rte_mempool *mp)
total_elt_sz = mp->header_size + mp->elt_size + mp->trailer_size;
for (mz_id = 0, n = mp->size; n > 0; mz_id++, n -= ret) {
- size = rte_mempool_xmem_size(n, total_elt_sz, pg_shift);
+ size = rte_mempool_xmem_size(n, total_elt_sz, pg_shift,
+ mp->flags);
ret = snprintf(mz_name, sizeof(mz_name),
RTE_MEMPOOL_MZ_FORMAT "_%d", mp->name, mz_id);
@@ -564,13 +616,13 @@ rte_mempool_populate_default(struct rte_mempool *mp)
}
if (mp->flags & MEMPOOL_F_NO_PHYS_CONTIG)
- paddr = RTE_BAD_PHYS_ADDR;
+ iova = RTE_BAD_IOVA;
else
- paddr = mz->phys_addr;
+ iova = mz->iova;
- if (rte_eal_has_hugepages() && !rte_xen_dom0_supported())
- ret = rte_mempool_populate_phys(mp, mz->addr,
- paddr, mz->len,
+ if (rte_eal_has_hugepages())
+ ret = rte_mempool_populate_iova(mp, mz->addr,
+ iova, mz->len,
rte_mempool_memchunk_mz_free,
(void *)(uintptr_t)mz);
else
@@ -600,7 +652,8 @@ get_anon_size(const struct rte_mempool *mp)
pg_sz = getpagesize();
pg_shift = rte_bsf32(pg_sz);
total_elt_sz = mp->header_size + mp->elt_size + mp->trailer_size;
- size = rte_mempool_xmem_size(mp->size, total_elt_sz, pg_shift);
+ size = rte_mempool_xmem_size(mp->size, total_elt_sz, pg_shift,
+ mp->flags);
return size;
}
@@ -742,7 +795,7 @@ rte_mempool_create_empty(const char *name, unsigned n, unsigned elt_size,
struct rte_tailq_entry *te = NULL;
const struct rte_memzone *mz = NULL;
size_t mempool_size;
- int mz_flags = RTE_MEMZONE_1GB|RTE_MEMZONE_SIZE_HINT_ONLY;
+ unsigned int mz_flags = RTE_MEMZONE_1GB|RTE_MEMZONE_SIZE_HINT_ONLY;
struct rte_mempool_objsz objsz;
unsigned lcore_id;
int ret;
@@ -922,7 +975,7 @@ rte_mempool_xmem_create(const char *name, unsigned n, unsigned elt_size,
rte_mempool_ctor_t *mp_init, void *mp_init_arg,
rte_mempool_obj_cb_t *obj_init, void *obj_init_arg,
int socket_id, unsigned flags, void *vaddr,
- const phys_addr_t paddr[], uint32_t pg_num, uint32_t pg_shift)
+ const rte_iova_t iova[], uint32_t pg_num, uint32_t pg_shift)
{
struct rte_mempool *mp = NULL;
int ret;
@@ -934,7 +987,7 @@ rte_mempool_xmem_create(const char *name, unsigned n, unsigned elt_size,
obj_init, obj_init_arg, socket_id, flags);
/* check that we have both VA and PA */
- if (paddr == NULL) {
+ if (iova == NULL) {
rte_errno = EINVAL;
return NULL;
}
@@ -954,7 +1007,7 @@ rte_mempool_xmem_create(const char *name, unsigned n, unsigned elt_size,
if (mp_init)
mp_init(mp, mp_init_arg);
- ret = rte_mempool_populate_phys_tab(mp, vaddr, paddr, pg_num, pg_shift,
+ ret = rte_mempool_populate_iova_tab(mp, vaddr, iova, pg_num, pg_shift,
NULL, NULL);
if (ret < 0 || ret != (int)mp->size)
goto fail;
@@ -1177,7 +1230,7 @@ rte_mempool_dump(FILE *f, struct rte_mempool *mp)
fprintf(f, "mempool <%s>@%p\n", mp->name, mp);
fprintf(f, " flags=%x\n", mp->flags);
fprintf(f, " pool=%p\n", mp->pool_data);
- fprintf(f, " phys_addr=0x%" PRIx64 "\n", mp->mz->phys_addr);
+ fprintf(f, " iova=0x%" PRIx64 "\n", mp->mz->iova);
fprintf(f, " nb_mem_chunks=%u\n", mp->nb_mem_chunks);
fprintf(f, " size=%"PRIu32"\n", mp->size);
fprintf(f, " populated_size=%"PRIu32"\n", mp->populated_size);
diff --git a/lib/librte_mempool/rte_mempool.h b/lib/librte_mempool/rte_mempool.h
index 76b5b3b1..721227f6 100644
--- a/lib/librte_mempool/rte_mempool.h
+++ b/lib/librte_mempool/rte_mempool.h
@@ -157,7 +157,11 @@ struct rte_mempool_objsz {
struct rte_mempool_objhdr {
STAILQ_ENTRY(rte_mempool_objhdr) next; /**< Next in list. */
struct rte_mempool *mp; /**< The mempool owning the object. */
- phys_addr_t physaddr; /**< Physical address of the object. */
+ RTE_STD_C11
+ union {
+ rte_iova_t iova; /**< IO address of the object. */
+ phys_addr_t physaddr; /**< deprecated - Physical address of the object. */
+ };
#ifdef RTE_LIBRTE_MEMPOOL_DEBUG
uint64_t cookie; /**< Debug cookie. */
#endif
@@ -203,7 +207,11 @@ struct rte_mempool_memhdr {
STAILQ_ENTRY(rte_mempool_memhdr) next; /**< Next in list. */
struct rte_mempool *mp; /**< The mempool owning the chunk */
void *addr; /**< Virtual address of the chunk */
- phys_addr_t phys_addr; /**< Physical address of the chunk */
+ RTE_STD_C11
+ union {
+ rte_iova_t iova; /**< IO address of the chunk */
+ phys_addr_t phys_addr; /**< Physical address of the chunk */
+ };
size_t len; /**< length of the chunk */
rte_mempool_memchunk_free_cb_t *free_cb; /**< Free callback */
void *opaque; /**< Argument passed to the free callback */
@@ -226,7 +234,7 @@ struct rte_mempool {
};
void *pool_config; /**< optional args for ops alloc. */
const struct rte_memzone *mz; /**< Memzone where pool is alloc'd. */
- int flags; /**< Flags of the mempool. */
+ unsigned int flags; /**< Flags of the mempool. */
int socket_id; /**< Socket id passed at create. */
uint32_t size; /**< Max size of the mempool. */
uint32_t cache_size;
@@ -265,6 +273,24 @@ struct rte_mempool {
#define MEMPOOL_F_SC_GET 0x0008 /**< Default get is "single-consumer".*/
#define MEMPOOL_F_POOL_CREATED 0x0010 /**< Internal: pool is created. */
#define MEMPOOL_F_NO_PHYS_CONTIG 0x0020 /**< Don't need physically contiguous objs. */
+/**
+ * This capability flag is advertised by a mempool handler if the whole
+ * memory area containing the objects must be physically contiguous.
+ * Note: this flag should not be passed by the application.
+ */
+#define MEMPOOL_F_CAPA_PHYS_CONTIG 0x0040
+/**
+ * This capability flag is advertised by a mempool handler. It is used when
+ * the mempool driver wants the object start address (vaddr) to be aligned
+ * to the block size (i.e. the total element size).
+ *
+ * Note:
+ * - This flag should not be passed by the application; it is intended for
+ * mempool drivers only.
+ * - The mempool driver must also set the MEMPOOL_F_CAPA_PHYS_CONTIG flag
+ * along with MEMPOOL_F_CAPA_BLK_ALIGNED_OBJECTS.
+ */
+#define MEMPOOL_F_CAPA_BLK_ALIGNED_OBJECTS 0x0080
/**
* @internal When debug is enabled, store some statistics.
@@ -389,6 +415,18 @@ typedef int (*rte_mempool_dequeue_t)(struct rte_mempool *mp,
*/
typedef unsigned (*rte_mempool_get_count)(const struct rte_mempool *mp);
+/**
+ * Get the mempool capabilities.
+ */
+typedef int (*rte_mempool_get_capabilities_t)(const struct rte_mempool *mp,
+ unsigned int *flags);
+
+/**
+ * Notify the mempool handler of a new memory area.
+ */
+typedef int (*rte_mempool_ops_register_memory_area_t)
+(const struct rte_mempool *mp, char *vaddr, rte_iova_t iova, size_t len);
+
/** Structure defining mempool operations structure */
struct rte_mempool_ops {
char name[RTE_MEMPOOL_OPS_NAMESIZE]; /**< Name of mempool ops struct. */
@@ -397,6 +435,14 @@ struct rte_mempool_ops {
rte_mempool_enqueue_t enqueue; /**< Enqueue an object. */
rte_mempool_dequeue_t dequeue; /**< Dequeue an object. */
rte_mempool_get_count get_count; /**< Get qty of available objs. */
+ /**
+ * Get the mempool capabilities
+ */
+ rte_mempool_get_capabilities_t get_capabilities;
+ /**
+ * Notify the mempool handler of a new memory area
+ */
+ rte_mempool_ops_register_memory_area_t register_memory_area;
} __rte_cache_aligned;
#define RTE_MEMPOOL_MAX_OPS_IDX 16 /**< Max registered ops structs */
@@ -509,6 +555,43 @@ unsigned
rte_mempool_ops_get_count(const struct rte_mempool *mp);
/**
+ * @internal wrapper for mempool_ops get_capabilities callback.
+ *
+ * @param mp [in]
+ * Pointer to the memory pool.
+ * @param flags [out]
+ * Pointer to the mempool flags.
+ * @return
+ * - 0: Success; the mempool driver has advertised its pool capabilities in
+ * the flags parameter.
+ * - -ENOTSUP - doesn't support get_capabilities ops (valid case).
+ * - Otherwise, pool create fails.
+ */
+int
+rte_mempool_ops_get_capabilities(const struct rte_mempool *mp,
+ unsigned int *flags);
+/**
+ * @internal wrapper for mempool_ops register_memory_area callback.
+ * API to notify the mempool handler when a new memory area is added to the pool.
+ *
+ * @param mp
+ * Pointer to the memory pool.
+ * @param vaddr
+ * Pointer to the buffer virtual address.
+ * @param iova
+ * The IO address of the buffer.
+ * @param len
+ * Pool size.
+ * @return
+ * - 0: Success;
+ * - -ENOTSUP - doesn't support register_memory_area ops (valid error case).
+ * - Otherwise, rte_mempool_populate_phys fails thus pool create fails.
+ */
+int
+rte_mempool_ops_register_memory_area(const struct rte_mempool *mp,
+ char *vaddr, rte_iova_t iova, size_t len);
+
+/**
* @internal wrapper for mempool_ops free callback.
*
* @param mp
@@ -722,11 +805,10 @@ rte_mempool_create(const char *name, unsigned n, unsigned elt_size,
* @param vaddr
* Virtual address of the externally allocated memory buffer.
* Will be used to store mempool objects.
- * @param paddr
- * Array of physical addresses of the pages that comprises given memory
- * buffer.
+ * @param iova
+ * Array of IO addresses of the pages that comprise the given memory buffer.
* @param pg_num
- * Number of elements in the paddr array.
+ * Number of elements in the iova array.
* @param pg_shift
* LOG2 of the physical pages size.
* @return
@@ -739,7 +821,7 @@ rte_mempool_xmem_create(const char *name, unsigned n, unsigned elt_size,
rte_mempool_ctor_t *mp_init, void *mp_init_arg,
rte_mempool_obj_cb_t *obj_init, void *obj_init_arg,
int socket_id, unsigned flags, void *vaddr,
- const phys_addr_t paddr[], uint32_t pg_num, uint32_t pg_shift);
+ const rte_iova_t iova[], uint32_t pg_num, uint32_t pg_shift);
/**
* Create an empty mempool
@@ -798,7 +880,7 @@ rte_mempool_free(struct rte_mempool *mp);
* Add a virtually and physically contiguous memory chunk in the pool
* where objects can be instantiated.
*
- * If the given physical address is unknown (paddr = RTE_BAD_PHYS_ADDR),
+ * If the given IO address is unknown (iova = RTE_BAD_IOVA),
* the chunk doesn't need to be physically contiguous (only virtually),
* and allocated objects may span two pages.
*
@@ -806,8 +888,8 @@ rte_mempool_free(struct rte_mempool *mp);
* A pointer to the mempool structure.
* @param vaddr
* The virtual address of memory that should be used to store objects.
- * @param paddr
- * The physical address
+ * @param iova
+ * The IO address
* @param len
* The length of memory in bytes.
* @param free_cb
@@ -819,6 +901,11 @@ rte_mempool_free(struct rte_mempool *mp);
* On error, the chunk is not added in the memory list of the
* mempool and a negative errno is returned.
*/
+int rte_mempool_populate_iova(struct rte_mempool *mp, char *vaddr,
+ rte_iova_t iova, size_t len, rte_mempool_memchunk_free_cb_t *free_cb,
+ void *opaque);
+
+__rte_deprecated
int rte_mempool_populate_phys(struct rte_mempool *mp, char *vaddr,
phys_addr_t paddr, size_t len, rte_mempool_memchunk_free_cb_t *free_cb,
void *opaque);
@@ -827,18 +914,17 @@ int rte_mempool_populate_phys(struct rte_mempool *mp, char *vaddr,
* Add physical memory for objects in the pool at init
*
* Add a virtually contiguous memory chunk in the pool where objects can
- * be instantiated. The physical addresses corresponding to the virtual
- * area are described in paddr[], pg_num, pg_shift.
+ * be instantiated. The IO addresses corresponding to the virtual
+ * area are described in iova[], pg_num, pg_shift.
*
* @param mp
* A pointer to the mempool structure.
* @param vaddr
* The virtual address of memory that should be used to store objects.
- * @param paddr
- * An array of physical addresses of each page composing the virtual
- * area.
+ * @param iova
+ * An array of IO addresses of each page composing the virtual area.
* @param pg_num
- * Number of elements in the paddr array.
+ * Number of elements in the iova array.
* @param pg_shift
* LOG2 of the physical pages size.
* @param free_cb
@@ -850,6 +936,11 @@ int rte_mempool_populate_phys(struct rte_mempool *mp, char *vaddr,
* On error, the chunks are not added in the memory list of the
* mempool and a negative errno is returned.
*/
+int rte_mempool_populate_iova_tab(struct rte_mempool *mp, char *vaddr,
+ const rte_iova_t iova[], uint32_t pg_num, uint32_t pg_shift,
+ rte_mempool_memchunk_free_cb_t *free_cb, void *opaque);
+
+__rte_deprecated
int rte_mempool_populate_phys_tab(struct rte_mempool *mp, char *vaddr,
const phys_addr_t paddr[], uint32_t pg_num, uint32_t pg_shift,
rte_mempool_memchunk_free_cb_t *free_cb, void *opaque);
@@ -1034,13 +1125,10 @@ rte_mempool_default_cache(struct rte_mempool *mp, unsigned lcore_id)
* positive.
* @param cache
* A pointer to a mempool cache structure. May be NULL if not needed.
- * @param flags
- * The flags used for the mempool creation.
- * Single-producer (MEMPOOL_F_SP_PUT flag) or multi-producers.
*/
static __rte_always_inline void
__mempool_generic_put(struct rte_mempool *mp, void * const *obj_table,
- unsigned n, struct rte_mempool_cache *cache)
+ unsigned int n, struct rte_mempool_cache *cache)
{
void **cache_objs;
@@ -1096,14 +1184,10 @@ ring_enqueue:
* The number of objects to add in the mempool from the obj_table.
* @param cache
* A pointer to a mempool cache structure. May be NULL if not needed.
- * @param flags
- * The flags used for the mempool creation.
- * Single-producer (MEMPOOL_F_SP_PUT flag) or multi-producers.
*/
static __rte_always_inline void
rte_mempool_generic_put(struct rte_mempool *mp, void * const *obj_table,
- unsigned n, struct rte_mempool_cache *cache,
- __rte_unused int flags)
+ unsigned int n, struct rte_mempool_cache *cache)
{
__mempool_check_cookies(mp, obj_table, n, 0);
__mempool_generic_put(mp, obj_table, n, cache);
@@ -1125,11 +1209,11 @@ rte_mempool_generic_put(struct rte_mempool *mp, void * const *obj_table,
*/
static __rte_always_inline void
rte_mempool_put_bulk(struct rte_mempool *mp, void * const *obj_table,
- unsigned n)
+ unsigned int n)
{
struct rte_mempool_cache *cache;
cache = rte_mempool_default_cache(mp, rte_lcore_id());
- rte_mempool_generic_put(mp, obj_table, n, cache, mp->flags);
+ rte_mempool_generic_put(mp, obj_table, n, cache);
}
/**
@@ -1160,16 +1244,13 @@ rte_mempool_put(struct rte_mempool *mp, void *obj)
* The number of objects to get, must be strictly positive.
* @param cache
* A pointer to a mempool cache structure. May be NULL if not needed.
- * @param flags
- * The flags used for the mempool creation.
- * Single-consumer (MEMPOOL_F_SC_GET flag) or multi-consumers.
* @return
* - >=0: Success; number of objects supplied.
* - <0: Error; code of ring dequeue function.
*/
static __rte_always_inline int
__mempool_generic_get(struct rte_mempool *mp, void **obj_table,
- unsigned n, struct rte_mempool_cache *cache)
+ unsigned int n, struct rte_mempool_cache *cache)
{
int ret;
uint32_t index, len;
@@ -1241,16 +1322,13 @@ ring_dequeue:
* The number of objects to get from mempool to obj_table.
* @param cache
* A pointer to a mempool cache structure. May be NULL if not needed.
- * @param flags
- * The flags used for the mempool creation.
- * Single-consumer (MEMPOOL_F_SC_GET flag) or multi-consumers.
* @return
* - 0: Success; objects taken.
* - -ENOENT: Not enough entries in the mempool; no object is retrieved.
*/
static __rte_always_inline int
-rte_mempool_generic_get(struct rte_mempool *mp, void **obj_table, unsigned n,
- struct rte_mempool_cache *cache, __rte_unused int flags)
+rte_mempool_generic_get(struct rte_mempool *mp, void **obj_table,
+ unsigned int n, struct rte_mempool_cache *cache)
{
int ret;
ret = __mempool_generic_get(mp, obj_table, n, cache);
@@ -1282,11 +1360,11 @@ rte_mempool_generic_get(struct rte_mempool *mp, void **obj_table, unsigned n,
* - -ENOENT: Not enough entries in the mempool; no object is retrieved.
*/
static __rte_always_inline int
-rte_mempool_get_bulk(struct rte_mempool *mp, void **obj_table, unsigned n)
+rte_mempool_get_bulk(struct rte_mempool *mp, void **obj_table, unsigned int n)
{
struct rte_mempool_cache *cache;
cache = rte_mempool_default_cache(mp, rte_lcore_id());
- return rte_mempool_generic_get(mp, obj_table, n, cache, mp->flags);
+ return rte_mempool_generic_get(mp, obj_table, n, cache);
}
/**
@@ -1383,24 +1461,29 @@ rte_mempool_empty(const struct rte_mempool *mp)
}
/**
- * Return the physical address of elt, which is an element of the pool mp.
+ * Return the IO address of elt, which is an element of the pool mp.
*
- * @param mp
- * A pointer to the mempool structure.
* @param elt
* A pointer (virtual address) to the element of the pool.
* @return
- * The physical address of the elt element.
+ * The IO address of the elt element.
* If the mempool was created with MEMPOOL_F_NO_PHYS_CONTIG, the
- * returned value is RTE_BAD_PHYS_ADDR.
+ * returned value is RTE_BAD_IOVA.
*/
-static inline phys_addr_t
-rte_mempool_virt2phy(__rte_unused const struct rte_mempool *mp, const void *elt)
+static inline rte_iova_t
+rte_mempool_virt2iova(const void *elt)
{
const struct rte_mempool_objhdr *hdr;
hdr = (const struct rte_mempool_objhdr *)RTE_PTR_SUB(elt,
sizeof(*hdr));
- return hdr->physaddr;
+ return hdr->iova;
+}
+
+__rte_deprecated
+static inline phys_addr_t
+rte_mempool_virt2phy(__rte_unused const struct rte_mempool *mp, const void *elt)
+{
+ return rte_mempool_virt2iova(elt);
}
/**
@@ -1489,11 +1572,13 @@ uint32_t rte_mempool_calc_obj_size(uint32_t elt_size, uint32_t flags,
* by rte_mempool_calc_obj_size().
* @param pg_shift
* LOG2 of the physical pages size. If set to 0, ignore page boundaries.
+ * @param flags
+ * The mempool flags.
* @return
* Required memory size aligned at page boundary.
*/
size_t rte_mempool_xmem_size(uint32_t elt_num, size_t total_elt_sz,
- uint32_t pg_shift);
+ uint32_t pg_shift, unsigned int flags);
/**
* Get the size of memory required to store mempool elements.
@@ -1509,13 +1594,14 @@ size_t rte_mempool_xmem_size(uint32_t elt_num, size_t total_elt_sz,
* @param total_elt_sz
* The size of each element, including header and trailer, as returned
* by rte_mempool_calc_obj_size().
- * @param paddr
- * Array of physical addresses of the pages that comprises given memory
- * buffer.
+ * @param iova
+ * Array of IO addresses of the pages that comprise the given memory buffer.
* @param pg_num
- * Number of elements in the paddr array.
+ * Number of elements in the iova array.
* @param pg_shift
* LOG2 of the physical pages size.
+ * @param flags
+ * The mempool flags.
* @return
* On success, the number of bytes needed to store given number of
* objects, aligned to the given page size. If the provided memory
@@ -1523,8 +1609,8 @@ size_t rte_mempool_xmem_size(uint32_t elt_num, size_t total_elt_sz,
* is the actual number of elements that can be stored in that buffer.
*/
ssize_t rte_mempool_xmem_usage(void *vaddr, uint32_t elt_num,
- size_t total_elt_sz, const phys_addr_t paddr[], uint32_t pg_num,
- uint32_t pg_shift);
+ size_t total_elt_sz, const rte_iova_t iova[], uint32_t pg_num,
+ uint32_t pg_shift, unsigned int flags);
/**
* Walk list of all memory pools
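
Likewise, the per-object address helper is renamed: rte_mempool_virt2phy(mp, elt) remains as a deprecated wrapper, while new code calls rte_mempool_virt2iova(elt) without the pool argument. A one-line usage sketch, not part of the patch; the helper name is made up:

#include <rte_mempool.h>

/* Return the IO address of an object obtained from a mempool. */
static rte_iova_t
obj_io_addr(const void *obj)
{
	/* Before 17.11: rte_mempool_virt2phy(mp, obj) */
	return rte_mempool_virt2iova(obj);
}
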
diff --git a/lib/librte_mempool/rte_mempool_ops.c b/lib/librte_mempool/rte_mempool_ops.c
index 5f24de25..92b9f90c 100644
--- a/lib/librte_mempool/rte_mempool_ops.c
+++ b/lib/librte_mempool/rte_mempool_ops.c
@@ -37,6 +37,7 @@
#include <rte_mempool.h>
#include <rte_errno.h>
+#include <rte_dev.h>
/* indirect jump table to support external memory pools. */
struct rte_mempool_ops_table rte_mempool_ops_table = {
@@ -85,6 +86,8 @@ rte_mempool_register_ops(const struct rte_mempool_ops *h)
ops->enqueue = h->enqueue;
ops->dequeue = h->dequeue;
ops->get_count = h->get_count;
+ ops->get_capabilities = h->get_capabilities;
+ ops->register_memory_area = h->register_memory_area;
rte_spinlock_unlock(&rte_mempool_ops_table.sl);
@@ -123,6 +126,32 @@ rte_mempool_ops_get_count(const struct rte_mempool *mp)
return ops->get_count(mp);
}
+/* wrapper to get external mempool capabilities. */
+int
+rte_mempool_ops_get_capabilities(const struct rte_mempool *mp,
+ unsigned int *flags)
+{
+ struct rte_mempool_ops *ops;
+
+ ops = rte_mempool_get_ops(mp->ops_index);
+
+ RTE_FUNC_PTR_OR_ERR_RET(ops->get_capabilities, -ENOTSUP);
+ return ops->get_capabilities(mp, flags);
+}
+
+/* wrapper to notify new memory area to external mempool */
+int
+rte_mempool_ops_register_memory_area(const struct rte_mempool *mp, char *vaddr,
+ rte_iova_t iova, size_t len)
+{
+ struct rte_mempool_ops *ops;
+
+ ops = rte_mempool_get_ops(mp->ops_index);
+
+ RTE_FUNC_PTR_OR_ERR_RET(ops->register_memory_area, -ENOTSUP);
+ return ops->register_memory_area(mp, vaddr, iova, len);
+}
+
/* sets mempool ops previously registered by rte_mempool_register_ops. */
int
rte_mempool_set_ops_byname(struct rte_mempool *mp, const char *name,
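
The two new ops wrappers above dispatch to optional driver callbacks and treat a missing callback (-ENOTSUP) as a valid case. A sketch, not part of the patch, of how an external mempool driver might advertise the new capability flags; the driver name and the omitted callbacks are hypothetical:

#include <rte_mempool.h>

static int
my_pool_get_capabilities(const struct rte_mempool *mp __rte_unused,
		unsigned int *flags)
{
	/* This driver needs a physically contiguous, block-aligned area. */
	*flags |= MEMPOOL_F_CAPA_PHYS_CONTIG |
		MEMPOOL_F_CAPA_BLK_ALIGNED_OBJECTS;
	return 0;
}

static const struct rte_mempool_ops my_pool_ops = {
	.name = "my_pool",
	/* .alloc, .free, .enqueue, .dequeue, .get_count omitted in this sketch */
	.get_capabilities = my_pool_get_capabilities,
};

MEMPOOL_REGISTER_OPS(my_pool_ops);
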
diff --git a/lib/librte_mempool/rte_mempool_version.map b/lib/librte_mempool/rte_mempool_version.map
index f9c07944..62b76f91 100644
--- a/lib/librte_mempool/rte_mempool_version.map
+++ b/lib/librte_mempool/rte_mempool_version.map
@@ -41,3 +41,13 @@ DPDK_16.07 {
rte_mempool_set_ops_byname;
} DPDK_2.0;
+
+DPDK_17.11 {
+ global:
+
+ rte_mempool_ops_get_capabilities;
+ rte_mempool_ops_register_memory_area;
+ rte_mempool_populate_iova;
+ rte_mempool_populate_iova_tab;
+
+} DPDK_16.07;
diff --git a/lib/librte_meter/Makefile b/lib/librte_meter/Makefile
index 539bfddd..bfeb5d60 100644
--- a/lib/librte_meter/Makefile
+++ b/lib/librte_meter/Makefile
@@ -40,6 +40,7 @@ CFLAGS += -O3
CFLAGS += $(WERROR_FLAGS)
LDLIBS += -lm
+LDLIBS += -lrte_eal
EXPORT_MAP := rte_meter_version.map
diff --git a/lib/librte_metrics/Makefile b/lib/librte_metrics/Makefile
index d4990e83..a6efba4a 100644
--- a/lib/librte_metrics/Makefile
+++ b/lib/librte_metrics/Makefile
@@ -35,6 +35,7 @@ include $(RTE_SDK)/mk/rte.vars.mk
LIB = librte_metrics.a
CFLAGS += $(WERROR_FLAGS) -I$(SRCDIR) -O3
+LDLIBS += -lrte_eal
EXPORT_MAP := rte_metrics_version.map
diff --git a/lib/librte_metrics/rte_metrics.c b/lib/librte_metrics/rte_metrics.c
index b66a72bb..d9404001 100644
--- a/lib/librte_metrics/rte_metrics.c
+++ b/lib/librte_metrics/rte_metrics.c
@@ -115,7 +115,7 @@ rte_metrics_reg_name(const char *name)
int
rte_metrics_reg_names(const char * const *names, uint16_t cnt_names)
{
- struct rte_metrics_meta_s *entry;
+ struct rte_metrics_meta_s *entry = NULL;
struct rte_metrics_data_s *stats;
const struct rte_memzone *memzone;
uint16_t idx_name;
diff --git a/lib/librte_net/Makefile b/lib/librte_net/Makefile
index 56727c4d..50c358e5 100644
--- a/lib/librte_net/Makefile
+++ b/lib/librte_net/Makefile
@@ -34,6 +34,7 @@ include $(RTE_SDK)/mk/rte.vars.mk
LIB = librte_net.a
CFLAGS += $(WERROR_FLAGS) -I$(SRCDIR) -O3
+LDLIBS += -lrte_mbuf -lrte_eal
EXPORT_MAP := rte_net_version.map
LIBABIVER := 1
@@ -42,7 +43,7 @@ SRCS-$(CONFIG_RTE_LIBRTE_NET) := rte_net.c
SRCS-$(CONFIG_RTE_LIBRTE_NET) += rte_net_crc.c
# install includes
-SYMLINK-$(CONFIG_RTE_LIBRTE_NET)-include := rte_ip.h rte_tcp.h rte_udp.h
+SYMLINK-$(CONFIG_RTE_LIBRTE_NET)-include := rte_ip.h rte_tcp.h rte_udp.h rte_esp.h
SYMLINK-$(CONFIG_RTE_LIBRTE_NET)-include += rte_sctp.h rte_icmp.h rte_arp.h
SYMLINK-$(CONFIG_RTE_LIBRTE_NET)-include += rte_ether.h rte_gre.h rte_net.h
SYMLINK-$(CONFIG_RTE_LIBRTE_NET)-include += rte_net_crc.h
diff --git a/lib/librte_net/rte_esp.h b/lib/librte_net/rte_esp.h
new file mode 100644
index 00000000..e228af09
--- /dev/null
+++ b/lib/librte_net/rte_esp.h
@@ -0,0 +1,60 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright (c) 2016-2017, Mellanox Technologies. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef _RTE_ESP_H_
+#define _RTE_ESP_H_
+
+/**
+ * @file
+ *
+ * ESP-related defines
+ */
+
+#include <stdint.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/**
+ * ESP Header
+ */
+struct esp_hdr {
+ uint32_t spi; /**< Security Parameters Index */
+ uint32_t seq; /**< packet sequence number */
+} __attribute__((__packed__));
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _RTE_ESP_H_ */
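
The new ESP header carries just the SPI and the sequence number, both in network byte order on the wire. A small read-side sketch, not part of the patch, assuming the ESP header directly follows the IP header and that l2_len/l3_len are already known:

#include <stdio.h>

#include <rte_byteorder.h>
#include <rte_esp.h>
#include <rte_mbuf.h>

/* Print the SPI and sequence number of an ESP packet. */
static void
print_esp(struct rte_mbuf *pkt, uint32_t l2_len, uint32_t l3_len)
{
	const struct esp_hdr *esp;

	esp = rte_pktmbuf_mtod_offset(pkt, const struct esp_hdr *,
			l2_len + l3_len);
	printf("SPI %u, seq %u\n",
		rte_be_to_cpu_32(esp->spi), rte_be_to_cpu_32(esp->seq));
}
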
diff --git a/lib/librte_net/rte_ether.h b/lib/librte_net/rte_ether.h
index 917d42a1..06d7b486 100644
--- a/lib/librte_net/rte_ether.h
+++ b/lib/librte_net/rte_ether.h
@@ -358,7 +358,7 @@ static inline int rte_vlan_strip(struct rte_mbuf *m)
return -1;
struct vlan_hdr *vh = (struct vlan_hdr *)(eh + 1);
- m->ol_flags |= PKT_RX_VLAN_PKT | PKT_RX_VLAN_STRIPPED;
+ m->ol_flags |= PKT_RX_VLAN | PKT_RX_VLAN_STRIPPED;
m->vlan_tci = rte_be_to_cpu_16(vh->vlan_tci);
/* Copy ether header over rather than moving whole packet */
diff --git a/lib/librte_net/rte_net.c b/lib/librte_net/rte_net.c
index a8c7aff9..a3ca0403 100644
--- a/lib/librte_net/rte_net.c
+++ b/lib/librte_net/rte_net.c
@@ -396,6 +396,7 @@ uint32_t rte_net_get_ptype(const struct rte_mbuf *m,
if ((layers & RTE_PTYPE_INNER_L2_MASK) == 0)
return pkt_type;
+ hdr_lens->inner_l2_len = 0;
if (proto == rte_cpu_to_be_16(ETHER_TYPE_TEB)) {
eh = rte_pktmbuf_read(m, off, sizeof(*eh), &eh_copy);
if (unlikely(eh == NULL))
diff --git a/lib/librte_net/rte_net_crc.c b/lib/librte_net/rte_net_crc.c
index 661fe322..0c1bf51a 100644
--- a/lib/librte_net/rte_net_crc.c
+++ b/lib/librte_net/rte_net_crc.c
@@ -205,8 +205,7 @@ rte_net_crc_calc(const void *data,
}
/* Select highest available crc algorithm as default one */
-static inline void __attribute__((constructor))
-rte_net_crc_init(void)
+RTE_INIT(rte_net_crc_init)
{
enum rte_net_crc_alg alg = RTE_NET_CRC_SCALAR;
diff --git a/lib/librte_pci/Makefile b/lib/librte_pci/Makefile
new file mode 100644
index 00000000..fe213ea6
--- /dev/null
+++ b/lib/librte_pci/Makefile
@@ -0,0 +1,49 @@
+# BSD LICENSE
+#
+# Copyright(c) 2017 6WIND S.A.
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions
+# are met:
+#
+# * Redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer.
+# * Redistributions in binary form must reproduce the above copyright
+# notice, this list of conditions and the following disclaimer in
+# the documentation and/or other materials provided with the
+# distribution.
+# * Neither the name of 6WIND nor the names of its
+# contributors may be used to endorse or promote products derived
+# from this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+include $(RTE_SDK)/mk/rte.vars.mk
+
+# library name
+LIB = librte_pci.a
+
+CFLAGS := -I$(SRCDIR) $(CFLAGS)
+CFLAGS += $(WERROR_FLAGS) -O3
+LDLIBS += -lrte_eal
+
+EXPORT_MAP := rte_pci_version.map
+
+LIBABIVER := 1
+
+SRCS-$(CONFIG_RTE_LIBRTE_PCI) += rte_pci.c
+
+SYMLINK-$(CONFIG_RTE_LIBRTE_PCI)-include += rte_pci.h
+
+include $(RTE_SDK)/mk/rte.lib.mk
diff --git a/lib/librte_pci/rte_pci.c b/lib/librte_pci/rte_pci.c
new file mode 100644
index 00000000..0160fc1e
--- /dev/null
+++ b/lib/librte_pci/rte_pci.c
@@ -0,0 +1,212 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+ * Copyright 2013-2014 6WIND S.A.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <string.h>
+#include <inttypes.h>
+#include <stdint.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include <sys/queue.h>
+#include <sys/mman.h>
+
+#include <rte_errno.h>
+#include <rte_interrupts.h>
+#include <rte_log.h>
+#include <rte_bus.h>
+#include <rte_per_lcore.h>
+#include <rte_memory.h>
+#include <rte_eal.h>
+#include <rte_string_fns.h>
+#include <rte_common.h>
+
+#include "rte_pci.h"
+
+static inline const char *
+get_u8_pciaddr_field(const char *in, void *_u8, char dlm)
+{
+ unsigned long val;
+ uint8_t *u8 = _u8;
+ char *end;
+
+ errno = 0;
+ val = strtoul(in, &end, 16);
+ if (errno != 0 || end[0] != dlm || val > UINT8_MAX) {
+ errno = errno ? errno : EINVAL;
+ return NULL;
+ }
+ *u8 = (uint8_t)val;
+ return end + 1;
+}
+
+static int
+pci_bdf_parse(const char *input, struct rte_pci_addr *dev_addr)
+{
+ const char *in = input;
+
+ dev_addr->domain = 0;
+ in = get_u8_pciaddr_field(in, &dev_addr->bus, ':');
+ if (in == NULL)
+ return -EINVAL;
+ in = get_u8_pciaddr_field(in, &dev_addr->devid, '.');
+ if (in == NULL)
+ return -EINVAL;
+ in = get_u8_pciaddr_field(in, &dev_addr->function, '\0');
+ if (in == NULL)
+ return -EINVAL;
+ return 0;
+}
+
+static int
+pci_dbdf_parse(const char *input, struct rte_pci_addr *dev_addr)
+{
+ const char *in = input;
+ unsigned long val;
+ char *end;
+
+ errno = 0;
+ val = strtoul(in, &end, 16);
+ if (errno != 0 || end[0] != ':' || val > UINT16_MAX)
+ return -EINVAL;
+ dev_addr->domain = (uint16_t)val;
+ in = end + 1;
+ in = get_u8_pciaddr_field(in, &dev_addr->bus, ':');
+ if (in == NULL)
+ return -EINVAL;
+ in = get_u8_pciaddr_field(in, &dev_addr->devid, '.');
+ if (in == NULL)
+ return -EINVAL;
+ in = get_u8_pciaddr_field(in, &dev_addr->function, '\0');
+ if (in == NULL)
+ return -EINVAL;
+ return 0;
+}
+
+int
+eal_parse_pci_BDF(const char *input, struct rte_pci_addr *dev_addr)
+{
+ return pci_bdf_parse(input, dev_addr);
+}
+
+int
+eal_parse_pci_DomBDF(const char *input, struct rte_pci_addr *dev_addr)
+{
+ return pci_dbdf_parse(input, dev_addr);
+}
+
+void
+rte_pci_device_name(const struct rte_pci_addr *addr,
+ char *output, size_t size)
+{
+ RTE_VERIFY(size >= PCI_PRI_STR_SIZE);
+ RTE_VERIFY(snprintf(output, size, PCI_PRI_FMT,
+ addr->domain, addr->bus,
+ addr->devid, addr->function) >= 0);
+}
+
+int
+rte_eal_compare_pci_addr(const struct rte_pci_addr *addr,
+ const struct rte_pci_addr *addr2)
+{
+ return rte_pci_addr_cmp(addr, addr2);
+}
+
+int
+rte_pci_addr_cmp(const struct rte_pci_addr *addr,
+ const struct rte_pci_addr *addr2)
+{
+ uint64_t dev_addr, dev_addr2;
+
+ if ((addr == NULL) || (addr2 == NULL))
+ return -1;
+
+ dev_addr = ((uint64_t)addr->domain << 24) |
+ (addr->bus << 16) | (addr->devid << 8) | addr->function;
+ dev_addr2 = ((uint64_t)addr2->domain << 24) |
+ (addr2->bus << 16) | (addr2->devid << 8) | addr2->function;
+
+ if (dev_addr > dev_addr2)
+ return 1;
+ else if (dev_addr < dev_addr2)
+ return -1;
+ else
+ return 0;
+}
+
+int
+rte_pci_addr_parse(const char *str, struct rte_pci_addr *addr)
+{
+ if (pci_bdf_parse(str, addr) == 0 ||
+ pci_dbdf_parse(str, addr) == 0)
+ return 0;
+ return -1;
+}
+
+
+/* map a particular resource from a file */
+void *
+pci_map_resource(void *requested_addr, int fd, off_t offset, size_t size,
+ int additional_flags)
+{
+ void *mapaddr;
+
+ /* Map the PCI memory resource of device */
+ mapaddr = mmap(requested_addr, size, PROT_READ | PROT_WRITE,
+ MAP_SHARED | additional_flags, fd, offset);
+ if (mapaddr == MAP_FAILED) {
+ RTE_LOG(ERR, EAL, "%s(): cannot mmap(%d, %p, 0x%lx, 0x%lx): %s (%p)\n",
+ __func__, fd, requested_addr,
+ (unsigned long)size, (unsigned long)offset,
+ strerror(errno), mapaddr);
+ } else
+ RTE_LOG(DEBUG, EAL, " PCI memory mapped at %p\n", mapaddr);
+
+ return mapaddr;
+}
+
+/* unmap a particular resource */
+void
+pci_unmap_resource(void *requested_addr, size_t size)
+{
+ if (requested_addr == NULL)
+ return;
+
+ /* Unmap the PCI memory resource of device */
+ if (munmap(requested_addr, size)) {
+ RTE_LOG(ERR, EAL, "%s(): cannot munmap(%p, 0x%lx): %s\n",
+ __func__, requested_addr, (unsigned long)size,
+ strerror(errno));
+ } else
+ RTE_LOG(DEBUG, EAL, " PCI memory unmapped at %p\n",
+ requested_addr);
+}
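
A minimal usage sketch for the address helpers added above — illustrative only, not part of this patch; the BDF string is arbitrary:

#include <stdio.h>
#include <rte_pci.h>

/* Parse a full domain:bus:devid.function string, then format it back. */
static int
pci_addr_roundtrip(void)
{
	struct rte_pci_addr addr;
	char name[PCI_PRI_STR_SIZE];

	if (rte_pci_addr_parse("0000:03:00.1", &addr) != 0)
		return -1;

	rte_pci_device_name(&addr, name, sizeof(name));
	printf("parsed back as %s\n", name);

	return 0;
}
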
diff --git a/lib/librte_pci/rte_pci.h b/lib/librte_pci/rte_pci.h
new file mode 100644
index 00000000..4f2cd187
--- /dev/null
+++ b/lib/librte_pci/rte_pci.h
@@ -0,0 +1,263 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright(c) 2010-2015 Intel Corporation. All rights reserved.
+ * Copyright 2013-2014 6WIND S.A.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef _RTE_PCI_H_
+#define _RTE_PCI_H_
+
+/**
+ * @file
+ *
+ * RTE PCI Library
+ */
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <limits.h>
+#include <errno.h>
+#include <sys/queue.h>
+#include <stdint.h>
+#include <inttypes.h>
+
+#include <rte_debug.h>
+#include <rte_interrupts.h>
+
+/** Formatting string for PCI device identifier: Ex: 0000:00:01.0 */
+#define PCI_PRI_FMT "%.4" PRIx16 ":%.2" PRIx8 ":%.2" PRIx8 ".%" PRIx8
+#define PCI_PRI_STR_SIZE sizeof("XXXXXXXX:XX:XX.X")
+
+/** Short formatting string, without domain, for PCI device: Ex: 00:01.0 */
+#define PCI_SHORT_PRI_FMT "%.2" PRIx8 ":%.2" PRIx8 ".%" PRIx8
+
+/** Nb. of values in PCI device identifier format string. */
+#define PCI_FMT_NVAL 4
+
+/** Nb. of values in PCI resource format. */
+#define PCI_RESOURCE_FMT_NVAL 3
+
+/** Maximum number of PCI resources. */
+#define PCI_MAX_RESOURCE 6
+
+/**
+ * A structure describing an ID for a PCI driver. Each driver provides a
+ * table of these IDs for each device that it supports.
+ */
+struct rte_pci_id {
+ uint32_t class_id; /**< Class ID or RTE_CLASS_ANY_ID. */
+ uint16_t vendor_id; /**< Vendor ID or PCI_ANY_ID. */
+ uint16_t device_id; /**< Device ID or PCI_ANY_ID. */
+ uint16_t subsystem_vendor_id; /**< Subsystem vendor ID or PCI_ANY_ID. */
+ uint16_t subsystem_device_id; /**< Subsystem device ID or PCI_ANY_ID. */
+};
+
+/**
+ * A structure describing the location of a PCI device.
+ */
+struct rte_pci_addr {
+ uint32_t domain; /**< Device domain */
+ uint8_t bus; /**< Device bus */
+ uint8_t devid; /**< Device ID */
+ uint8_t function; /**< Device function. */
+};
+
+/** Any PCI device identifier (vendor, device, ...) */
+#define PCI_ANY_ID (0xffff)
+#define RTE_CLASS_ANY_ID (0xffffff)
+
+/**
+ * A structure describing a PCI mapping.
+ */
+struct pci_map {
+ void *addr;
+ char *path;
+ uint64_t offset;
+ uint64_t size;
+ uint64_t phaddr;
+};
+
+struct pci_msix_table {
+ int bar_index;
+ uint32_t offset;
+ uint32_t size;
+};
+
+/**
+ * A structure describing a mapped PCI resource.
+ * For multi-process we need to reproduce all PCI mappings in secondary
+ * processes, so save them in a tailq.
+ */
+struct mapped_pci_resource {
+ TAILQ_ENTRY(mapped_pci_resource) next;
+
+ struct rte_pci_addr pci_addr;
+ char path[PATH_MAX];
+ int nb_maps;
+ struct pci_map maps[PCI_MAX_RESOURCE];
+ struct pci_msix_table msix_table;
+};
+
+
+/** mapped pci device list */
+TAILQ_HEAD(mapped_pci_res_list, mapped_pci_resource);
+
+/**
+ * @deprecated
+ * Utility function to produce a PCI Bus-Device-Function value
+ * given a string representation. Assumes that the BDF is provided without
+ * a domain prefix (i.e. domain returned is always 0)
+ *
+ * @param input
+ * The input string to be parsed. Should have the format XX:XX.X
+ * @param dev_addr
+ * The PCI Bus-Device-Function address to be returned.
+ * Domain will always be returned as 0
+ * @return
+ * 0 on success, negative on error.
+ */
+int eal_parse_pci_BDF(const char *input, struct rte_pci_addr *dev_addr);
+
+/**
+ * @deprecated
+ * Utility function to produce a PCI Bus-Device-Function value
+ * given a string representation. Assumes that the BDF is provided including
+ * a domain prefix.
+ *
+ * @param input
+ * The input string to be parsed. Should have the format XXXX:XX:XX.X
+ * @param dev_addr
+ * The PCI Bus-Device-Function address to be returned
+ * @return
+ * 0 on success, negative on error.
+ */
+int eal_parse_pci_DomBDF(const char *input, struct rte_pci_addr *dev_addr);
+
+/**
+ * Utility function to write a pci device name; this device name can later be
+ * used to retrieve the corresponding rte_pci_addr using eal_parse_pci_*
+ * BDF helpers.
+ *
+ * @param addr
+ * The PCI Bus-Device-Function address
+ * @param output
+ * The output buffer string
+ * @param size
+ * The output buffer size
+ */
+void rte_pci_device_name(const struct rte_pci_addr *addr,
+ char *output, size_t size);
+
+/**
+ * @deprecated
+ * Utility function to compare two PCI device addresses.
+ *
+ * @param addr
+ * The PCI Bus-Device-Function address to compare
+ * @param addr2
+ * The PCI Bus-Device-Function address to compare
+ * @return
+ * 0 on equal PCI address.
+ * Positive on addr is greater than addr2.
+ * Negative on addr is less than addr2, or error.
+ */
+int rte_eal_compare_pci_addr(const struct rte_pci_addr *addr,
+ const struct rte_pci_addr *addr2);
+
+/**
+ * Utility function to compare two PCI device addresses.
+ *
+ * @param addr
+ * The PCI Bus-Device-Function address to compare
+ * @param addr2
+ * The PCI Bus-Device-Function address to compare
+ * @return
+ * 0 on equal PCI address.
+ * Positive on addr is greater than addr2.
+ * Negative on addr is less than addr2, or error.
+ */
+int rte_pci_addr_cmp(const struct rte_pci_addr *addr,
+ const struct rte_pci_addr *addr2);
+
+
+/**
+ * Utility function to parse a string into a PCI location.
+ *
+ * @param str
+ * The string to parse
+ * @param addr
+ * The reference to the structure where the location
+ * is stored.
+ * @return
+ * 0 on success
+ * <0 otherwise
+ */
+int rte_pci_addr_parse(const char *str, struct rte_pci_addr *addr);
+
+/**
+ * Map a particular resource from a file.
+ *
+ * @param requested_addr
+ * The starting address for the new mapping range.
+ * @param fd
+ * The file descriptor.
+ * @param offset
+ * The offset for the mapping range.
+ * @param size
+ * The size for the mapping range.
+ * @param additional_flags
+ * The additional flags for the mapping range.
+ * @return
+ * - On success, the function returns a pointer to the mapped area.
+ * - On error, the value MAP_FAILED is returned.
+ */
+void *pci_map_resource(void *requested_addr, int fd, off_t offset,
+ size_t size, int additional_flags);
+
+/**
+ * Unmap a particular resource.
+ *
+ * @param requested_addr
+ * The address for the unmapping range.
+ * @param size
+ * The size for the unmapping range.
+ */
+void pci_unmap_resource(void *requested_addr, size_t size);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _RTE_PCI_H_ */
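
A hedged sketch of the map/unmap helpers declared above; the sysfs path, BAR size, and error handling are illustrative assumptions, not part of this patch:

#include <fcntl.h>
#include <unistd.h>
#include <sys/mman.h>
#include <rte_pci.h>

/* Map BAR0 of a device exposed through sysfs (path is illustrative). */
static void *
map_bar0_example(size_t bar_size)
{
	const char *path = "/sys/bus/pci/devices/0000:03:00.1/resource0";
	void *bar;
	int fd;

	fd = open(path, O_RDWR);
	if (fd < 0)
		return NULL;

	/* NULL requested address lets the kernel choose the mapping. */
	bar = pci_map_resource(NULL, fd, 0, bar_size, 0);
	close(fd);

	if (bar == MAP_FAILED)
		return NULL;

	/* ... use the BAR, then release it with:
	 * pci_unmap_resource(bar, bar_size);
	 */
	return bar;
}
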
diff --git a/lib/librte_pci/rte_pci_version.map b/lib/librte_pci/rte_pci_version.map
new file mode 100644
index 00000000..15d93d95
--- /dev/null
+++ b/lib/librte_pci/rte_pci_version.map
@@ -0,0 +1,15 @@
+DPDK_17.11 {
+ global:
+
+ eal_parse_pci_BDF;
+ eal_parse_pci_DomBDF;
+ rte_pci_addr_cmp;
+ rte_pci_addr_parse;
+ rte_pci_device_name;
+ pci_map_resource;
+ pci_unmap_resource;
+ rte_eal_compare_pci_addr;
+ rte_pci_device_name;
+
+ local: *;
+};
diff --git a/lib/librte_pdump/Makefile b/lib/librte_pdump/Makefile
index 1c03bcbb..11c3e4e9 100644
--- a/lib/librte_pdump/Makefile
+++ b/lib/librte_pdump/Makefile
@@ -37,10 +37,11 @@ LIB = librte_pdump.a
CFLAGS += $(WERROR_FLAGS) -I$(SRCDIR) -O3
CFLAGS += -D_GNU_SOURCE
LDLIBS += -lpthread
+LDLIBS += -lrte_eal -lrte_mempool -lrte_mbuf -lrte_ethdev
EXPORT_MAP := rte_pdump_version.map
-LIBABIVER := 1
+LIBABIVER := 2
# all source are stored in SRCS-y
SRCS-$(CONFIG_RTE_LIBRTE_PDUMP) := rte_pdump.c
diff --git a/lib/librte_pdump/rte_pdump.c b/lib/librte_pdump/rte_pdump.c
index 729e79a3..e6182d35 100644
--- a/lib/librte_pdump/rte_pdump.c
+++ b/lib/librte_pdump/rte_pdump.c
@@ -207,7 +207,7 @@ pdump_copy(struct rte_mbuf **pkts, uint16_t nb_pkts, void *user_params)
}
static uint16_t
-pdump_rx(uint8_t port __rte_unused, uint16_t qidx __rte_unused,
+pdump_rx(uint16_t port __rte_unused, uint16_t qidx __rte_unused,
struct rte_mbuf **pkts, uint16_t nb_pkts,
uint16_t max_pkts __rte_unused,
void *user_params)
@@ -217,7 +217,7 @@ pdump_rx(uint8_t port __rte_unused, uint16_t qidx __rte_unused,
}
static uint16_t
-pdump_tx(uint8_t port __rte_unused, uint16_t qidx __rte_unused,
+pdump_tx(uint16_t port __rte_unused, uint16_t qidx __rte_unused,
struct rte_mbuf **pkts, uint16_t nb_pkts, void *user_params)
{
pdump_copy(pkts, nb_pkts, user_params);
@@ -225,7 +225,7 @@ pdump_tx(uint8_t port __rte_unused, uint16_t qidx __rte_unused,
}
static int
-pdump_regitser_rx_callbacks(uint16_t end_q, uint8_t port, uint16_t queue,
+pdump_regitser_rx_callbacks(uint16_t end_q, uint16_t port, uint16_t queue,
struct rte_ring *ring, struct rte_mempool *mp,
uint16_t operation)
{
@@ -279,7 +279,7 @@ pdump_regitser_rx_callbacks(uint16_t end_q, uint8_t port, uint16_t queue,
}
static int
-pdump_regitser_tx_callbacks(uint16_t end_q, uint8_t port, uint16_t queue,
+pdump_regitser_tx_callbacks(uint16_t end_q, uint16_t port, uint16_t queue,
struct rte_ring *ring, struct rte_mempool *mp,
uint16_t operation)
{
@@ -337,7 +337,7 @@ static int
set_pdump_rxtx_cbs(struct pdump_request *p)
{
uint16_t nb_rx_q = 0, nb_tx_q = 0, end_q, queue;
- uint8_t port;
+ uint16_t port;
int ret = 0;
uint32_t flags;
uint16_t operation;
@@ -764,7 +764,7 @@ pdump_validate_flags(uint32_t flags)
}
static int
-pdump_validate_port(uint8_t port, char *name)
+pdump_validate_port(uint16_t port, char *name)
{
int ret = 0;
@@ -828,7 +828,7 @@ pdump_prepare_client_request(char *device, uint16_t queue,
}
int
-rte_pdump_enable(uint8_t port, uint16_t queue, uint32_t flags,
+rte_pdump_enable(uint16_t port, uint16_t queue, uint32_t flags,
struct rte_ring *ring,
struct rte_mempool *mp,
void *filter)
@@ -876,7 +876,7 @@ rte_pdump_enable_by_deviceid(char *device_id, uint16_t queue,
}
int
-rte_pdump_disable(uint8_t port, uint16_t queue, uint32_t flags)
+rte_pdump_disable(uint16_t port, uint16_t queue, uint32_t flags)
{
int ret = 0;
char name[DEVICE_ID_SIZE];
diff --git a/lib/librte_pdump/rte_pdump.h b/lib/librte_pdump/rte_pdump.h
index ba6e39b0..4ec0a106 100644
--- a/lib/librte_pdump/rte_pdump.h
+++ b/lib/librte_pdump/rte_pdump.h
@@ -113,7 +113,7 @@ rte_pdump_uninit(void);
*/
int
-rte_pdump_enable(uint8_t port, uint16_t queue, uint32_t flags,
+rte_pdump_enable(uint16_t port, uint16_t queue, uint32_t flags,
struct rte_ring *ring,
struct rte_mempool *mp,
void *filter);
@@ -136,7 +136,7 @@ rte_pdump_enable(uint8_t port, uint16_t queue, uint32_t flags,
*/
int
-rte_pdump_disable(uint8_t port, uint16_t queue, uint32_t flags);
+rte_pdump_disable(uint16_t port, uint16_t queue, uint32_t flags);
/**
* Enables packet capturing on given device id and queue.
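
With the port id widened to 16 bits, a caller-side sketch looks like the following; the ring and mempool are assumed to have been created elsewhere, and the port/queue values are arbitrary:

#include <stdint.h>
#include <rte_pdump.h>
#include <rte_ring.h>
#include <rte_mempool.h>

/* Start capturing RX traffic on port 0, queue 0 into pdump_ring. */
static int
start_capture(struct rte_ring *pdump_ring, struct rte_mempool *mbuf_pool)
{
	uint16_t port_id = 0;	/* 16-bit port id after this change */
	uint16_t queue_id = 0;

	return rte_pdump_enable(port_id, queue_id, RTE_PDUMP_FLAG_RX,
				pdump_ring, mbuf_pool, NULL);
}
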
diff --git a/lib/librte_pipeline/Makefile b/lib/librte_pipeline/Makefile
index 7a835fd5..a8285738 100644
--- a/lib/librte_pipeline/Makefile
+++ b/lib/librte_pipeline/Makefile
@@ -38,6 +38,8 @@ LIB = librte_pipeline.a
CFLAGS += -O3
CFLAGS += $(WERROR_FLAGS)
+LDLIBS += -lrte_eal -lrte_mempool -lrte_mbuf -lrte_table
+LDLIBS += -lrte_port
EXPORT_MAP := rte_pipeline_version.map
diff --git a/lib/librte_pipeline/rte_pipeline.c b/lib/librte_pipeline/rte_pipeline.c
index 7f8fbac5..8611a88b 100644
--- a/lib/librte_pipeline/rte_pipeline.c
+++ b/lib/librte_pipeline/rte_pipeline.c
@@ -36,7 +36,6 @@
#include <rte_common.h>
#include <rte_memory.h>
-#include <rte_memzone.h>
#include <rte_cycles.h>
#include <rte_prefetch.h>
#include <rte_branch_prediction.h>
diff --git a/lib/librte_port/Makefile b/lib/librte_port/Makefile
index 76629a13..139dc59a 100644
--- a/lib/librte_port/Makefile
+++ b/lib/librte_port/Makefile
@@ -38,6 +38,11 @@ LIB = librte_port.a
ifeq ($(CONFIG_RTE_PORT_PCAP),y)
LDLIBS += -lpcap
endif
+LDLIBS += -lrte_eal -lrte_mempool -lrte_mbuf -lrte_ethdev
+LDLIBS += -lrte_ip_frag -lrte_sched
+ifeq ($(CONFIG_RTE_LIBRTE_KNI),y)
+LDLIBS += -lrte_kni
+endif
CFLAGS += -O3
CFLAGS += $(WERROR_FLAGS)
diff --git a/lib/librte_port/rte_port_ethdev.c b/lib/librte_port/rte_port_ethdev.c
index d5c5fba5..4ed10f27 100644
--- a/lib/librte_port/rte_port_ethdev.c
+++ b/lib/librte_port/rte_port_ethdev.c
@@ -60,7 +60,7 @@ struct rte_port_ethdev_reader {
struct rte_port_in_stats stats;
uint16_t queue_id;
- uint8_t port_id;
+ uint16_t port_id;
};
static void *
@@ -156,7 +156,7 @@ struct rte_port_ethdev_writer {
uint16_t tx_buf_count;
uint64_t bsz_mask;
uint16_t queue_id;
- uint8_t port_id;
+ uint16_t port_id;
};
static void *
@@ -337,7 +337,7 @@ struct rte_port_ethdev_writer_nodrop {
uint64_t bsz_mask;
uint64_t n_retries;
uint16_t queue_id;
- uint8_t port_id;
+ uint16_t port_id;
};
static void *
diff --git a/lib/librte_port/rte_port_ethdev.h b/lib/librte_port/rte_port_ethdev.h
index 201a79e4..f5ed9ab2 100644
--- a/lib/librte_port/rte_port_ethdev.h
+++ b/lib/librte_port/rte_port_ethdev.h
@@ -54,7 +54,7 @@ extern "C" {
/** ethdev_reader port parameters */
struct rte_port_ethdev_reader_params {
/** NIC RX port ID */
- uint8_t port_id;
+ uint16_t port_id;
/** NIC RX queue ID */
uint16_t queue_id;
@@ -66,7 +66,7 @@ extern struct rte_port_in_ops rte_port_ethdev_reader_ops;
/** ethdev_writer port parameters */
struct rte_port_ethdev_writer_params {
/** NIC RX port ID */
- uint8_t port_id;
+ uint16_t port_id;
/** NIC RX queue ID */
uint16_t queue_id;
@@ -82,7 +82,7 @@ extern struct rte_port_out_ops rte_port_ethdev_writer_ops;
/** ethdev_writer_nodrop port parameters */
struct rte_port_ethdev_writer_nodrop_params {
/** NIC RX port ID */
- uint8_t port_id;
+ uint16_t port_id;
/** NIC RX queue ID */
uint16_t queue_id;
diff --git a/lib/librte_power/Makefile b/lib/librte_power/Makefile
index 06cd10e8..1b1491d7 100644
--- a/lib/librte_power/Makefile
+++ b/lib/librte_power/Makefile
@@ -35,6 +35,7 @@ include $(RTE_SDK)/mk/rte.vars.mk
LIB = librte_power.a
CFLAGS += $(WERROR_FLAGS) -I$(SRCDIR) -O3 -fno-strict-aliasing
+LDLIBS += -lrte_eal
EXPORT_MAP := rte_power_version.map
diff --git a/lib/librte_power/channel_commands.h b/lib/librte_power/channel_commands.h
index 383897bf..f0f5f0a2 100644
--- a/lib/librte_power/channel_commands.h
+++ b/lib/librte_power/channel_commands.h
@@ -39,6 +39,7 @@ extern "C" {
#endif
#include <stdint.h>
+#include <stdbool.h>
/* Maximum number of channels per VM */
#define CHANNEL_CMDS_MAX_VM_CHANNELS 64
@@ -46,17 +47,60 @@ extern "C" {
/* Valid Commands */
#define CPU_POWER 1
#define CPU_POWER_CONNECT 2
+#define PKT_POLICY 3
/* CPU Power Command Scaling */
#define CPU_POWER_SCALE_UP 1
#define CPU_POWER_SCALE_DOWN 2
#define CPU_POWER_SCALE_MAX 3
#define CPU_POWER_SCALE_MIN 4
+#define CPU_POWER_ENABLE_TURBO 5
+#define CPU_POWER_DISABLE_TURBO 6
+#define HOURS 24
+
+#define MAX_VFS 10
+#define VM_MAX_NAME_SZ 32
+
+#define MAX_VCPU_PER_VM 8
+
+struct t_boost_status {
+ bool tbEnabled;
+};
+
+struct timer_profile {
+ int busy_hours[HOURS];
+ int quiet_hours[HOURS];
+ int hours_to_use_traffic_profile[HOURS];
+};
+
+enum workload {HIGH, MEDIUM, LOW};
+enum policy_to_use {
+ TRAFFIC,
+ TIME,
+ WORKLOAD
+};
+
+struct traffic {
+ uint32_t min_packet_thresh;
+ uint32_t avg_max_packet_thresh;
+ uint32_t max_max_packet_thresh;
+};
struct channel_packet {
uint64_t resource_id; /**< core_num, device */
uint32_t unit; /**< scale down/up/min/max */
uint32_t command; /**< Power, IO, etc */
+ char vm_name[VM_MAX_NAME_SZ];
+
+ uint64_t vfid[MAX_VFS];
+ int nb_mac_to_monitor;
+ struct traffic traffic_policy;
+ uint8_t vcpu_to_control[MAX_VCPU_PER_VM];
+ uint8_t num_vcpu;
+ struct timer_profile timer_policy;
+ enum workload workload;
+ enum policy_to_use policy_to_use;
+ struct t_boost_status t_boost_status;
};
diff --git a/lib/librte_power/guest_channel.c b/lib/librte_power/guest_channel.c
index 85c92fab..fa5de0f5 100644
--- a/lib/librte_power/guest_channel.c
+++ b/lib/librte_power/guest_channel.c
@@ -148,6 +148,13 @@ guest_channel_send_msg(struct channel_packet *pkt, unsigned lcore_id)
return 0;
}
+int rte_power_guest_channel_send_msg(struct channel_packet *pkt,
+ unsigned int lcore_id)
+{
+ return guest_channel_send_msg(pkt, lcore_id);
+}
+
+
void
guest_channel_host_disconnect(unsigned lcore_id)
{
diff --git a/lib/librte_power/guest_channel.h b/lib/librte_power/guest_channel.h
index 9e18af52..741339ca 100644
--- a/lib/librte_power/guest_channel.h
+++ b/lib/librte_power/guest_channel.h
@@ -81,6 +81,21 @@ void guest_channel_host_disconnect(unsigned lcore_id);
*/
int guest_channel_send_msg(struct channel_packet *pkt, unsigned lcore_id);
+/**
+ * Send a message contained in pkt over the Virtio-Serial to the host endpoint.
+ *
+ * @param pkt
+ * Pointer to a populated struct channel_packet
+ *
+ * @param lcore_id
+ * lcore_id.
+ *
+ * @return
+ * - 0 on success.
+ * - Negative on error.
+ */
+int rte_power_guest_channel_send_msg(struct channel_packet *pkt,
+ unsigned int lcore_id);
#ifdef __cplusplus
}
diff --git a/lib/librte_power/rte_power.c b/lib/librte_power/rte_power.c
index 998ed1c9..b327a865 100644
--- a/lib/librte_power/rte_power.c
+++ b/lib/librte_power/rte_power.c
@@ -50,6 +50,9 @@ rte_power_freq_change_t rte_power_freq_up = NULL;
rte_power_freq_change_t rte_power_freq_down = NULL;
rte_power_freq_change_t rte_power_freq_max = NULL;
rte_power_freq_change_t rte_power_freq_min = NULL;
+rte_power_freq_change_t rte_power_turbo_status;
+rte_power_freq_change_t rte_power_freq_enable_turbo;
+rte_power_freq_change_t rte_power_freq_disable_turbo;
int
rte_power_set_env(enum power_management_env env)
@@ -65,6 +68,9 @@ rte_power_set_env(enum power_management_env env)
rte_power_freq_down = rte_power_acpi_cpufreq_freq_down;
rte_power_freq_min = rte_power_acpi_cpufreq_freq_min;
rte_power_freq_max = rte_power_acpi_cpufreq_freq_max;
+ rte_power_turbo_status = rte_power_acpi_turbo_status;
+ rte_power_freq_enable_turbo = rte_power_acpi_enable_turbo;
+ rte_power_freq_disable_turbo = rte_power_acpi_disable_turbo;
} else if (env == PM_ENV_KVM_VM) {
rte_power_freqs = rte_power_kvm_vm_freqs;
rte_power_get_freq = rte_power_kvm_vm_get_freq;
@@ -73,6 +79,9 @@ rte_power_set_env(enum power_management_env env)
rte_power_freq_down = rte_power_kvm_vm_freq_down;
rte_power_freq_min = rte_power_kvm_vm_freq_min;
rte_power_freq_max = rte_power_kvm_vm_freq_max;
+ rte_power_turbo_status = rte_power_kvm_vm_turbo_status;
+ rte_power_freq_enable_turbo = rte_power_kvm_vm_enable_turbo;
+ rte_power_freq_disable_turbo = rte_power_kvm_vm_disable_turbo;
} else {
RTE_LOG(ERR, POWER, "Invalid Power Management Environment(%d) set\n",
env);
diff --git a/lib/librte_power/rte_power.h b/lib/librte_power/rte_power.h
index 67e0ec02..b17b7a53 100644
--- a/lib/librte_power/rte_power.h
+++ b/lib/librte_power/rte_power.h
@@ -236,6 +236,47 @@ extern rte_power_freq_change_t rte_power_freq_max;
*/
extern rte_power_freq_change_t rte_power_freq_min;
+/**
+ * Query the Turbo Boost status of a specific lcore.
+ * Review each environment's specific documentation for usage.
+ *
+ * @param lcore_id
+ * lcore id.
+ *
+ * @return
+ * - 1 Turbo Boost is enabled for this lcore.
+ * - 0 Turbo Boost is disabled for this lcore.
+ * - Negative on error.
+ */
+extern rte_power_freq_change_t rte_power_turbo_status;
+
+/**
+ * Enable Turbo Boost for this lcore.
+ * Review each environment's specific documentation for usage.
+ *
+ * @param lcore_id
+ * lcore id.
+ *
+ * @return
+ * - 0 on success.
+ * - Negative on error.
+ */
+extern rte_power_freq_change_t rte_power_freq_enable_turbo;
+
+/**
+ * Disable Turbo Boost for this lcore.
+ * Review each environment's specific documentation for usage.
+ *
+ * @param lcore_id
+ * lcore id.
+ *
+ * @return
+ * - 0 on success.
+ * - Negative on error.
+ */
+extern rte_power_freq_change_t rte_power_freq_disable_turbo;
+
+
#ifdef __cplusplus
}
#endif
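
A usage sketch for the new turbo hooks; it assumes rte_power_init() has already bound the environment-specific function pointers for this lcore:

#include <rte_power.h>

/* Enable Turbo Boost on an lcore if it is supported but disabled. */
static int
ensure_turbo(unsigned int lcore_id)
{
	int status = rte_power_turbo_status(lcore_id);

	if (status < 0)
		return status;		/* unsupported or error */
	if (status == 0)
		return rte_power_freq_enable_turbo(lcore_id);

	return 0;			/* already enabled */
}
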
diff --git a/lib/librte_power/rte_power_acpi_cpufreq.c b/lib/librte_power/rte_power_acpi_cpufreq.c
index a56c9b59..01ac5acb 100644
--- a/lib/librte_power/rte_power_acpi_cpufreq.c
+++ b/lib/librte_power/rte_power_acpi_cpufreq.c
@@ -87,6 +87,14 @@
#define POWER_SYSFILE_SETSPEED \
"/sys/devices/system/cpu/cpu%u/cpufreq/scaling_setspeed"
+/*
+ * MSR related
+ */
+#define PLATFORM_INFO 0x0CE
+#define TURBO_RATIO_LIMIT 0x1AD
+#define IA32_PERF_CTL 0x199
+#define CORE_TURBO_DISABLE_BIT ((uint64_t)1<<32)
+
enum power_state {
POWER_IDLE = 0,
POWER_ONGOING,
@@ -105,6 +113,8 @@ struct rte_power_info {
char governor_ori[32]; /**< Original governor name */
uint32_t curr_idx; /**< Freq index in freqs array */
volatile uint32_t state; /**< Power in use state */
+ uint16_t turbo_available; /**< Turbo Boost available */
+ uint16_t turbo_enable; /**< Turbo Boost enable/disable */
} __rte_cache_aligned;
static struct rte_power_info lcore_power_info[RTE_MAX_LCORE];
@@ -244,6 +254,18 @@ power_get_available_freqs(struct rte_power_info *pi)
POWER_CONVERT_TO_DECIMAL);
}
+ if ((pi->freqs[0]-1000) == pi->freqs[1]) {
+ pi->turbo_available = 1;
+ pi->turbo_enable = 1;
+ POWER_DEBUG_TRACE("Lcore %u Can do Turbo Boost\n",
+ pi->lcore_id);
+ } else {
+ pi->turbo_available = 0;
+ pi->turbo_enable = 0;
+ POWER_DEBUG_TRACE("Turbo Boost not available on Lcore %u\n",
+ pi->lcore_id);
+ }
+
ret = 0;
POWER_DEBUG_TRACE("%d frequencie(s) of lcore %u are available\n",
count, pi->lcore_id);
@@ -525,7 +547,17 @@ rte_power_acpi_cpufreq_freq_max(unsigned lcore_id)
}
/* Frequencies in the array are from high to low. */
- return set_freq_internal(&lcore_power_info[lcore_id], 0);
+ if (lcore_power_info[lcore_id].turbo_available) {
+ if (lcore_power_info[lcore_id].turbo_enable)
+ /* Set to Turbo */
+ return set_freq_internal(
+ &lcore_power_info[lcore_id], 0);
+ else
+ /* Set to max non-turbo */
+ return set_freq_internal(
+ &lcore_power_info[lcore_id], 1);
+ } else
+ return set_freq_internal(&lcore_power_info[lcore_id], 0);
}
int
@@ -543,3 +575,80 @@ rte_power_acpi_cpufreq_freq_min(unsigned lcore_id)
/* Frequencies in the array are from high to low. */
return set_freq_internal(pi, pi->nb_freqs - 1);
}
+
+
+int
+rte_power_acpi_turbo_status(unsigned int lcore_id)
+{
+ struct rte_power_info *pi;
+
+ if (lcore_id >= RTE_MAX_LCORE) {
+ RTE_LOG(ERR, POWER, "Invalid lcore ID\n");
+ return -1;
+ }
+
+ pi = &lcore_power_info[lcore_id];
+
+ return pi->turbo_enable;
+}
+
+
+int
+rte_power_acpi_enable_turbo(unsigned int lcore_id)
+{
+ struct rte_power_info *pi;
+
+ if (lcore_id >= RTE_MAX_LCORE) {
+ RTE_LOG(ERR, POWER, "Invalid lcore ID\n");
+ return -1;
+ }
+
+ pi = &lcore_power_info[lcore_id];
+
+ if (pi->turbo_available)
+ pi->turbo_enable = 1;
+ else {
+ pi->turbo_enable = 0;
+ RTE_LOG(ERR, POWER,
+ "Failed to enable turbo on lcore %u\n",
+ lcore_id);
+ return -1;
+ }
+
+ /* Max may have changed, so call to max function */
+ if (rte_power_acpi_cpufreq_freq_max(lcore_id) < 0) {
+ RTE_LOG(ERR, POWER,
+ "Failed to set frequency of lcore %u to max\n",
+ lcore_id);
+ return -1;
+ }
+
+ return 0;
+}
+
+int
+rte_power_acpi_disable_turbo(unsigned int lcore_id)
+{
+ struct rte_power_info *pi;
+
+ if (lcore_id >= RTE_MAX_LCORE) {
+ RTE_LOG(ERR, POWER, "Invalid lcore ID\n");
+ return -1;
+ }
+
+ pi = &lcore_power_info[lcore_id];
+
+ pi->turbo_enable = 0;
+
+ if ((pi->turbo_available) && (pi->curr_idx <= 1)) {
+ /* Try to set freq to max by default coming out of turbo */
+ if (rte_power_acpi_cpufreq_freq_max(lcore_id) < 0) {
+ RTE_LOG(ERR, POWER,
+ "Failed to set frequency of lcore %u to max\n",
+ lcore_id);
+ return -1;
+ }
+ }
+
+ return 0;
+}
diff --git a/lib/librte_power/rte_power_acpi_cpufreq.h b/lib/librte_power/rte_power_acpi_cpufreq.h
index 68578e9b..eee0ca0a 100644
--- a/lib/librte_power/rte_power_acpi_cpufreq.h
+++ b/lib/librte_power/rte_power_acpi_cpufreq.h
@@ -185,6 +185,46 @@ int rte_power_acpi_cpufreq_freq_max(unsigned lcore_id);
*/
int rte_power_acpi_cpufreq_freq_min(unsigned lcore_id);
+/**
+ * Get the turbo status of a specific lcore.
+ * It should be protected outside of this function for thread safety.
+ *
+ * @param lcore_id
+ * lcore id.
+ *
+ * @return
+ * - 1 Turbo Boost is enabled on this lcore.
+ * - 0 Turbo Boost is disabled on this lcore.
+ * - Negative on error.
+ */
+int rte_power_acpi_turbo_status(unsigned int lcore_id);
+
+/**
+ * Enable Turbo Boost on a specific lcore.
+ * It should be protected outside of this function for thread safety.
+ *
+ * @param lcore_id
+ * lcore id.
+ *
+ * @return
+ * - 0 Turbo Boost is enabled successfully on this lcore.
+ * - Negative on error.
+ */
+int rte_power_acpi_enable_turbo(unsigned int lcore_id);
+
+/**
+ * Disable Turbo Boost on a specific lcore.
+ * It should be protected outside of this function for thread safety.
+ *
+ * @param lcore_id
+ * lcore id.
+ *
+ * @return
+ * - 0 Turbo Boost disabled successfully on this lcore.
+ * - Negative on error.
+ */
+int rte_power_acpi_disable_turbo(unsigned int lcore_id);
+
#ifdef __cplusplus
}
#endif
diff --git a/lib/librte_power/rte_power_kvm_vm.c b/lib/librte_power/rte_power_kvm_vm.c
index a1badf34..99060625 100644
--- a/lib/librte_power/rte_power_kvm_vm.c
+++ b/lib/librte_power/rte_power_kvm_vm.c
@@ -134,3 +134,22 @@ rte_power_kvm_vm_freq_min(unsigned lcore_id)
{
return send_msg(lcore_id, CPU_POWER_SCALE_MIN);
}
+
+int
+rte_power_kvm_vm_turbo_status(__attribute__((unused)) unsigned int lcore_id)
+{
+ RTE_LOG(ERR, POWER, "rte_power_turbo_status is not implemented for Virtual Machine Power Management\n");
+ return -ENOTSUP;
+}
+
+int
+rte_power_kvm_vm_enable_turbo(unsigned int lcore_id)
+{
+ return send_msg(lcore_id, CPU_POWER_ENABLE_TURBO);
+}
+
+int
+rte_power_kvm_vm_disable_turbo(unsigned int lcore_id)
+{
+ return send_msg(lcore_id, CPU_POWER_DISABLE_TURBO);
+}
diff --git a/lib/librte_power/rte_power_kvm_vm.h b/lib/librte_power/rte_power_kvm_vm.h
index dcbc878a..9af41d64 100644
--- a/lib/librte_power/rte_power_kvm_vm.h
+++ b/lib/librte_power/rte_power_kvm_vm.h
@@ -172,8 +172,41 @@ int rte_power_kvm_vm_freq_max(unsigned lcore_id);
*/
int rte_power_kvm_vm_freq_min(unsigned lcore_id);
+/**
+ * It should be protected outside of this function for thread safety.
+ *
+ * @param lcore_id
+ * lcore id.
+ *
+ * @return
+ * -ENOTSUP
+ */
+int rte_power_kvm_vm_turbo_status(unsigned int lcore_id);
+
+/**
+ * It should be protected outside of this function for thread safety.
+ *
+ * @param lcore_id
+ * lcore id.
+ *
+ * @return
+ * - 1 on success.
+ * - Negative on error.
+ */
+int rte_power_kvm_vm_enable_turbo(unsigned int lcore_id);
+
+/**
+ * It should be protected outside of this function for thread safety.
+ *
+ * @param lcore_id
+ * lcore id.
+ *
+ * @return
+ * - 1 on success.
+ * - Negative on error.
+ */
+int rte_power_kvm_vm_disable_turbo(unsigned int lcore_id);
#ifdef __cplusplus
}
#endif
-
#endif
diff --git a/lib/librte_power/rte_power_version.map b/lib/librte_power/rte_power_version.map
index db75ff3e..96dc42ec 100644
--- a/lib/librte_power/rte_power_version.map
+++ b/lib/librte_power/rte_power_version.map
@@ -16,3 +16,13 @@ DPDK_2.0 {
local: *;
};
+
+DPDK_17.11 {
+ global:
+
+ rte_power_guest_channel_send_msg;
+ rte_power_freq_disable_turbo;
+ rte_power_freq_enable_turbo;
+ rte_power_turbo_status;
+
+} DPDK_2.0;
\ No newline at end of file
diff --git a/lib/librte_reorder/Makefile b/lib/librte_reorder/Makefile
index 4e44e72f..5d38d712 100644
--- a/lib/librte_reorder/Makefile
+++ b/lib/librte_reorder/Makefile
@@ -36,6 +36,7 @@ LIB = librte_reorder.a
CFLAGS += -O3
CFLAGS += $(WERROR_FLAGS) -I$(SRCDIR)
+LDLIBS += -lrte_eal -lrte_mempool -lrte_mbuf
EXPORT_MAP := rte_reorder_version.map
diff --git a/lib/librte_reorder/rte_reorder.c b/lib/librte_reorder/rte_reorder.c
index 010dff68..867775da 100644
--- a/lib/librte_reorder/rte_reorder.c
+++ b/lib/librte_reorder/rte_reorder.c
@@ -36,7 +36,6 @@
#include <rte_log.h>
#include <rte_mbuf.h>
-#include <rte_memzone.h>
#include <rte_eal_memconfig.h>
#include <rte_errno.h>
#include <rte_malloc.h>
diff --git a/lib/librte_ring/Makefile b/lib/librte_ring/Makefile
index 3e2f4b87..e34d9d95 100644
--- a/lib/librte_ring/Makefile
+++ b/lib/librte_ring/Makefile
@@ -35,6 +35,7 @@ include $(RTE_SDK)/mk/rte.vars.mk
LIB = librte_ring.a
CFLAGS += $(WERROR_FLAGS) -I$(SRCDIR) -O3
+LDLIBS += -lrte_eal
EXPORT_MAP := rte_ring_version.map
diff --git a/lib/librte_ring/rte_ring.h b/lib/librte_ring/rte_ring.h
index 8f5a4937..5e9b3b7b 100644
--- a/lib/librte_ring/rte_ring.h
+++ b/lib/librte_ring/rte_ring.h
@@ -174,7 +174,7 @@ struct rte_ring {
* ring space will be wasted.
*/
#define RING_F_EXACT_SZ 0x0004
-#define RTE_RING_SZ_MASK (unsigned)(0x0fffffff) /**< Ring size mask */
+#define RTE_RING_SZ_MASK (0x7fffffffU) /**< Ring size mask */
/* @internal defines for passing to the enqueue dequeue worker functions */
#define __IS_SP 1
diff --git a/lib/librte_sched/Makefile b/lib/librte_sched/Makefile
index 18274e73..04785f72 100644
--- a/lib/librte_sched/Makefile
+++ b/lib/librte_sched/Makefile
@@ -43,6 +43,8 @@ CFLAGS_rte_red.o := -D_GNU_SOURCE
LDLIBS += -lm
LDLIBS += -lrt
+LDLIBS += -lrte_eal -lrte_mempool -lrte_mbuf -lrte_net
+LDLIBS += -lrte_timer
EXPORT_MAP := rte_sched_version.map
@@ -55,7 +57,7 @@ SRCS-$(CONFIG_RTE_LIBRTE_SCHED) += rte_sched.c rte_red.c rte_approx.c
SRCS-$(CONFIG_RTE_LIBRTE_SCHED) += rte_reciprocal.c
# install includes
-SYMLINK-$(CONFIG_RTE_LIBRTE_SCHED)-include := rte_sched.h rte_bitmap.h rte_sched_common.h rte_red.h rte_approx.h
+SYMLINK-$(CONFIG_RTE_LIBRTE_SCHED)-include := rte_sched.h rte_sched_common.h rte_red.h rte_approx.h
SYMLINK-$(CONFIG_RTE_LIBRTE_SCHED)-include += rte_reciprocal.h
include $(RTE_SDK)/mk/rte.lib.mk
diff --git a/lib/librte_sched/rte_sched.c b/lib/librte_sched/rte_sched.c
index b7cba110..a2d0d685 100644
--- a/lib/librte_sched/rte_sched.c
+++ b/lib/librte_sched/rte_sched.c
@@ -42,9 +42,9 @@
#include <rte_prefetch.h>
#include <rte_branch_prediction.h>
#include <rte_mbuf.h>
+#include <rte_bitmap.h>
#include "rte_sched.h"
-#include "rte_bitmap.h"
#include "rte_sched_common.h"
#include "rte_approx.h"
#include "rte_reciprocal.h"
diff --git a/lib/librte_security/Makefile b/lib/librte_security/Makefile
new file mode 100644
index 00000000..bb93ec33
--- /dev/null
+++ b/lib/librte_security/Makefile
@@ -0,0 +1,54 @@
+# BSD LICENSE
+#
+# Copyright(c) 2017 Intel Corporation. All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions
+# are met:
+#
+# * Redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer.
+# * Redistributions in binary form must reproduce the above copyright
+# notice, this list of conditions and the following disclaimer in
+# the documentation and/or other materials provided with the
+# distribution.
+# * Neither the name of Intel Corporation nor the names of its
+# contributors may be used to endorse or promote products derived
+# from this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+include $(RTE_SDK)/mk/rte.vars.mk
+
+# library name
+LIB = librte_security.a
+
+# library version
+LIBABIVER := 1
+
+# build flags
+CFLAGS += -O3
+CFLAGS += $(WERROR_FLAGS)
+LDLIBS += -lrte_eal -lrte_mempool
+
+# library source files
+SRCS-y += rte_security.c
+
+# export include files
+SYMLINK-y-include += rte_security.h
+SYMLINK-y-include += rte_security_driver.h
+
+# versioning export map
+EXPORT_MAP := rte_security_version.map
+
+include $(RTE_SDK)/mk/rte.lib.mk
diff --git a/lib/librte_security/rte_security.c b/lib/librte_security/rte_security.c
new file mode 100644
index 00000000..1227fca8
--- /dev/null
+++ b/lib/librte_security/rte_security.c
@@ -0,0 +1,149 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright 2017 NXP.
+ * Copyright(c) 2017 Intel Corporation. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of NXP nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <rte_malloc.h>
+#include <rte_dev.h>
+
+#include "rte_security.h"
+#include "rte_security_driver.h"
+
+struct rte_security_session *
+rte_security_session_create(struct rte_security_ctx *instance,
+ struct rte_security_session_conf *conf,
+ struct rte_mempool *mp)
+{
+ struct rte_security_session *sess = NULL;
+
+ if (conf == NULL)
+ return NULL;
+
+ RTE_FUNC_PTR_OR_ERR_RET(*instance->ops->session_create, NULL);
+
+ if (rte_mempool_get(mp, (void *)&sess))
+ return NULL;
+
+ if (instance->ops->session_create(instance->device, conf, sess, mp)) {
+ rte_mempool_put(mp, (void *)sess);
+ return NULL;
+ }
+ instance->sess_cnt++;
+
+ return sess;
+}
+
+int
+rte_security_session_update(struct rte_security_ctx *instance,
+ struct rte_security_session *sess,
+ struct rte_security_session_conf *conf)
+{
+ RTE_FUNC_PTR_OR_ERR_RET(*instance->ops->session_update, -ENOTSUP);
+ return instance->ops->session_update(instance->device, sess, conf);
+}
+
+int
+rte_security_session_stats_get(struct rte_security_ctx *instance,
+ struct rte_security_session *sess,
+ struct rte_security_stats *stats)
+{
+ RTE_FUNC_PTR_OR_ERR_RET(*instance->ops->session_stats_get, -ENOTSUP);
+ return instance->ops->session_stats_get(instance->device, sess, stats);
+}
+
+int
+rte_security_session_destroy(struct rte_security_ctx *instance,
+ struct rte_security_session *sess)
+{
+ int ret;
+ struct rte_mempool *mp = rte_mempool_from_obj(sess);
+
+ RTE_FUNC_PTR_OR_ERR_RET(*instance->ops->session_destroy, -ENOTSUP);
+
+ if (instance->sess_cnt)
+ instance->sess_cnt--;
+
+ ret = instance->ops->session_destroy(instance->device, sess);
+ if (!ret)
+ rte_mempool_put(mp, (void *)sess);
+
+ return ret;
+}
+
+int
+rte_security_set_pkt_metadata(struct rte_security_ctx *instance,
+ struct rte_security_session *sess,
+ struct rte_mbuf *m, void *params)
+{
+ RTE_FUNC_PTR_OR_ERR_RET(*instance->ops->set_pkt_metadata, -ENOTSUP);
+ return instance->ops->set_pkt_metadata(instance->device,
+ sess, m, params);
+}
+
+const struct rte_security_capability *
+rte_security_capabilities_get(struct rte_security_ctx *instance)
+{
+ RTE_FUNC_PTR_OR_ERR_RET(*instance->ops->capabilities_get, NULL);
+ return instance->ops->capabilities_get(instance->device);
+}
+
+const struct rte_security_capability *
+rte_security_capability_get(struct rte_security_ctx *instance,
+ struct rte_security_capability_idx *idx)
+{
+ const struct rte_security_capability *capabilities;
+ const struct rte_security_capability *capability;
+ uint16_t i = 0;
+
+ RTE_FUNC_PTR_OR_ERR_RET(*instance->ops->capabilities_get, NULL);
+ capabilities = instance->ops->capabilities_get(instance->device);
+
+ if (capabilities == NULL)
+ return NULL;
+
+ while ((capability = &capabilities[i++])->action
+ != RTE_SECURITY_ACTION_TYPE_NONE) {
+ if (capability->action == idx->action &&
+ capability->protocol == idx->protocol) {
+ if (idx->protocol == RTE_SECURITY_PROTOCOL_IPSEC) {
+ if (capability->ipsec.proto ==
+ idx->ipsec.proto &&
+ capability->ipsec.mode ==
+ idx->ipsec.mode &&
+ capability->ipsec.direction ==
+ idx->ipsec.direction)
+ return capability;
+ }
+ }
+ }
+
+ return NULL;
+}
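
A caller-side sketch of the session lifecycle implemented above; the security context, session configuration, and session mempool are assumed to be prepared elsewhere (e.g. the conf would carry the IPsec xform and crypto transforms):

#include <rte_security.h>

/* Create a session, use it, then return it to its mempool. */
static int
session_lifecycle(struct rte_security_ctx *ctx,
		  struct rte_security_session_conf *conf,
		  struct rte_mempool *sess_pool)
{
	struct rte_security_session *sess;

	sess = rte_security_session_create(ctx, conf, sess_pool);
	if (sess == NULL)
		return -1;

	/* ... attach the session to operations and process traffic ... */

	return rte_security_session_destroy(ctx, sess);
}
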
diff --git a/lib/librte_security/rte_security.h b/lib/librte_security/rte_security.h
new file mode 100644
index 00000000..7e687d29
--- /dev/null
+++ b/lib/librte_security/rte_security.h
@@ -0,0 +1,529 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright 2017 NXP.
+ * Copyright(c) 2017 Intel Corporation. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of NXP nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef _RTE_SECURITY_H_
+#define _RTE_SECURITY_H_
+
+/**
+ * @file rte_security.h
+ * @b EXPERIMENTAL: this API may change without prior notice
+ *
+ * RTE Security Common Definitions
+ *
+ */
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include <sys/types.h>
+
+#include <netinet/in.h>
+#include <netinet/ip.h>
+#include <netinet/ip6.h>
+
+#include <rte_common.h>
+#include <rte_crypto.h>
+#include <rte_mbuf.h>
+#include <rte_memory.h>
+#include <rte_mempool.h>
+
+/** IPSec protocol mode */
+enum rte_security_ipsec_sa_mode {
+ RTE_SECURITY_IPSEC_SA_MODE_TRANSPORT,
+ /**< IPSec Transport mode */
+ RTE_SECURITY_IPSEC_SA_MODE_TUNNEL,
+ /**< IPSec Tunnel mode */
+};
+
+/** IPSec Protocol */
+enum rte_security_ipsec_sa_protocol {
+ RTE_SECURITY_IPSEC_SA_PROTO_AH,
+ /**< AH protocol */
+ RTE_SECURITY_IPSEC_SA_PROTO_ESP,
+ /**< ESP protocol */
+};
+
+/** IPSEC tunnel type */
+enum rte_security_ipsec_tunnel_type {
+ RTE_SECURITY_IPSEC_TUNNEL_IPV4,
+ /**< Outer header is IPv4 */
+ RTE_SECURITY_IPSEC_TUNNEL_IPV6,
+ /**< Outer header is IPv6 */
+};
+
+/**
+ * Security context for crypto/eth devices
+ *
+ * Security instance for each driver to register security operations.
+ * The application can get the security context from the crypto/eth device id
+ * using the APIs rte_cryptodev_get_sec_ctx()/rte_eth_dev_get_sec_ctx()
+ * This structure is used to identify the device(crypto/eth) for which the
+ * security operations need to be performed.
+ */
+struct rte_security_ctx {
+ void *device;
+ /**< Crypto/ethernet device attached */
+ struct rte_security_ops *ops;
+ /**< Pointer to security ops for the device */
+ uint16_t sess_cnt;
+ /**< Number of sessions attached to this context */
+};
+
+/**
+ * IPSEC tunnel parameters
+ *
+ * These parameters are used to build outbound tunnel headers.
+ */
+struct rte_security_ipsec_tunnel_param {
+ enum rte_security_ipsec_tunnel_type type;
+ /**< Tunnel type: IPv4 or IPv6 */
+ RTE_STD_C11
+ union {
+ struct {
+ struct in_addr src_ip;
+ /**< IPv4 source address */
+ struct in_addr dst_ip;
+ /**< IPv4 destination address */
+ uint8_t dscp;
+ /**< IPv4 Differentiated Services Code Point */
+ uint8_t df;
+ /**< IPv4 Don't Fragment bit */
+ uint8_t ttl;
+ /**< IPv4 Time To Live */
+ } ipv4;
+ /**< IPv4 header parameters */
+ struct {
+ struct in6_addr src_addr;
+ /**< IPv6 source address */
+ struct in6_addr dst_addr;
+ /**< IPv6 destination address */
+ uint8_t dscp;
+ /**< IPv6 Differentiated Services Code Point */
+ uint32_t flabel;
+ /**< IPv6 flow label */
+ uint8_t hlimit;
+ /**< IPv6 hop limit */
+ } ipv6;
+ /**< IPv6 header parameters */
+ };
+};
+
+/**
+ * IPsec Security Association option flags
+ */
+struct rte_security_ipsec_sa_options {
+ /**< Extended Sequence Numbers (ESN)
+ *
+ * * 1: Use extended (64 bit) sequence numbers
+ * * 0: Use normal sequence numbers
+ */
+ uint32_t esn : 1;
+
+ /**< UDP encapsulation
+ *
+ * * 1: Do UDP encapsulation/decapsulation so that IPSEC packets can
+ * traverse through NAT boxes.
+ * * 0: No UDP encapsulation
+ */
+ uint32_t udp_encap : 1;
+
+ /**< Copy DSCP bits
+ *
+ * * 1: Copy IPv4 or IPv6 DSCP bits from inner IP header to
+ * the outer IP header in encapsulation, and vice versa in
+ * decapsulation.
+ * * 0: Do not change DSCP field.
+ */
+ uint32_t copy_dscp : 1;
+
+ /**< Copy IPv6 Flow Label
+ *
+ * * 1: Copy IPv6 flow label from inner IPv6 header to the
+ * outer IPv6 header.
+ * * 0: Outer header is not modified.
+ */
+ uint32_t copy_flabel : 1;
+
+ /**< Copy IPv4 Don't Fragment bit
+ *
+ * * 1: Copy the DF bit from the inner IPv4 header to the outer
+ * IPv4 header.
+ * * 0: Outer header is not modified.
+ */
+ uint32_t copy_df : 1;
+
+ /**< Decrement inner packet Time To Live (TTL) field
+ *
+ * * 1: In tunnel mode, decrement inner packet IPv4 TTL or
+ * IPv6 Hop Limit after tunnel decapsulation, or before tunnel
+ * encapsulation.
+ * * 0: Inner packet is not modified.
+ */
+ uint32_t dec_ttl : 1;
+};
+
+/** IPSec security association direction */
+enum rte_security_ipsec_sa_direction {
+ RTE_SECURITY_IPSEC_SA_DIR_EGRESS,
+ /**< Encrypt and generate digest */
+ RTE_SECURITY_IPSEC_SA_DIR_INGRESS,
+ /**< Verify digest and decrypt */
+};
+
+/**
+ * IPsec security association configuration data.
+ *
+ * This structure contains data required to create an IPsec SA security session.
+ */
+struct rte_security_ipsec_xform {
+ uint32_t spi;
+ /**< SA security parameter index */
+ uint32_t salt;
+ /**< SA salt */
+ struct rte_security_ipsec_sa_options options;
+ /**< various SA options */
+ enum rte_security_ipsec_sa_direction direction;
+ /**< IPSec SA Direction - Egress/Ingress */
+ enum rte_security_ipsec_sa_protocol proto;
+ /**< IPsec SA Protocol - AH/ESP */
+ enum rte_security_ipsec_sa_mode mode;
+ /**< IPsec SA Mode - transport/tunnel */
+ struct rte_security_ipsec_tunnel_param tunnel;
+ /**< Tunnel parameters, NULL for transport mode */
+};
+
+/**
+ * MACsec security session configuration
+ */
+struct rte_security_macsec_xform {
+ /** To be Filled */
+};
+
+/**
+ * Security session action type.
+ */
+enum rte_security_session_action_type {
+ RTE_SECURITY_ACTION_TYPE_NONE,
+ /**< No security actions */
+ RTE_SECURITY_ACTION_TYPE_INLINE_CRYPTO,
+ /**< Crypto processing for security protocol is processed inline
+ * during transmission
+ */
+ RTE_SECURITY_ACTION_TYPE_INLINE_PROTOCOL,
+ /**< All security protocol processing is performed inline during
+ * transmission
+ */
+ RTE_SECURITY_ACTION_TYPE_LOOKASIDE_PROTOCOL
+ /**< All security protocol processing including crypto is performed
+ * on a lookaside accelerator
+ */
+};
+
+/** Security session protocol definition */
+enum rte_security_session_protocol {
+ RTE_SECURITY_PROTOCOL_IPSEC,
+ /**< IPsec Protocol */
+ RTE_SECURITY_PROTOCOL_MACSEC,
+ /**< MACSec Protocol */
+};
+
+/**
+ * Security session configuration
+ */
+struct rte_security_session_conf {
+ enum rte_security_session_action_type action_type;
+ /**< Type of action to be performed on the session */
+ enum rte_security_session_protocol protocol;
+ /**< Security protocol to be configured */
+ union {
+ struct rte_security_ipsec_xform ipsec;
+ struct rte_security_macsec_xform macsec;
+ };
+ /**< Configuration parameters for security session */
+ struct rte_crypto_sym_xform *crypto_xform;
+ /**< Security Session Crypto Transformations */
+};
+
+struct rte_security_session {
+ void *sess_private_data;
+ /**< Private session material */
+};
+
+/**
+ * Create security session as specified by the session configuration
+ *
+ * @param instance security instance
+ * @param conf session configuration parameters
+ * @param mp mempool to allocate session objects from
+ * @return
+ * - On success, pointer to session
+ * - On failure, NULL
+ */
+struct rte_security_session *
+rte_security_session_create(struct rte_security_ctx *instance,
+ struct rte_security_session_conf *conf,
+ struct rte_mempool *mp);
+
+/**
+ * Update security session as specified by the session configuration
+ *
+ * @param instance security instance
+ * @param sess session to update parameters
+ * @param conf update configuration parameters
+ * @return
+ * - On success returns 0
+ * - On failure return errno
+ */
+int
+rte_security_session_update(struct rte_security_ctx *instance,
+ struct rte_security_session *sess,
+ struct rte_security_session_conf *conf);
+
+/**
+ * Free security session header and the session private data and
+ * return it to its original mempool.
+ *
+ * @param instance security instance
+ * @param sess security session to be freed
+ *
+ * @return
+ * - 0 if successful.
+ * - -EINVAL if session is NULL.
+ * - -EBUSY if not all device private data has been freed.
+ */
+int
+rte_security_session_destroy(struct rte_security_ctx *instance,
+ struct rte_security_session *sess);
+
+/**
+ * Updates the buffer with device-specific defined metadata
+ *
+ * @param instance security instance
+ * @param sess security session
+ * @param mb packet mbuf to set metadata on.
+ * @param params device-specific defined parameters
+ * required for metadata
+ *
+ * @return
+ * - On success, zero.
+ * - On failure, a negative value.
+ */
+int
+rte_security_set_pkt_metadata(struct rte_security_ctx *instance,
+ struct rte_security_session *sess,
+ struct rte_mbuf *mb, void *params);
+
+/**
+ * Attach a session to a symmetric crypto operation
+ *
+ * @param sym_op crypto operation
+ * @param sess security session
+ */
+static inline int
+__rte_security_attach_session(struct rte_crypto_sym_op *sym_op,
+ struct rte_security_session *sess)
+{
+ sym_op->sec_session = sess;
+
+ return 0;
+}
+
+static inline void *
+get_sec_session_private_data(const struct rte_security_session *sess)
+{
+ return sess->sess_private_data;
+}
+
+static inline void
+set_sec_session_private_data(struct rte_security_session *sess,
+ void *private_data)
+{
+ sess->sess_private_data = private_data;
+}
+
+/**
+ * Attach a session to a crypto operation.
+ * This API is needed only in case of RTE_SECURITY_SESS_CRYPTO_PROTO_OFFLOAD
+ * For other rte_security_session_action_type, ol_flags in rte_mbuf may be
+ * defined to perform security operations.
+ *
+ * @param op crypto operation
+ * @param sess security session
+ */
+static inline int
+rte_security_attach_session(struct rte_crypto_op *op,
+ struct rte_security_session *sess)
+{
+ if (unlikely(op->type != RTE_CRYPTO_OP_TYPE_SYMMETRIC))
+ return -EINVAL;
+
+ op->sess_type = RTE_CRYPTO_OP_SECURITY_SESSION;
+
+ return __rte_security_attach_session(op->sym, sess);
+}
+
+struct rte_security_macsec_stats {
+ uint64_t reserved;
+};
+
+struct rte_security_ipsec_stats {
+ uint64_t reserved;
+
+};
+
+struct rte_security_stats {
+ enum rte_security_session_protocol protocol;
+ /**< Security protocol to be configured */
+
+ union {
+ struct rte_security_macsec_stats macsec;
+ struct rte_security_ipsec_stats ipsec;
+ };
+};
+
+/**
+ * Get security session statistics
+ *
+ * @param instance security instance
+ * @param sess security session
+ * @param stats statistics
+ * @return
+ * - On success return 0
+ * - On failure errno
+ */
+int
+rte_security_session_stats_get(struct rte_security_ctx *instance,
+ struct rte_security_session *sess,
+ struct rte_security_stats *stats);
+
+/**
+ * Security capability definition
+ */
+struct rte_security_capability {
+ enum rte_security_session_action_type action;
+ /**< Security action type*/
+ enum rte_security_session_protocol protocol;
+ /**< Security protocol */
+ RTE_STD_C11
+ union {
+ struct {
+ enum rte_security_ipsec_sa_protocol proto;
+ /**< IPsec SA protocol */
+ enum rte_security_ipsec_sa_mode mode;
+ /**< IPsec SA mode */
+ enum rte_security_ipsec_sa_direction direction;
+ /**< IPsec SA direction */
+ struct rte_security_ipsec_sa_options options;
+ /**< IPsec SA supported options */
+ } ipsec;
+ /**< IPsec capability */
+ struct {
+ /* To be Filled */
+ } macsec;
+ /**< MACsec capability */
+ };
+
+ const struct rte_cryptodev_capabilities *crypto_capabilities;
+ /**< Corresponding crypto capabilities for security capability */
+
+ uint32_t ol_flags;
+ /**< Device offload flags */
+};
+
+#define RTE_SECURITY_TX_OLOAD_NEED_MDATA 0x00000001
+/**< HW needs metadata update, see rte_security_set_pkt_metadata().
+ */
+
+#define RTE_SECURITY_TX_HW_TRAILER_OFFLOAD 0x00000002
+/**< HW constructs trailer of packets
+ * Transmitted packets will have the trailer added to them
+ * by hardware. The next protocol field will be based on
+ * the mbuf->inner_esp_next_proto field.
+ */
+#define RTE_SECURITY_RX_HW_TRAILER_OFFLOAD 0x00010000
+/**< HW removes trailer of packets
+ * Received packets have no trailer, the next protocol field
+ * is supplied in the mbuf->inner_esp_next_proto field.
+ * Inner packet is not modified.
+ */
+
+/**
+ * Security capability index used to query a security instance for a specific
+ * security capability
+ */
+struct rte_security_capability_idx {
+ enum rte_security_session_action_type action;
+ enum rte_security_session_protocol protocol;
+
+ union {
+ struct {
+ enum rte_security_ipsec_sa_protocol proto;
+ enum rte_security_ipsec_sa_mode mode;
+ enum rte_security_ipsec_sa_direction direction;
+ } ipsec;
+ };
+};
+
+/**
+ * Returns array of security instance capabilities
+ *
+ * @param instance Security instance.
+ *
+ * @return
+ * - Returns array of security capabilities.
+ * - Return NULL if no capabilities available.
+ */
+const struct rte_security_capability *
+rte_security_capabilities_get(struct rte_security_ctx *instance);
+
+/**
+ * Query if a specific capability is available on security instance
+ *
+ * @param instance security instance.
+ * @param idx security capability index to match against
+ *
+ * @return
+ * - Returns pointer to security capability on match of capability
+ * index criteria.
+ * - Return NULL if the capability not matched on security instance.
+ */
+const struct rte_security_capability *
+rte_security_capability_get(struct rte_security_ctx *instance,
+ struct rte_security_capability_idx *idx);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _RTE_SECURITY_H_ */
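
A sketch of a capability lookup using the index structure above; the chosen action/protocol combination is only an example:

#include <rte_security.h>

/* Ask whether the instance supports lookaside IPsec ESP tunnel egress. */
static const struct rte_security_capability *
find_ipsec_cap(struct rte_security_ctx *ctx)
{
	struct rte_security_capability_idx idx = {
		.action = RTE_SECURITY_ACTION_TYPE_LOOKASIDE_PROTOCOL,
		.protocol = RTE_SECURITY_PROTOCOL_IPSEC,
		.ipsec = {
			.proto = RTE_SECURITY_IPSEC_SA_PROTO_ESP,
			.mode = RTE_SECURITY_IPSEC_SA_MODE_TUNNEL,
			.direction = RTE_SECURITY_IPSEC_SA_DIR_EGRESS,
		},
	};

	return rte_security_capability_get(ctx, &idx);
}
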
diff --git a/lib/librte_security/rte_security_driver.h b/lib/librte_security/rte_security_driver.h
new file mode 100644
index 00000000..997fbe79
--- /dev/null
+++ b/lib/librte_security/rte_security_driver.h
@@ -0,0 +1,156 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright(c) 2017 Intel Corporation. All rights reserved.
+ * Copyright 2017 NXP.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef _RTE_SECURITY_DRIVER_H_
+#define _RTE_SECURITY_DRIVER_H_
+
+/**
+ * @file rte_security_driver.h
+ * @b EXPERIMENTAL: this API may change without prior notice
+ *
+ * RTE Security Common Definitions
+ *
+ */
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include "rte_security.h"
+
+/**
+ * Configure a security session on a device.
+ *
+ * @param device Crypto/eth device pointer
+ * @param conf Security session configuration
+ * @param sess Pointer to Security private session structure
+ * @param mp Mempool where the private session is allocated
+ *
+ * @return
+ * - Returns 0 if the private session structure has been created successfully.
+ * - Returns -EINVAL if input parameters are invalid.
+ * - Returns -ENOTSUP if crypto device does not support the crypto transform.
+ * - Returns -ENOMEM if the private session could not be allocated.
+ */
+typedef int (*security_session_create_t)(void *device,
+ struct rte_security_session_conf *conf,
+ struct rte_security_session *sess,
+ struct rte_mempool *mp);
+
+/**
+ * Free driver private session data.
+ *
+ * @param device	Crypto/eth device pointer
+ * @param sess Security session structure
+ */
+typedef int (*security_session_destroy_t)(void *device,
+ struct rte_security_session *sess);
+
+/**
+ * Update driver private session data.
+ *
+ * @param device Crypto/eth device pointer
+ * @param sess Pointer to Security private session structure
+ * @param conf Security session configuration
+ *
+ * @return
+ *  - Returns 0 if private session structure has been updated successfully.
+ * - Returns -EINVAL if input parameters are invalid.
+ * - Returns -ENOTSUP if crypto device does not support the crypto transform.
+ */
+typedef int (*security_session_update_t)(void *device,
+ struct rte_security_session *sess,
+ struct rte_security_session_conf *conf);
+/**
+ * Get stats from the PMD.
+ *
+ * @param device Crypto/eth device pointer
+ * @param sess Pointer to Security private session structure
+ * @param stats Security stats of the driver
+ *
+ * @return
+ *  - Returns 0 if the session statistics have been retrieved successfully.
+ * - Returns -EINVAL if session parameters are invalid.
+ */
+typedef int (*security_session_stats_get_t)(void *device,
+ struct rte_security_session *sess,
+ struct rte_security_stats *stats);
+
+/**
+ * Update the mbuf with provided metadata.
+ *
+ * @param device	Crypto/eth device pointer
+ * @param sess		Security session structure
+ * @param m		Packet mbuf
+ * @param params	Device-specific metadata parameters
+ *
+ * @return
+ *  - Returns 0 if metadata updated successfully.
+ *  - Returns a negative value on error.
+ */
+typedef int (*security_set_pkt_metadata_t)(void *device,
+ struct rte_security_session *sess, struct rte_mbuf *m,
+ void *params);
+
+/**
+ * Get security capabilities of the device.
+ *
+ * @param device	Crypto/eth device pointer
+ *
+ * @return
+ * - Returns rte_security_capability pointer on success.
+ * - Returns NULL on error.
+ */
+typedef const struct rte_security_capability *(*security_capabilities_get_t)(
+ void *device);
+
+/** Security operations function pointer table */
+struct rte_security_ops {
+ security_session_create_t session_create;
+ /**< Configure a security session. */
+ security_session_update_t session_update;
+ /**< Update a security session. */
+ security_session_stats_get_t session_stats_get;
+ /**< Get security session statistics. */
+ security_session_destroy_t session_destroy;
+ /**< Clear a security sessions private data. */
+ security_set_pkt_metadata_t set_pkt_metadata;
+ /**< Update mbuf metadata. */
+ security_capabilities_get_t capabilities_get;
+ /**< Get security capabilities. */
+};
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _RTE_SECURITY_DRIVER_H_ */
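A rough sketch of how a driver might wire these typedefs together. All names below are hypothetical and the callbacks are stubs, kept only to show the expected signatures and the ops table layout:

#include <errno.h>
#include <rte_security_driver.h>

/* Hypothetical PMD callbacks matching the typedefs above. */
static int
dummy_session_create(void *device, struct rte_security_session_conf *conf,
		     struct rte_security_session *sess, struct rte_mempool *mp)
{
	(void)device; (void)conf; (void)sess; (void)mp;
	return -ENOTSUP;	/* a real driver programs the hardware here */
}

static int
dummy_session_destroy(void *device, struct rte_security_session *sess)
{
	(void)device; (void)sess;
	return 0;
}

static const struct rte_security_capability *
dummy_capabilities_get(void *device)
{
	(void)device;
	return NULL;	/* a real driver returns its static capability array */
}

/* Ops table the PMD exposes to the security library; callbacks not
 * implemented in this sketch are simply left NULL. */
struct rte_security_ops dummy_security_ops = {
	.session_create = dummy_session_create,
	.session_destroy = dummy_session_destroy,
	.capabilities_get = dummy_capabilities_get,
};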
diff --git a/lib/librte_security/rte_security_version.map b/lib/librte_security/rte_security_version.map
new file mode 100644
index 00000000..e12c04b2
--- /dev/null
+++ b/lib/librte_security/rte_security_version.map
@@ -0,0 +1,14 @@
+EXPERIMENTAL {
+ global:
+
+ rte_security_attach_session;
+ rte_security_capabilities_get;
+ rte_security_capability_get;
+ rte_security_session_create;
+ rte_security_session_destroy;
+ rte_security_session_stats_get;
+ rte_security_session_update;
+ rte_security_set_pkt_metadata;
+
+ local: *;
+};
diff --git a/lib/librte_table/Makefile b/lib/librte_table/Makefile
index 8ddc8804..2e32fbf1 100644
--- a/lib/librte_table/Makefile
+++ b/lib/librte_table/Makefile
@@ -38,10 +38,15 @@ LIB = librte_table.a
CFLAGS += -O3
CFLAGS += $(WERROR_FLAGS)
+LDLIBS += -lrte_eal -lrte_mempool -lrte_mbuf -lrte_port
+LDLIBS += -lrte_lpm -lrte_hash
+ifeq ($(CONFIG_RTE_LIBRTE_ACL),y)
+LDLIBS += -lrte_acl
+endif
EXPORT_MAP := rte_table_version.map
-LIBABIVER := 2
+LIBABIVER := 3
#
# all source are stored in SRCS-y
diff --git a/lib/librte_table/rte_table_hash.h b/lib/librte_table/rte_table_hash.h
index 57505a6f..15f1902b 100644
--- a/lib/librte_table/rte_table_hash.h
+++ b/lib/librte_table/rte_table_hash.h
@@ -1,7 +1,7 @@
/*-
* BSD LICENSE
*
- * Copyright(c) 2010-2016 Intel Corporation. All rights reserved.
+ * Copyright(c) 2010-2017 Intel Corporation. All rights reserved.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -45,8 +45,6 @@ extern "C" {
* These tables use the exact match criterion to uniquely associate data to
* lookup keys.
*
- * Use-cases: Flow classification table, Address Resolution Protocol (ARP) table
- *
* Hash table types:
* 1. Entry add strategy on bucket full:
* a. Least Recently Used (LRU): One of the existing keys in the bucket is
@@ -59,7 +57,7 @@ extern "C" {
* to the bucket, it also becomes the new MRU key. When a key needs to
* be picked and dropped, the most likely candidate for drop, i.e. the
* current LRU key, is always picked. The LRU logic requires maintaining
- * specific data structures per each bucket.
+ *      specific data structures per bucket. Use-cases: flow cache, etc.
* b. Extendible bucket (ext): The bucket is extended with space for 4 more
* keys. This is done by allocating additional memory at table init time,
* which is used to create a pool of free keys (the size of this pool is
@@ -73,20 +71,8 @@ extern "C" {
* first group of 4 keys, the search continues beyond the first group of
* 4 keys, potentially until all keys in this bucket are examined. The
* extendible bucket logic requires maintaining specific data structures
- * per table and per each bucket.
- * 2. Key signature computation:
- * a. Pre-computed key signature: The key lookup operation is split between
- * two CPU cores. The first CPU core (typically the CPU core performing
- * packet RX) extracts the key from the input packet, computes the key
- * signature and saves both the key and the key signature in the packet
- * buffer as packet meta-data. The second CPU core reads both the key and
- * the key signature from the packet meta-data and performs the bucket
- * search step of the key lookup operation.
- * b. Key signature computed on lookup (do-sig): The same CPU core reads
- * the key from the packet meta-data, uses it to compute the key
- * signature and also performs the bucket search step of the key lookup
- * operation.
- * 3. Key size:
+ *      per table and per bucket. Use-cases: flow table, etc.
+ * 2. Key size:
* a. Configurable key size
* b. Single key size (8-byte, 16-byte or 32-byte key size)
*
@@ -98,59 +84,28 @@ extern "C" {
/** Hash function */
typedef uint64_t (*rte_table_hash_op_hash)(
void *key,
+ void *key_mask,
uint32_t key_size,
uint64_t seed);
-/**
- * Hash tables with configurable key size
- *
- */
-/** Extendible bucket hash table parameters */
-struct rte_table_hash_ext_params {
+/** Hash table parameters */
+struct rte_table_hash_params {
+ /** Name */
+ const char *name;
+
/** Key size (number of bytes) */
uint32_t key_size;
- /** Maximum number of keys */
- uint32_t n_keys;
-
- /** Number of hash table buckets. Each bucket stores up to 4 keys. */
- uint32_t n_buckets;
-
- /** Number of hash table bucket extensions. Each bucket extension has
- space for 4 keys and each bucket can have 0, 1 or more extensions. */
- uint32_t n_buckets_ext;
-
- /** Hash function */
- rte_table_hash_op_hash f_hash;
-
- /** Seed value for the hash function */
- uint64_t seed;
-
- /** Byte offset within packet meta-data where the 4-byte key signature
- is located. Valid for pre-computed key signature tables, ignored for
- do-sig tables. */
- uint32_t signature_offset;
-
/** Byte offset within packet meta-data where the key is located */
uint32_t key_offset;
-};
-/** Extendible bucket hash table operations for pre-computed key signature */
-extern struct rte_table_ops rte_table_hash_ext_ops;
-
-/** Extendible bucket hash table operations for key signature computed on
- lookup ("do-sig") */
-extern struct rte_table_ops rte_table_hash_ext_dosig_ops;
-
-/** LRU hash table parameters */
-struct rte_table_hash_lru_params {
- /** Key size (number of bytes) */
- uint32_t key_size;
+ /** Key mask */
+ uint8_t *key_mask;
- /** Maximum number of keys */
+ /** Number of keys */
uint32_t n_keys;
- /** Number of hash table buckets. Each bucket stores up to 4 keys. */
+ /** Number of buckets */
uint32_t n_buckets;
/** Hash function */
@@ -158,239 +113,23 @@ struct rte_table_hash_lru_params {
/** Seed value for the hash function */
uint64_t seed;
-
- /** Byte offset within packet meta-data where the 4-byte key signature
- is located. Valid for pre-computed key signature tables, ignored for
- do-sig tables. */
- uint32_t signature_offset;
-
- /** Byte offset within packet meta-data where the key is located */
- uint32_t key_offset;
-};
-
-/** LRU hash table operations for pre-computed key signature */
-extern struct rte_table_ops rte_table_hash_lru_ops;
-
-/** LRU hash table operations for key signature computed on lookup ("do-sig") */
-extern struct rte_table_ops rte_table_hash_lru_dosig_ops;
-
-/**
- * 8-byte key hash tables
- *
- */
-/** LRU hash table parameters */
-struct rte_table_hash_key8_lru_params {
- /** Maximum number of entries (and keys) in the table */
- uint32_t n_entries;
-
- /** Hash function */
- rte_table_hash_op_hash f_hash;
-
- /** Seed for the hash function */
- uint64_t seed;
-
- /** Byte offset within packet meta-data where the 4-byte key signature
- is located. Valid for pre-computed key signature tables, ignored for
- do-sig tables. */
- uint32_t signature_offset;
-
- /** Byte offset within packet meta-data where the key is located */
- uint32_t key_offset;
-
- /** Bit-mask to be AND-ed to the key on lookup */
- uint8_t *key_mask;
};
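With the per-flavour parameter structures gone, every hash table variant is created from the same rte_table_hash_params. A sketch of typical usage follows; the entry type, sizes and the my_hash callback are placeholders, not part of the library:

#include <rte_table_hash.h>

struct my_flow_entry { uint32_t port_out; };	/* placeholder entry type */

static void *
create_flow_table(void)
{
	struct rte_table_hash_params params = {
		.name = "flow_table",
		.key_size = 16,
		.key_offset = 0,	/* key location within the mbuf meta-data */
		.key_mask = NULL,	/* NULL means "match all key bytes" */
		.n_keys = 1 << 16,
		.n_buckets = 1 << 14,
		.f_hash = my_hash,	/* callback with the rte_table_hash_op_hash signature */
		.seed = 0,
	};

	/* All flavours share the same f_create() prototype from struct rte_table_ops. */
	return rte_table_hash_key16_ext_ops.f_create(&params, 0 /* socket */,
			sizeof(struct my_flow_entry));
}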
-/** LRU hash table operations for pre-computed key signature */
-extern struct rte_table_ops rte_table_hash_key8_lru_ops;
-
-/** LRU hash table operations for key signature computed on lookup ("do-sig") */
-extern struct rte_table_ops rte_table_hash_key8_lru_dosig_ops;
-
-/** Extendible bucket hash table parameters */
-struct rte_table_hash_key8_ext_params {
- /** Maximum number of entries (and keys) in the table */
- uint32_t n_entries;
-
- /** Number of entries (and keys) for hash table bucket extensions. Each
- bucket is extended in increments of 4 keys. */
- uint32_t n_entries_ext;
-
- /** Hash function */
- rte_table_hash_op_hash f_hash;
-
- /** Seed for the hash function */
- uint64_t seed;
-
- /** Byte offset within packet meta-data where the 4-byte key signature
- is located. Valid for pre-computed key signature tables, ignored for
- do-sig tables. */
- uint32_t signature_offset;
-
- /** Byte offset within packet meta-data where the key is located */
- uint32_t key_offset;
-
- /** Bit-mask to be AND-ed to the key on lookup */
- uint8_t *key_mask;
-};
-
-/** Extendible bucket hash table operations for pre-computed key signature */
+/** Extendible bucket hash table operations */
+extern struct rte_table_ops rte_table_hash_ext_ops;
extern struct rte_table_ops rte_table_hash_key8_ext_ops;
-
-/** Extendible bucket hash table operations for key signature computed on
- lookup ("do-sig") */
-extern struct rte_table_ops rte_table_hash_key8_ext_dosig_ops;
-
-/**
- * 16-byte key hash tables
- *
- */
-/** LRU hash table parameters */
-struct rte_table_hash_key16_lru_params {
- /** Maximum number of entries (and keys) in the table */
- uint32_t n_entries;
-
- /** Hash function */
- rte_table_hash_op_hash f_hash;
-
- /** Seed for the hash function */
- uint64_t seed;
-
- /** Byte offset within packet meta-data where the 4-byte key signature
- is located. Valid for pre-computed key signature tables, ignored for
- do-sig tables. */
- uint32_t signature_offset;
-
- /** Byte offset within packet meta-data where the key is located */
- uint32_t key_offset;
-
- /** Bit-mask to be AND-ed to the key on lookup */
- uint8_t *key_mask;
-};
-
-/** LRU hash table operations for pre-computed key signature */
-extern struct rte_table_ops rte_table_hash_key16_lru_ops;
-
-/** LRU hash table operations for key signature computed on lookup
- ("do-sig") */
-extern struct rte_table_ops rte_table_hash_key16_lru_dosig_ops;
-
-/** Extendible bucket hash table parameters */
-struct rte_table_hash_key16_ext_params {
- /** Maximum number of entries (and keys) in the table */
- uint32_t n_entries;
-
- /** Number of entries (and keys) for hash table bucket extensions. Each
- bucket is extended in increments of 4 keys. */
- uint32_t n_entries_ext;
-
- /** Hash function */
- rte_table_hash_op_hash f_hash;
-
- /** Seed for the hash function */
- uint64_t seed;
-
- /** Byte offset within packet meta-data where the 4-byte key signature
- is located. Valid for pre-computed key signature tables, ignored for
- do-sig tables. */
- uint32_t signature_offset;
-
- /** Byte offset within packet meta-data where the key is located */
- uint32_t key_offset;
-
- /** Bit-mask to be AND-ed to the key on lookup */
- uint8_t *key_mask;
-};
-
-/** Extendible bucket operations for pre-computed key signature */
extern struct rte_table_ops rte_table_hash_key16_ext_ops;
-
-/** Extendible bucket hash table operations for key signature computed on
- lookup ("do-sig") */
-extern struct rte_table_ops rte_table_hash_key16_ext_dosig_ops;
-
-/**
- * 32-byte key hash tables
- *
- */
-/** LRU hash table parameters */
-struct rte_table_hash_key32_lru_params {
- /** Maximum number of entries (and keys) in the table */
- uint32_t n_entries;
-
- /** Hash function */
- rte_table_hash_op_hash f_hash;
-
- /** Seed for the hash function */
- uint64_t seed;
-
- /** Byte offset within packet meta-data where the 4-byte key signature
- is located. Valid for pre-computed key signature tables, ignored for
- do-sig tables. */
- uint32_t signature_offset;
-
- /** Byte offset within packet meta-data where the key is located */
- uint32_t key_offset;
-};
-
-/** LRU hash table operations for pre-computed key signature */
-extern struct rte_table_ops rte_table_hash_key32_lru_ops;
-
-/** Extendible bucket hash table parameters */
-struct rte_table_hash_key32_ext_params {
- /** Maximum number of entries (and keys) in the table */
- uint32_t n_entries;
-
- /** Number of entries (and keys) for hash table bucket extensions. Each
- bucket is extended in increments of 4 keys. */
- uint32_t n_entries_ext;
-
- /** Hash function */
- rte_table_hash_op_hash f_hash;
-
- /** Seed for the hash function */
- uint64_t seed;
-
- /** Byte offset within packet meta-data where the 4-byte key signature
- is located. Valid for pre-computed key signature tables, ignored for
- do-sig tables. */
- uint32_t signature_offset;
-
- /** Byte offset within packet meta-data where the key is located */
- uint32_t key_offset;
-};
-
-/** Extendible bucket hash table operations */
extern struct rte_table_ops rte_table_hash_key32_ext_ops;
-/** Cuckoo hash table parameters */
-struct rte_table_hash_cuckoo_params {
- /** Key size (number of bytes */
- uint32_t key_size;
-
- /** Maximum number of hash table entries */
- uint32_t n_keys;
-
- /** Hash function used to calculate hash */
- rte_table_hash_op_hash f_hash;
-
- /** Seed value or Init value used by f_hash */
- uint32_t seed;
-
- /** Byte offset within packet meta-data where the 4-byte key signature
- is located. Valid for pre-computed key signature tables, ignored for
- do-sig tables. */
- uint32_t signature_offset;
-
- /** Byte offset within packet meta-data where the key is located */
- uint32_t key_offset;
+/** LRU hash table operations */
+extern struct rte_table_ops rte_table_hash_lru_ops;
- /** Hash table name */
- const char *name;
-};
+extern struct rte_table_ops rte_table_hash_key8_lru_ops;
+extern struct rte_table_ops rte_table_hash_key16_lru_ops;
+extern struct rte_table_ops rte_table_hash_key32_lru_ops;
/** Cuckoo hash table operations */
-extern struct rte_table_ops rte_table_hash_cuckoo_dosig_ops;
+extern struct rte_table_ops rte_table_hash_cuckoo_ops;
#ifdef __cplusplus
}
diff --git a/lib/librte_table/rte_table_hash_cuckoo.c b/lib/librte_table/rte_table_hash_cuckoo.c
index da1597fa..f3845c75 100644
--- a/lib/librte_table/rte_table_hash_cuckoo.c
+++ b/lib/librte_table/rte_table_hash_cuckoo.c
@@ -1,34 +1,34 @@
/*-
- * BSD LICENSE
+ * BSD LICENSE
*
- * Copyright(c) 2016 Intel Corporation. All rights reserved.
- * All rights reserved.
+ * Copyright(c) 2010-2017 Intel Corporation. All rights reserved.
+ * All rights reserved.
*
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
*
- * * Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in
- * the documentation and/or other materials provided with the
- * distribution.
- * * Neither the name of Intel Corporation nor the names of its
- * contributors may be used to endorse or promote products derived
- * from this software without specific prior written permission.
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
*
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#include <string.h>
#include <stdio.h>
@@ -66,25 +66,28 @@ struct rte_table_hash {
uint32_t n_keys;
rte_table_hash_op_hash f_hash;
uint32_t seed;
- uint32_t signature_offset;
uint32_t key_offset;
- const char *name;
/* cuckoo hash table object */
struct rte_hash *h_table;
/* Lookup table */
- uint8_t memory[0] __rte_cache_aligned; };
+ uint8_t memory[0] __rte_cache_aligned;
+};
static int
-check_params_create_hash_cuckoo(const struct
-rte_table_hash_cuckoo_params *params) {
- /* Check for valid parameters */
+check_params_create_hash_cuckoo(struct rte_table_hash_params *params)
+{
if (params == NULL) {
RTE_LOG(ERR, TABLE, "NULL Input Parameters.\n");
return -EINVAL;
}
+ if (params->name == NULL) {
+ RTE_LOG(ERR, TABLE, "Table name is NULL.\n");
+ return -EINVAL;
+ }
+
if (params->key_size == 0) {
RTE_LOG(ERR, TABLE, "Invalid key_size.\n");
return -EINVAL;
@@ -100,11 +103,6 @@ rte_table_hash_cuckoo_params *params) {
return -EINVAL;
}
- if (params->name == NULL) {
- RTE_LOG(ERR, TABLE, "Table name is NULL.\n");
- return -EINVAL;
- }
-
return 0;
}
@@ -113,34 +111,24 @@ rte_table_hash_cuckoo_create(void *params,
int socket_id,
uint32_t entry_size)
{
- struct rte_hash *rte_hash_handle;
+ struct rte_table_hash_params *p = params;
+ struct rte_hash *h_table;
struct rte_table_hash *t;
- uint32_t total_size, total_cl_size;
+ uint32_t total_size;
/* Check input parameters */
- struct rte_table_hash_cuckoo_params *p =
- (struct rte_table_hash_cuckoo_params *) params;
-
if (check_params_create_hash_cuckoo(params))
return NULL;
/* Memory allocation */
- total_cl_size =
- (sizeof(struct rte_table_hash) +
- RTE_CACHE_LINE_SIZE) / RTE_CACHE_LINE_SIZE;
- total_cl_size += (p->n_keys * entry_size +
- RTE_CACHE_LINE_SIZE) / RTE_CACHE_LINE_SIZE;
- total_size = total_cl_size * RTE_CACHE_LINE_SIZE;
-
- t = rte_zmalloc_socket("TABLE",
- total_size,
- RTE_CACHE_LINE_SIZE,
- socket_id);
+ total_size = sizeof(struct rte_table_hash) +
+ RTE_CACHE_LINE_ROUNDUP(p->n_keys * entry_size);
+
+ t = rte_zmalloc_socket(p->name, total_size, RTE_CACHE_LINE_SIZE, socket_id);
if (t == NULL) {
RTE_LOG(ERR, TABLE,
- "%s: Cannot allocate %u bytes for Cuckoo hash table\n",
- __func__,
- (uint32_t)sizeof(struct rte_table_hash));
+ "%s: Cannot allocate %u bytes for cuckoo hash table %s\n",
+ __func__, total_size, p->name);
return NULL;
}
@@ -154,13 +142,13 @@ rte_table_hash_cuckoo_create(void *params,
.name = p->name
};
- rte_hash_handle = rte_hash_find_existing(p->name);
- if (rte_hash_handle == NULL) {
- rte_hash_handle = rte_hash_create(&hash_cuckoo_params);
- if (NULL == rte_hash_handle) {
+ h_table = rte_hash_find_existing(p->name);
+ if (h_table == NULL) {
+ h_table = rte_hash_create(&hash_cuckoo_params);
+ if (h_table == NULL) {
RTE_LOG(ERR, TABLE,
- "%s: failed to create cuckoo hash table. keysize: %u",
- __func__, hash_cuckoo_params.key_len);
+ "%s: failed to create cuckoo hash table %s\n",
+ __func__, p->name);
rte_free(t);
return NULL;
}
@@ -172,26 +160,22 @@ rte_table_hash_cuckoo_create(void *params,
t->n_keys = p->n_keys;
t->f_hash = p->f_hash;
t->seed = p->seed;
- t->signature_offset = p->signature_offset;
t->key_offset = p->key_offset;
- t->name = p->name;
- t->h_table = rte_hash_handle;
+ t->h_table = h_table;
RTE_LOG(INFO, TABLE,
- "%s: Cuckoo Hash table memory footprint is %u bytes\n",
- __func__, total_size);
+ "%s: Cuckoo hash table %s memory footprint is %u bytes\n",
+ __func__, p->name, total_size);
return t;
}
static int
rte_table_hash_cuckoo_free(void *table) {
- if (table == NULL) {
- RTE_LOG(ERR, TABLE, "%s: table parameter is NULL\n", __func__);
- return -EINVAL;
- }
-
struct rte_table_hash *t = table;
+ if (table == NULL)
+ return -EINVAL;
+
rte_hash_free(t->h_table);
rte_free(t);
@@ -200,25 +184,18 @@ rte_table_hash_cuckoo_free(void *table) {
static int
rte_table_hash_cuckoo_entry_add(void *table, void *key, void *entry,
- int *key_found, void **entry_ptr) {
+ int *key_found, void **entry_ptr)
+{
+ struct rte_table_hash *t = table;
int pos = 0;
- if (table == NULL) {
- RTE_LOG(ERR, TABLE, "%s: table parameter is NULL\n", __func__);
- return -EINVAL;
- }
-
- if (key == NULL) {
- RTE_LOG(ERR, TABLE, "%s: key parameter is NULL\n", __func__);
- return -EINVAL;
- }
-
- if (entry == NULL) {
- RTE_LOG(ERR, TABLE, "%s: entry parameter is NULL\n", __func__);
+ /* Check input parameters */
+ if ((table == NULL) ||
+ (key == NULL) ||
+ (entry == NULL) ||
+ (key_found == NULL) ||
+ (entry_ptr == NULL))
return -EINVAL;
- }
-
- struct rte_table_hash *t = table;
/* Find Existing entries */
pos = rte_hash_lookup(t->h_table, key);
@@ -231,17 +208,15 @@ rte_table_hash_cuckoo_entry_add(void *table, void *key, void *entry,
*entry_ptr = existing_entry;
return 0;
-} else if (pos == -ENOENT) {
- /* Entry not found. Adding new entry */
+ }
+
+ if (pos == -ENOENT) {
+ /* Entry not found. Adding new entry */
uint8_t *new_entry;
pos = rte_hash_add_key(t->h_table, key);
- if (pos < 0) {
- RTE_LOG(ERR, TABLE,
- "%s: Entry not added, status : %u\n",
- __func__, pos);
+ if (pos < 0)
return pos;
- }
new_entry = &t->memory[pos * t->entry_size];
memcpy(new_entry, entry, t->entry_size);
@@ -250,25 +225,22 @@ rte_table_hash_cuckoo_entry_add(void *table, void *key, void *entry,
*entry_ptr = new_entry;
return 0;
}
+
return pos;
}
static int
rte_table_hash_cuckoo_entry_delete(void *table, void *key,
- int *key_found, __rte_unused void *entry) {
+ int *key_found, void *entry)
+{
+ struct rte_table_hash *t = table;
int pos = 0;
- if (table == NULL) {
- RTE_LOG(ERR, TABLE, "%s: table parameter is NULL\n", __func__);
- return -EINVAL;
- }
-
- if (key == NULL) {
- RTE_LOG(ERR, TABLE, "%s: key parameter is NULL\n", __func__);
+ /* Check input parameters */
+ if ((table == NULL) ||
+ (key == NULL) ||
+ (key_found == NULL))
return -EINVAL;
- }
-
- struct rte_table_hash *t = table;
pos = rte_hash_del_key(t->h_table, key);
if (pos >= 0) {
@@ -279,20 +251,21 @@ rte_table_hash_cuckoo_entry_delete(void *table, void *key,
memcpy(entry, entry_ptr, t->entry_size);
memset(&t->memory[pos * t->entry_size], 0, t->entry_size);
+ return 0;
}
+ *key_found = 0;
return pos;
}
-
static int
-rte_table_hash_cuckoo_lookup_dosig(void *table,
+rte_table_hash_cuckoo_lookup(void *table,
struct rte_mbuf **pkts,
uint64_t pkts_mask,
uint64_t *lookup_hit_mask,
void **entries)
{
- struct rte_table_hash *t = (struct rte_table_hash *)table;
+ struct rte_table_hash *t = table;
uint64_t pkts_mask_out = 0;
uint32_t i;
@@ -301,20 +274,19 @@ rte_table_hash_cuckoo_lookup_dosig(void *table,
RTE_TABLE_HASH_CUCKOO_STATS_PKTS_IN_ADD(t, n_pkts_in);
if ((pkts_mask & (pkts_mask + 1)) == 0) {
- const uint8_t *keys[64];
- int32_t positions[64], status;
+ const uint8_t *keys[RTE_PORT_IN_BURST_SIZE_MAX];
+ int32_t positions[RTE_PORT_IN_BURST_SIZE_MAX], status;
/* Keys for bulk lookup */
for (i = 0; i < n_pkts_in; i++)
keys[i] = RTE_MBUF_METADATA_UINT8_PTR(pkts[i],
- t->key_offset);
+ t->key_offset);
/* Bulk Lookup */
status = rte_hash_lookup_bulk(t->h_table,
(const void **) keys,
n_pkts_in,
positions);
-
if (status == 0) {
for (i = 0; i < n_pkts_in; i++) {
if (likely(positions[i] >= 0)) {
@@ -326,7 +298,7 @@ rte_table_hash_cuckoo_lookup_dosig(void *table,
}
}
}
- } else {
+ } else
for (i = 0; i < (uint32_t)(RTE_PORT_IN_BURST_SIZE_MAX
- __builtin_clzll(pkts_mask)); i++) {
uint64_t pkt_mask = 1LLU << i;
@@ -345,7 +317,6 @@ rte_table_hash_cuckoo_lookup_dosig(void *table,
}
}
}
- }
*lookup_hit_mask = pkts_mask_out;
RTE_TABLE_HASH_CUCKOO_STATS_PKTS_LOOKUP_MISS(t,
@@ -370,13 +341,13 @@ rte_table_hash_cuckoo_stats_read(void *table, struct rte_table_stats *stats,
return 0;
}
-struct rte_table_ops rte_table_hash_cuckoo_dosig_ops = {
+struct rte_table_ops rte_table_hash_cuckoo_ops = {
.f_create = rte_table_hash_cuckoo_create,
.f_free = rte_table_hash_cuckoo_free,
.f_add = rte_table_hash_cuckoo_entry_add,
.f_delete = rte_table_hash_cuckoo_entry_delete,
.f_add_bulk = NULL,
.f_delete_bulk = NULL,
- .f_lookup = rte_table_hash_cuckoo_lookup_dosig,
+ .f_lookup = rte_table_hash_cuckoo_lookup,
.f_stats = rte_table_hash_cuckoo_stats_read,
};
diff --git a/lib/librte_table/rte_table_hash_ext.c b/lib/librte_table/rte_table_hash_ext.c
index e7181026..3af1bcab 100644
--- a/lib/librte_table/rte_table_hash_ext.c
+++ b/lib/librte_table/rte_table_hash_ext.c
@@ -1,34 +1,34 @@
/*-
- * BSD LICENSE
+ * BSD LICENSE
*
- * Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
- * All rights reserved.
+ * Copyright(c) 2010-2017 Intel Corporation. All rights reserved.
+ * All rights reserved.
*
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
*
- * * Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in
- * the documentation and/or other materials provided with the
- * distribution.
- * * Neither the name of Intel Corporation nor the names of its
- * contributors may be used to endorse or promote products derived
- * from this software without specific prior written permission.
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
*
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#include <string.h>
@@ -106,7 +106,6 @@ struct rte_table_hash {
uint32_t n_buckets_ext;
rte_table_hash_op_hash f_hash;
uint64_t seed;
- uint32_t signature_offset;
uint32_t key_offset;
/* Internal */
@@ -120,6 +119,7 @@ struct rte_table_hash {
struct grinder grinders[RTE_PORT_IN_BURST_SIZE_MAX];
/* Tables */
+ uint64_t *key_mask;
struct bucket *buckets;
struct bucket *buckets_ext;
uint8_t *key_mem;
@@ -132,29 +132,53 @@ struct rte_table_hash {
};
static int
-check_params_create(struct rte_table_hash_ext_params *params)
+keycmp(void *a, void *b, void *b_mask, uint32_t n_bytes)
{
- uint32_t n_buckets_min;
+ uint64_t *a64 = a, *b64 = b, *b_mask64 = b_mask;
+ uint32_t i;
+
+ for (i = 0; i < n_bytes / sizeof(uint64_t); i++)
+ if (a64[i] != (b64[i] & b_mask64[i]))
+ return 1;
+
+ return 0;
+}
+
+static void
+keycpy(void *dst, void *src, void *src_mask, uint32_t n_bytes)
+{
+ uint64_t *dst64 = dst, *src64 = src, *src_mask64 = src_mask;
+ uint32_t i;
+
+ for (i = 0; i < n_bytes / sizeof(uint64_t); i++)
+ dst64[i] = src64[i] & src_mask64[i];
+}
+
+static int
+check_params_create(struct rte_table_hash_params *params)
+{
+ /* name */
+ if (params->name == NULL) {
+ RTE_LOG(ERR, TABLE, "%s: name invalid value\n", __func__);
+ return -EINVAL;
+ }
/* key_size */
- if ((params->key_size == 0) ||
+ if ((params->key_size < sizeof(uint64_t)) ||
(!rte_is_power_of_2(params->key_size))) {
RTE_LOG(ERR, TABLE, "%s: key_size invalid value\n", __func__);
return -EINVAL;
}
/* n_keys */
- if ((params->n_keys == 0) ||
- (!rte_is_power_of_2(params->n_keys))) {
+ if (params->n_keys == 0) {
RTE_LOG(ERR, TABLE, "%s: n_keys invalid value\n", __func__);
return -EINVAL;
}
/* n_buckets */
- n_buckets_min = (params->n_keys + KEYS_PER_BUCKET - 1) / params->n_keys;
if ((params->n_buckets == 0) ||
- (!rte_is_power_of_2(params->n_keys)) ||
- (params->n_buckets < n_buckets_min)) {
+ (!rte_is_power_of_2(params->n_buckets))) {
RTE_LOG(ERR, TABLE, "%s: n_buckets invalid value\n", __func__);
return -EINVAL;
}
@@ -171,15 +195,13 @@ check_params_create(struct rte_table_hash_ext_params *params)
static void *
rte_table_hash_ext_create(void *params, int socket_id, uint32_t entry_size)
{
- struct rte_table_hash_ext_params *p =
- params;
+ struct rte_table_hash_params *p = params;
struct rte_table_hash *t;
- uint32_t total_size, table_meta_sz;
- uint32_t bucket_sz, bucket_ext_sz, key_sz;
- uint32_t key_stack_sz, bkt_ext_stack_sz, data_sz;
- uint32_t bucket_offset, bucket_ext_offset, key_offset;
- uint32_t key_stack_offset, bkt_ext_stack_offset, data_offset;
- uint32_t i;
+ uint64_t table_meta_sz, key_mask_sz, bucket_sz, bucket_ext_sz, key_sz;
+ uint64_t key_stack_sz, bkt_ext_stack_sz, data_sz, total_size;
+ uint64_t key_mask_offset, bucket_offset, bucket_ext_offset, key_offset;
+ uint64_t key_stack_offset, bkt_ext_stack_offset, data_offset;
+ uint32_t n_buckets_ext, i;
/* Check input parameters */
if ((check_params_create(p) != 0) ||
@@ -188,38 +210,66 @@ rte_table_hash_ext_create(void *params, int socket_id, uint32_t entry_size)
(sizeof(struct bucket) != (RTE_CACHE_LINE_SIZE / 2)))
return NULL;
+ /*
+ * Table dimensioning
+ *
+ * Objective: Pick the number of bucket extensions (n_buckets_ext) so that
+ * it is guaranteed that n_keys keys can be stored in the table at any time.
+ *
+ * The worst case scenario takes place when all the n_keys keys fall into
+ * the same bucket. Actually, due to the KEYS_PER_BUCKET scheme, the worst
+ * case takes place when (n_keys - KEYS_PER_BUCKET + 1) keys fall into the
+ * same bucket, while the remaining (KEYS_PER_BUCKET - 1) keys each fall
+ * into a different bucket. This case defeats the purpose of the hash table.
+ * It indicates unsuitable f_hash or n_keys to n_buckets ratio.
+ *
+ * n_buckets_ext = n_keys / KEYS_PER_BUCKET + KEYS_PER_BUCKET - 1
+ */
+ n_buckets_ext = p->n_keys / KEYS_PER_BUCKET + KEYS_PER_BUCKET - 1;
+
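A worked example with illustrative values may make the dimensioning rule above concrete:

	/*
	 * Worked example (illustrative values): n_keys = 1024, KEYS_PER_BUCKET = 4
	 *   n_buckets_ext = 1024 / 4 + 4 - 1 = 259
	 * i.e. enough extension buckets to keep the guarantee even when nearly
	 * all keys chain off a single bucket.
	 */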
/* Memory allocation */
table_meta_sz = RTE_CACHE_LINE_ROUNDUP(sizeof(struct rte_table_hash));
+ key_mask_sz = RTE_CACHE_LINE_ROUNDUP(p->key_size);
bucket_sz = RTE_CACHE_LINE_ROUNDUP(p->n_buckets * sizeof(struct bucket));
bucket_ext_sz =
- RTE_CACHE_LINE_ROUNDUP(p->n_buckets_ext * sizeof(struct bucket));
+ RTE_CACHE_LINE_ROUNDUP(n_buckets_ext * sizeof(struct bucket));
key_sz = RTE_CACHE_LINE_ROUNDUP(p->n_keys * p->key_size);
key_stack_sz = RTE_CACHE_LINE_ROUNDUP(p->n_keys * sizeof(uint32_t));
bkt_ext_stack_sz =
- RTE_CACHE_LINE_ROUNDUP(p->n_buckets_ext * sizeof(uint32_t));
+ RTE_CACHE_LINE_ROUNDUP(n_buckets_ext * sizeof(uint32_t));
data_sz = RTE_CACHE_LINE_ROUNDUP(p->n_keys * entry_size);
- total_size = table_meta_sz + bucket_sz + bucket_ext_sz + key_sz +
- key_stack_sz + bkt_ext_stack_sz + data_sz;
+ total_size = table_meta_sz + key_mask_sz + bucket_sz + bucket_ext_sz +
+ key_sz + key_stack_sz + bkt_ext_stack_sz + data_sz;
- t = rte_zmalloc_socket("TABLE", total_size, RTE_CACHE_LINE_SIZE, socket_id);
+ if (total_size > SIZE_MAX) {
+ RTE_LOG(ERR, TABLE, "%s: Cannot allocate %" PRIu64 " bytes"
+ " for hash table %s\n",
+ __func__, total_size, p->name);
+ return NULL;
+ }
+
+ t = rte_zmalloc_socket(p->name,
+ (size_t)total_size,
+ RTE_CACHE_LINE_SIZE,
+ socket_id);
if (t == NULL) {
- RTE_LOG(ERR, TABLE,
- "%s: Cannot allocate %u bytes for hash table\n",
- __func__, total_size);
+ RTE_LOG(ERR, TABLE, "%s: Cannot allocate %" PRIu64 " bytes"
+ " for hash table %s\n",
+ __func__, total_size, p->name);
return NULL;
}
- RTE_LOG(INFO, TABLE, "%s (%u-byte key): Hash table memory footprint is "
- "%u bytes\n", __func__, p->key_size, total_size);
+ RTE_LOG(INFO, TABLE, "%s (%u-byte key): Hash table %s memory "
+ "footprint is %" PRIu64 " bytes\n",
+ __func__, p->key_size, p->name, total_size);
/* Memory initialization */
t->key_size = p->key_size;
t->entry_size = entry_size;
t->n_keys = p->n_keys;
t->n_buckets = p->n_buckets;
- t->n_buckets_ext = p->n_buckets_ext;
+ t->n_buckets_ext = n_buckets_ext;
t->f_hash = p->f_hash;
t->seed = p->seed;
- t->signature_offset = p->signature_offset;
t->key_offset = p->key_offset;
/* Internal */
@@ -228,13 +278,15 @@ rte_table_hash_ext_create(void *params, int socket_id, uint32_t entry_size)
t->data_size_shl = __builtin_ctzl(entry_size);
/* Tables */
- bucket_offset = 0;
+ key_mask_offset = 0;
+ bucket_offset = key_mask_offset + key_mask_sz;
bucket_ext_offset = bucket_offset + bucket_sz;
key_offset = bucket_ext_offset + bucket_ext_sz;
key_stack_offset = key_offset + key_sz;
bkt_ext_stack_offset = key_stack_offset + key_stack_sz;
data_offset = bkt_ext_stack_offset + bkt_ext_stack_sz;
+ t->key_mask = (uint64_t *) &t->memory[key_mask_offset];
t->buckets = (struct bucket *) &t->memory[bucket_offset];
t->buckets_ext = (struct bucket *) &t->memory[bucket_ext_offset];
t->key_mem = &t->memory[key_offset];
@@ -242,6 +294,12 @@ rte_table_hash_ext_create(void *params, int socket_id, uint32_t entry_size)
t->bkt_ext_stack = (uint32_t *) &t->memory[bkt_ext_stack_offset];
t->data_mem = &t->memory[data_offset];
+ /* Key mask */
+ if (p->key_mask == NULL)
+ memset(t->key_mask, 0xFF, p->key_size);
+ else
+ memcpy(t->key_mask, p->key_mask, p->key_size);
+
/* Key stack */
for (i = 0; i < t->n_keys; i++)
t->key_stack[i] = t->n_keys - 1 - i;
@@ -277,7 +335,7 @@ rte_table_hash_ext_entry_add(void *table, void *key, void *entry,
uint64_t sig;
uint32_t bkt_index, i;
- sig = t->f_hash(key, t->key_size, t->seed);
+ sig = t->f_hash(key, t->key_mask, t->key_size, t->seed);
bkt_index = sig & t->bucket_mask;
bkt0 = &t->buckets[bkt_index];
sig = (sig >> 16) | 1LLU;
@@ -290,7 +348,7 @@ rte_table_hash_ext_entry_add(void *table, void *key, void *entry,
uint8_t *bkt_key =
&t->key_mem[bkt_key_index << t->key_size_shl];
- if ((sig == bkt_sig) && (memcmp(key, bkt_key,
+ if ((sig == bkt_sig) && (keycmp(bkt_key, key, t->key_mask,
t->key_size) == 0)) {
uint8_t *data = &t->data_mem[bkt_key_index <<
t->data_size_shl];
@@ -327,7 +385,7 @@ rte_table_hash_ext_entry_add(void *table, void *key, void *entry,
bkt->sig[i] = (uint16_t) sig;
bkt->key_pos[i] = bkt_key_index;
- memcpy(bkt_key, key, t->key_size);
+ keycpy(bkt_key, key, t->key_mask, t->key_size);
memcpy(data, entry, t->entry_size);
*key_found = 0;
@@ -358,7 +416,7 @@ rte_table_hash_ext_entry_add(void *table, void *key, void *entry,
/* Install new key into bucket */
bkt->sig[0] = (uint16_t) sig;
bkt->key_pos[0] = bkt_key_index;
- memcpy(bkt_key, key, t->key_size);
+ keycpy(bkt_key, key, t->key_mask, t->key_size);
memcpy(data, entry, t->entry_size);
*key_found = 0;
@@ -378,7 +436,7 @@ void *entry)
uint64_t sig;
uint32_t bkt_index, i;
- sig = t->f_hash(key, t->key_size, t->seed);
+ sig = t->f_hash(key, t->key_mask, t->key_size, t->seed);
bkt_index = sig & t->bucket_mask;
bkt0 = &t->buckets[bkt_index];
sig = (sig >> 16) | 1LLU;
@@ -392,7 +450,7 @@ void *entry)
uint8_t *bkt_key = &t->key_mem[bkt_key_index <<
t->key_size_shl];
- if ((sig == bkt_sig) && (memcmp(key, bkt_key,
+ if ((sig == bkt_sig) && (keycmp(bkt_key, key, t->key_mask,
t->key_size) == 0)) {
uint8_t *data = &t->data_mem[bkt_key_index <<
t->data_size_shl];
@@ -437,8 +495,7 @@ static int rte_table_hash_ext_lookup_unoptimized(
struct rte_mbuf **pkts,
uint64_t pkts_mask,
uint64_t *lookup_hit_mask,
- void **entries,
- int dosig)
+ void **entries)
{
struct rte_table_hash *t = (struct rte_table_hash *) table;
uint64_t pkts_mask_out = 0;
@@ -458,11 +515,7 @@ static int rte_table_hash_ext_lookup_unoptimized(
pkt = pkts[pkt_index];
key = RTE_MBUF_METADATA_UINT8_PTR(pkt, t->key_offset);
- if (dosig)
- sig = (uint64_t) t->f_hash(key, t->key_size, t->seed);
- else
- sig = RTE_MBUF_METADATA_UINT32(pkt,
- t->signature_offset);
+ sig = (uint64_t) t->f_hash(key, t->key_mask, t->key_size, t->seed);
bkt_index = sig & t->bucket_mask;
bkt0 = &t->buckets[bkt_index];
@@ -476,8 +529,8 @@ static int rte_table_hash_ext_lookup_unoptimized(
uint8_t *bkt_key = &t->key_mem[bkt_key_index <<
t->key_size_shl];
- if ((sig == bkt_sig) && (memcmp(key, bkt_key,
- t->key_size) == 0)) {
+ if ((sig == bkt_sig) && (keycmp(bkt_key, key,
+ t->key_mask, t->key_size) == 0)) {
uint8_t *data = &t->data_mem[
bkt_key_index << t->data_size_shl];
@@ -576,11 +629,12 @@ static int rte_table_hash_ext_lookup_unoptimized(
{ \
uint64_t *pkt_key = RTE_MBUF_METADATA_UINT64_PTR(mbuf, f->key_offset);\
uint64_t *bkt_key = (uint64_t *) key; \
+ uint64_t *key_mask = f->key_mask; \
\
switch (f->key_size) { \
case 8: \
{ \
- uint64_t xor = pkt_key[0] ^ bkt_key[0]; \
+ uint64_t xor = (pkt_key[0] & key_mask[0]) ^ bkt_key[0]; \
match_key = 0; \
if (xor == 0) \
match_key = 1; \
@@ -591,8 +645,8 @@ static int rte_table_hash_ext_lookup_unoptimized(
{ \
uint64_t xor[2], or; \
\
- xor[0] = pkt_key[0] ^ bkt_key[0]; \
- xor[1] = pkt_key[1] ^ bkt_key[1]; \
+ xor[0] = (pkt_key[0] & key_mask[0]) ^ bkt_key[0]; \
+ xor[1] = (pkt_key[1] & key_mask[1]) ^ bkt_key[1]; \
or = xor[0] | xor[1]; \
match_key = 0; \
if (or == 0) \
@@ -604,10 +658,10 @@ static int rte_table_hash_ext_lookup_unoptimized(
{ \
uint64_t xor[4], or; \
\
- xor[0] = pkt_key[0] ^ bkt_key[0]; \
- xor[1] = pkt_key[1] ^ bkt_key[1]; \
- xor[2] = pkt_key[2] ^ bkt_key[2]; \
- xor[3] = pkt_key[3] ^ bkt_key[3]; \
+ xor[0] = (pkt_key[0] & key_mask[0]) ^ bkt_key[0]; \
+ xor[1] = (pkt_key[1] & key_mask[1]) ^ bkt_key[1]; \
+ xor[2] = (pkt_key[2] & key_mask[2]) ^ bkt_key[2]; \
+ xor[3] = (pkt_key[3] & key_mask[3]) ^ bkt_key[3]; \
or = xor[0] | xor[1] | xor[2] | xor[3]; \
match_key = 0; \
if (or == 0) \
@@ -619,14 +673,14 @@ static int rte_table_hash_ext_lookup_unoptimized(
{ \
uint64_t xor[8], or; \
\
- xor[0] = pkt_key[0] ^ bkt_key[0]; \
- xor[1] = pkt_key[1] ^ bkt_key[1]; \
- xor[2] = pkt_key[2] ^ bkt_key[2]; \
- xor[3] = pkt_key[3] ^ bkt_key[3]; \
- xor[4] = pkt_key[4] ^ bkt_key[4]; \
- xor[5] = pkt_key[5] ^ bkt_key[5]; \
- xor[6] = pkt_key[6] ^ bkt_key[6]; \
- xor[7] = pkt_key[7] ^ bkt_key[7]; \
+ xor[0] = (pkt_key[0] & key_mask[0]) ^ bkt_key[0]; \
+ xor[1] = (pkt_key[1] & key_mask[1]) ^ bkt_key[1]; \
+ xor[2] = (pkt_key[2] & key_mask[2]) ^ bkt_key[2]; \
+ xor[3] = (pkt_key[3] & key_mask[3]) ^ bkt_key[3]; \
+ xor[4] = (pkt_key[4] & key_mask[4]) ^ bkt_key[4]; \
+ xor[5] = (pkt_key[5] & key_mask[5]) ^ bkt_key[5]; \
+ xor[6] = (pkt_key[6] & key_mask[6]) ^ bkt_key[6]; \
+ xor[7] = (pkt_key[7] & key_mask[7]) ^ bkt_key[7]; \
or = xor[0] | xor[1] | xor[2] | xor[3] | \
xor[4] | xor[5] | xor[6] | xor[7]; \
match_key = 0; \
@@ -637,7 +691,7 @@ static int rte_table_hash_ext_lookup_unoptimized(
\
default: \
match_key = 0; \
- if (memcmp(pkt_key, bkt_key, f->key_size) == 0) \
+ if (keycmp(bkt_key, pkt_key, key_mask, f->key_size) == 0) \
match_key = 1; \
} \
}
@@ -685,38 +739,7 @@ static int rte_table_hash_ext_lookup_unoptimized(
rte_prefetch0(RTE_MBUF_METADATA_UINT8_PTR(mbuf01, key_offset));\
}
-#define lookup2_stage1(t, g, pkts, pkt10_index, pkt11_index) \
-{ \
- struct grinder *g10, *g11; \
- uint64_t sig10, sig11, bkt10_index, bkt11_index; \
- struct rte_mbuf *mbuf10, *mbuf11; \
- struct bucket *bkt10, *bkt11, *buckets = t->buckets; \
- uint64_t bucket_mask = t->bucket_mask; \
- uint32_t signature_offset = t->signature_offset; \
- \
- mbuf10 = pkts[pkt10_index]; \
- sig10 = (uint64_t) RTE_MBUF_METADATA_UINT32(mbuf10, signature_offset);\
- bkt10_index = sig10 & bucket_mask; \
- bkt10 = &buckets[bkt10_index]; \
- \
- mbuf11 = pkts[pkt11_index]; \
- sig11 = (uint64_t) RTE_MBUF_METADATA_UINT32(mbuf11, signature_offset);\
- bkt11_index = sig11 & bucket_mask; \
- bkt11 = &buckets[bkt11_index]; \
- \
- rte_prefetch0(bkt10); \
- rte_prefetch0(bkt11); \
- \
- g10 = &g[pkt10_index]; \
- g10->sig = sig10; \
- g10->bkt = bkt10; \
- \
- g11 = &g[pkt11_index]; \
- g11->sig = sig11; \
- g11->bkt = bkt11; \
-}
-
-#define lookup2_stage1_dosig(t, g, pkts, pkt10_index, pkt11_index) \
+#define lookup2_stage1(t, g, pkts, pkt10_index, pkt11_index) \
{ \
struct grinder *g10, *g11; \
uint64_t sig10, sig11, bkt10_index, bkt11_index; \
@@ -731,13 +754,13 @@ static int rte_table_hash_ext_lookup_unoptimized(
\
mbuf10 = pkts[pkt10_index]; \
key10 = RTE_MBUF_METADATA_UINT8_PTR(mbuf10, key_offset); \
- sig10 = (uint64_t) f_hash(key10, key_size, seed); \
+ sig10 = (uint64_t) f_hash(key10, t->key_mask, key_size, seed); \
bkt10_index = sig10 & bucket_mask; \
bkt10 = &buckets[bkt10_index]; \
\
mbuf11 = pkts[pkt11_index]; \
key11 = RTE_MBUF_METADATA_UINT8_PTR(mbuf11, key_offset); \
- sig11 = (uint64_t) f_hash(key11, key_size, seed); \
+ sig11 = (uint64_t) f_hash(key11, t->key_mask, key_size, seed); \
bkt11_index = sig11 & bucket_mask; \
bkt11 = &buckets[bkt11_index]; \
\
@@ -874,7 +897,7 @@ static int rte_table_hash_ext_lookup(
/* Cannot run the pipeline with less than 7 packets */
if (__builtin_popcountll(pkts_mask) < 7) {
status = rte_table_hash_ext_lookup_unoptimized(table, pkts,
- pkts_mask, lookup_hit_mask, entries, 0);
+ pkts_mask, lookup_hit_mask, entries);
RTE_TABLE_HASH_EXT_STATS_PKTS_LOOKUP_MISS(t, n_pkts_in -
__builtin_popcountll(*lookup_hit_mask));
return status;
@@ -982,144 +1005,7 @@ static int rte_table_hash_ext_lookup(
uint64_t pkts_mask_out_slow = 0;
status = rte_table_hash_ext_lookup_unoptimized(table, pkts,
- pkts_mask_match_many, &pkts_mask_out_slow, entries, 0);
- pkts_mask_out |= pkts_mask_out_slow;
- }
-
- *lookup_hit_mask = pkts_mask_out;
- RTE_TABLE_HASH_EXT_STATS_PKTS_LOOKUP_MISS(t, n_pkts_in - __builtin_popcountll(pkts_mask_out));
- return status;
-}
-
-static int rte_table_hash_ext_lookup_dosig(
- void *table,
- struct rte_mbuf **pkts,
- uint64_t pkts_mask,
- uint64_t *lookup_hit_mask,
- void **entries)
-{
- struct rte_table_hash *t = (struct rte_table_hash *) table;
- struct grinder *g = t->grinders;
- uint64_t pkt00_index, pkt01_index, pkt10_index, pkt11_index;
- uint64_t pkt20_index, pkt21_index, pkt30_index, pkt31_index;
- uint64_t pkts_mask_out = 0, pkts_mask_match_many = 0;
- int status = 0;
-
- __rte_unused uint32_t n_pkts_in = __builtin_popcountll(pkts_mask);
- RTE_TABLE_HASH_EXT_STATS_PKTS_IN_ADD(t, n_pkts_in);
-
- /* Cannot run the pipeline with less than 7 packets */
- if (__builtin_popcountll(pkts_mask) < 7) {
- status = rte_table_hash_ext_lookup_unoptimized(table, pkts,
- pkts_mask, lookup_hit_mask, entries, 1);
- RTE_TABLE_HASH_EXT_STATS_PKTS_LOOKUP_MISS(t, n_pkts_in -
- __builtin_popcountll(*lookup_hit_mask));
- return status;
- }
-
- /* Pipeline stage 0 */
- lookup2_stage0(t, g, pkts, pkts_mask, pkt00_index, pkt01_index);
-
- /* Pipeline feed */
- pkt10_index = pkt00_index;
- pkt11_index = pkt01_index;
-
- /* Pipeline stage 0 */
- lookup2_stage0(t, g, pkts, pkts_mask, pkt00_index, pkt01_index);
-
- /* Pipeline stage 1 */
- lookup2_stage1_dosig(t, g, pkts, pkt10_index, pkt11_index);
-
- /* Pipeline feed */
- pkt20_index = pkt10_index;
- pkt21_index = pkt11_index;
- pkt10_index = pkt00_index;
- pkt11_index = pkt01_index;
-
- /* Pipeline stage 0 */
- lookup2_stage0(t, g, pkts, pkts_mask, pkt00_index, pkt01_index);
-
- /* Pipeline stage 1 */
- lookup2_stage1_dosig(t, g, pkts, pkt10_index, pkt11_index);
-
- /* Pipeline stage 2 */
- lookup2_stage2(t, g, pkt20_index, pkt21_index, pkts_mask_match_many);
-
- /*
- * Pipeline run
- *
- */
- for ( ; pkts_mask; ) {
- /* Pipeline feed */
- pkt30_index = pkt20_index;
- pkt31_index = pkt21_index;
- pkt20_index = pkt10_index;
- pkt21_index = pkt11_index;
- pkt10_index = pkt00_index;
- pkt11_index = pkt01_index;
-
- /* Pipeline stage 0 */
- lookup2_stage0_with_odd_support(t, g, pkts, pkts_mask,
- pkt00_index, pkt01_index);
-
- /* Pipeline stage 1 */
- lookup2_stage1_dosig(t, g, pkts, pkt10_index, pkt11_index);
-
- /* Pipeline stage 2 */
- lookup2_stage2(t, g, pkt20_index, pkt21_index,
- pkts_mask_match_many);
-
- /* Pipeline stage 3 */
- lookup2_stage3(t, g, pkts, pkt30_index, pkt31_index,
- pkts_mask_out, entries);
- }
-
- /* Pipeline feed */
- pkt30_index = pkt20_index;
- pkt31_index = pkt21_index;
- pkt20_index = pkt10_index;
- pkt21_index = pkt11_index;
- pkt10_index = pkt00_index;
- pkt11_index = pkt01_index;
-
- /* Pipeline stage 1 */
- lookup2_stage1_dosig(t, g, pkts, pkt10_index, pkt11_index);
-
- /* Pipeline stage 2 */
- lookup2_stage2(t, g, pkt20_index, pkt21_index, pkts_mask_match_many);
-
- /* Pipeline stage 3 */
- lookup2_stage3(t, g, pkts, pkt30_index, pkt31_index, pkts_mask_out,
- entries);
-
- /* Pipeline feed */
- pkt30_index = pkt20_index;
- pkt31_index = pkt21_index;
- pkt20_index = pkt10_index;
- pkt21_index = pkt11_index;
-
- /* Pipeline stage 2 */
- lookup2_stage2(t, g, pkt20_index, pkt21_index, pkts_mask_match_many);
-
- /* Pipeline stage 3 */
- lookup2_stage3(t, g, pkts, pkt30_index, pkt31_index, pkts_mask_out,
- entries);
-
- /* Pipeline feed */
- pkt30_index = pkt20_index;
- pkt31_index = pkt21_index;
-
- /* Pipeline stage 3 */
- lookup2_stage3(t, g, pkts, pkt30_index, pkt31_index, pkts_mask_out,
- entries);
-
- /* Slow path */
- pkts_mask_match_many &= ~pkts_mask_out;
- if (pkts_mask_match_many) {
- uint64_t pkts_mask_out_slow = 0;
-
- status = rte_table_hash_ext_lookup_unoptimized(table, pkts,
- pkts_mask_match_many, &pkts_mask_out_slow, entries, 1);
+ pkts_mask_match_many, &pkts_mask_out_slow, entries);
pkts_mask_out |= pkts_mask_out_slow;
}
@@ -1152,14 +1038,3 @@ struct rte_table_ops rte_table_hash_ext_ops = {
.f_lookup = rte_table_hash_ext_lookup,
.f_stats = rte_table_hash_ext_stats_read,
};
-
-struct rte_table_ops rte_table_hash_ext_dosig_ops = {
- .f_create = rte_table_hash_ext_create,
- .f_free = rte_table_hash_ext_free,
- .f_add = rte_table_hash_ext_entry_add,
- .f_delete = rte_table_hash_ext_entry_delete,
- .f_add_bulk = NULL,
- .f_delete_bulk = NULL,
- .f_lookup = rte_table_hash_ext_lookup_dosig,
- .f_stats = rte_table_hash_ext_stats_read,
-};
diff --git a/lib/librte_table/rte_table_hash_key16.c b/lib/librte_table/rte_table_hash_key16.c
index ce057b78..b541735c 100644
--- a/lib/librte_table/rte_table_hash_key16.c
+++ b/lib/librte_table/rte_table_hash_key16.c
@@ -1,34 +1,34 @@
/*-
- * BSD LICENSE
+ * BSD LICENSE
*
- * Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
- * All rights reserved.
+ * Copyright(c) 2010-2017 Intel Corporation. All rights reserved.
+ * All rights reserved.
*
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
*
- * * Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in
- * the documentation and/or other materials provided with the
- * distribution.
- * * Neither the name of Intel Corporation nor the names of its
- * contributors may be used to endorse or promote products derived
- * from this software without specific prior written permission.
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
*
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#include <string.h>
#include <stdio.h>
@@ -42,7 +42,9 @@
#include "rte_table_hash.h"
#include "rte_lru.h"
-#define RTE_TABLE_HASH_KEY_SIZE 16
+#define KEY_SIZE 16
+
+#define KEYS_PER_BUCKET 4
#define RTE_BUCKET_ENTRY_VALID 0x1LLU
@@ -79,11 +81,9 @@ struct rte_table_hash {
/* Input parameters */
uint32_t n_buckets;
- uint32_t n_entries_per_bucket;
uint32_t key_size;
uint32_t entry_size;
uint32_t bucket_size;
- uint32_t signature_offset;
uint32_t key_offset;
uint64_t key_mask[2];
rte_table_hash_op_hash f_hash;
@@ -99,17 +99,55 @@ struct rte_table_hash {
};
static int
-check_params_create_lru(struct rte_table_hash_key16_lru_params *params) {
- /* n_entries */
- if (params->n_entries == 0) {
- RTE_LOG(ERR, TABLE, "%s: n_entries is zero\n", __func__);
+keycmp(void *a, void *b, void *b_mask)
+{
+ uint64_t *a64 = a, *b64 = b, *b_mask64 = b_mask;
+
+ return (a64[0] != (b64[0] & b_mask64[0])) ||
+ (a64[1] != (b64[1] & b_mask64[1]));
+}
+
+static void
+keycpy(void *dst, void *src, void *src_mask)
+{
+ uint64_t *dst64 = dst, *src64 = src, *src_mask64 = src_mask;
+
+ dst64[0] = src64[0] & src_mask64[0];
+ dst64[1] = src64[1] & src_mask64[1];
+}
+
+static int
+check_params_create(struct rte_table_hash_params *params)
+{
+ /* name */
+ if (params->name == NULL) {
+ RTE_LOG(ERR, TABLE, "%s: name invalid value\n", __func__);
+ return -EINVAL;
+ }
+
+ /* key_size */
+ if (params->key_size != KEY_SIZE) {
+ RTE_LOG(ERR, TABLE, "%s: key_size invalid value\n", __func__);
+ return -EINVAL;
+ }
+
+ /* n_keys */
+ if (params->n_keys == 0) {
+ RTE_LOG(ERR, TABLE, "%s: n_keys is zero\n", __func__);
+ return -EINVAL;
+ }
+
+ /* n_buckets */
+ if ((params->n_buckets == 0) ||
+ (!rte_is_power_of_2(params->n_buckets))) {
+ RTE_LOG(ERR, TABLE, "%s: n_buckets invalid value\n", __func__);
return -EINVAL;
}
/* f_hash */
if (params->f_hash == NULL) {
- RTE_LOG(ERR, TABLE,
- "%s: f_hash function pointer is NULL\n", __func__);
+ RTE_LOG(ERR, TABLE, "%s: f_hash function pointer is NULL\n",
+ __func__);
return -EINVAL;
}
@@ -121,46 +159,67 @@ rte_table_hash_create_key16_lru(void *params,
int socket_id,
uint32_t entry_size)
{
- struct rte_table_hash_key16_lru_params *p =
- (struct rte_table_hash_key16_lru_params *) params;
+ struct rte_table_hash_params *p = params;
struct rte_table_hash *f;
- uint32_t n_buckets, n_entries_per_bucket,
- key_size, bucket_size_cl, total_size, i;
+ uint64_t bucket_size, total_size;
+ uint32_t n_buckets, i;
/* Check input parameters */
- if ((check_params_create_lru(p) != 0) ||
+ if ((check_params_create(p) != 0) ||
((sizeof(struct rte_table_hash) % RTE_CACHE_LINE_SIZE) != 0) ||
((sizeof(struct rte_bucket_4_16) % 64) != 0))
return NULL;
- n_entries_per_bucket = 4;
- key_size = 16;
+
+ /*
+ * Table dimensioning
+ *
+	 * Objective: Pick the number of buckets (n_buckets) so that there is a chance
+ * to store n_keys keys in the table.
+ *
+ * Note: Since the buckets do not get extended, it is not possible to
+ * guarantee that n_keys keys can be stored in the table at any time. In the
+ * worst case scenario when all the n_keys fall into the same bucket, only
+ * a maximum of KEYS_PER_BUCKET keys will be stored in the table. This case
+ * defeats the purpose of the hash table. It indicates unsuitable f_hash or
+ * n_keys to n_buckets ratio.
+ *
+ * MIN(n_buckets) = (n_keys + KEYS_PER_BUCKET - 1) / KEYS_PER_BUCKET
+ */
+ n_buckets = rte_align32pow2(
+ (p->n_keys + KEYS_PER_BUCKET - 1) / KEYS_PER_BUCKET);
+ n_buckets = RTE_MAX(n_buckets, p->n_buckets);
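
A quick numeric check of the rule above, with hypothetical values n_keys = 1000 and a requested n_buckets = 128: MIN(n_buckets) = (1000 + 4 - 1) / 4 = 250, rte_align32pow2(250) = 256, and RTE_MAX(256, 128) = 256, so 256 buckets end up being allocated.
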
/* Memory allocation */
- n_buckets = rte_align32pow2((p->n_entries + n_entries_per_bucket - 1) /
- n_entries_per_bucket);
- bucket_size_cl = (sizeof(struct rte_bucket_4_16) + n_entries_per_bucket
- * entry_size + RTE_CACHE_LINE_SIZE - 1) / RTE_CACHE_LINE_SIZE;
- total_size = sizeof(struct rte_table_hash) + n_buckets *
- bucket_size_cl * RTE_CACHE_LINE_SIZE;
-
- f = rte_zmalloc_socket("TABLE", total_size, RTE_CACHE_LINE_SIZE, socket_id);
+ bucket_size = RTE_CACHE_LINE_ROUNDUP(sizeof(struct rte_bucket_4_16) +
+ KEYS_PER_BUCKET * entry_size);
+ total_size = sizeof(struct rte_table_hash) + n_buckets * bucket_size;
+
+ if (total_size > SIZE_MAX) {
+ RTE_LOG(ERR, TABLE, "%s: Cannot allocate %" PRIu64 " bytes "
+ "for hash table %s\n",
+ __func__, total_size, p->name);
+ return NULL;
+ }
+
+ f = rte_zmalloc_socket(p->name,
+ (size_t)total_size,
+ RTE_CACHE_LINE_SIZE,
+ socket_id);
if (f == NULL) {
- RTE_LOG(ERR, TABLE,
- "%s: Cannot allocate %u bytes for hash table\n",
- __func__, total_size);
+ RTE_LOG(ERR, TABLE, "%s: Cannot allocate %" PRIu64 " bytes "
+ "for hash table %s\n",
+ __func__, total_size, p->name);
return NULL;
}
- RTE_LOG(INFO, TABLE,
- "%s: Hash table memory footprint is %u bytes\n",
- __func__, total_size);
+ RTE_LOG(INFO, TABLE, "%s: Hash table %s memory footprint "
+ "is %" PRIu64 " bytes\n",
+ __func__, p->name, total_size);
/* Memory initialization */
f->n_buckets = n_buckets;
- f->n_entries_per_bucket = n_entries_per_bucket;
- f->key_size = key_size;
+ f->key_size = KEY_SIZE;
f->entry_size = entry_size;
- f->bucket_size = bucket_size_cl * RTE_CACHE_LINE_SIZE;
- f->signature_offset = p->signature_offset;
+ f->bucket_size = bucket_size;
f->key_offset = p->key_offset;
f->f_hash = p->f_hash;
f->seed = p->seed;
@@ -212,19 +271,19 @@ rte_table_hash_entry_add_key16_lru(
uint64_t signature, pos;
uint32_t bucket_index, i;
- signature = f->f_hash(key, f->key_size, f->seed);
+ signature = f->f_hash(key, f->key_mask, f->key_size, f->seed);
bucket_index = signature & (f->n_buckets - 1);
bucket = (struct rte_bucket_4_16 *)
- &f->memory[bucket_index * f->bucket_size];
+ &f->memory[bucket_index * f->bucket_size];
signature |= RTE_BUCKET_ENTRY_VALID;
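
Because the bucket count is kept at a power of two, the bucket index is taken with a single AND against (n_buckets - 1) rather than a modulo; a small sketch of that equivalence, with hypothetical values:

    #include <stdint.h>
    #include <assert.h>

    int main(void)
    {
            uint64_t signature = 0x12345678deadbeefULL; /* hypothetical hash output */
            uint32_t n_buckets = 256;                    /* power of two, as enforced above */

            uint32_t idx_mask = signature & (n_buckets - 1); /* keeps the low 8 bits */
            uint32_t idx_mod  = signature % n_buckets;       /* same value, but slower */

            assert(idx_mask == idx_mod);
            return 0;
    }
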
/* Key is present in the bucket */
for (i = 0; i < 4; i++) {
uint64_t bucket_signature = bucket->signature[i];
- uint8_t *bucket_key = (uint8_t *) bucket->key[i];
+ uint8_t *bucket_key = (uint8_t *) &bucket->key[i];
if ((bucket_signature == signature) &&
- (memcmp(key, bucket_key, f->key_size) == 0)) {
+ (keycmp(bucket_key, key, f->key_mask) == 0)) {
uint8_t *bucket_data = &bucket->data[i * f->entry_size];
memcpy(bucket_data, entry, f->entry_size);
@@ -238,13 +297,13 @@ rte_table_hash_entry_add_key16_lru(
/* Key is not present in the bucket */
for (i = 0; i < 4; i++) {
uint64_t bucket_signature = bucket->signature[i];
- uint8_t *bucket_key = (uint8_t *) bucket->key[i];
+ uint8_t *bucket_key = (uint8_t *) &bucket->key[i];
if (bucket_signature == 0) {
uint8_t *bucket_data = &bucket->data[i * f->entry_size];
bucket->signature[i] = signature;
- memcpy(bucket_key, key, f->key_size);
+ keycpy(bucket_key, key, f->key_mask);
memcpy(bucket_data, entry, f->entry_size);
lru_update(bucket, i);
*key_found = 0;
@@ -257,7 +316,7 @@ rte_table_hash_entry_add_key16_lru(
/* Bucket full: replace LRU entry */
pos = lru_pos(bucket);
bucket->signature[pos] = signature;
- memcpy(bucket->key[pos], key, f->key_size);
+ keycpy(&bucket->key[pos], key, f->key_mask);
memcpy(&bucket->data[pos * f->entry_size], entry, f->entry_size);
lru_update(bucket, pos);
*key_found = 0;
@@ -278,19 +337,19 @@ rte_table_hash_entry_delete_key16_lru(
uint64_t signature;
uint32_t bucket_index, i;
- signature = f->f_hash(key, f->key_size, f->seed);
+ signature = f->f_hash(key, f->key_mask, f->key_size, f->seed);
bucket_index = signature & (f->n_buckets - 1);
bucket = (struct rte_bucket_4_16 *)
- &f->memory[bucket_index * f->bucket_size];
+ &f->memory[bucket_index * f->bucket_size];
signature |= RTE_BUCKET_ENTRY_VALID;
/* Key is present in the bucket */
for (i = 0; i < 4; i++) {
uint64_t bucket_signature = bucket->signature[i];
- uint8_t *bucket_key = (uint8_t *) bucket->key[i];
+ uint8_t *bucket_key = (uint8_t *) &bucket->key[i];
if ((bucket_signature == signature) &&
- (memcmp(key, bucket_key, f->key_size) == 0)) {
+ (keycmp(bucket_key, key, f->key_mask) == 0)) {
uint8_t *bucket_data = &bucket->data[i * f->entry_size];
bucket->signature[i] = 0;
@@ -306,81 +365,71 @@ rte_table_hash_entry_delete_key16_lru(
return 0;
}
-static int
-check_params_create_ext(struct rte_table_hash_key16_ext_params *params) {
- /* n_entries */
- if (params->n_entries == 0) {
- RTE_LOG(ERR, TABLE, "%s: n_entries is zero\n", __func__);
- return -EINVAL;
- }
-
- /* n_entries_ext */
- if (params->n_entries_ext == 0) {
- RTE_LOG(ERR, TABLE, "%s: n_entries_ext is zero\n", __func__);
- return -EINVAL;
- }
-
- /* f_hash */
- if (params->f_hash == NULL) {
- RTE_LOG(ERR, TABLE,
- "%s: f_hash function pointer is NULL\n", __func__);
- return -EINVAL;
- }
-
- return 0;
-}
-
static void *
rte_table_hash_create_key16_ext(void *params,
int socket_id,
uint32_t entry_size)
{
- struct rte_table_hash_key16_ext_params *p =
- (struct rte_table_hash_key16_ext_params *) params;
+ struct rte_table_hash_params *p = params;
struct rte_table_hash *f;
- uint32_t n_buckets, n_buckets_ext, n_entries_per_bucket, key_size,
- bucket_size_cl, stack_size_cl, total_size, i;
+ uint64_t bucket_size, stack_size, total_size;
+ uint32_t n_buckets_ext, i;
/* Check input parameters */
- if ((check_params_create_ext(p) != 0) ||
+ if ((check_params_create(p) != 0) ||
((sizeof(struct rte_table_hash) % RTE_CACHE_LINE_SIZE) != 0) ||
((sizeof(struct rte_bucket_4_16) % 64) != 0))
return NULL;
- n_entries_per_bucket = 4;
- key_size = 16;
+ /*
+ * Table dimensioning
+ *
+ * Objective: Pick the number of bucket extensions (n_buckets_ext) so that
+ * it is guaranteed that n_keys keys can be stored in the table at any time.
+ *
+ * The worst case scenario takes place when all the n_keys keys fall into
+ * the same bucket. Actually, due to the KEYS_PER_BUCKET scheme, the worst
+ * case takes place when (n_keys - KEYS_PER_BUCKET + 1) keys fall into the
+ * same bucket, while the remaining (KEYS_PER_BUCKET - 1) keys each fall
+ * into a different bucket. This case defeats the purpose of the hash table.
+ * It indicates unsuitable f_hash or n_keys to n_buckets ratio.
+ *
+ * n_buckets_ext = n_keys / KEYS_PER_BUCKET + KEYS_PER_BUCKET - 1
+ */
+ n_buckets_ext = p->n_keys / KEYS_PER_BUCKET + KEYS_PER_BUCKET - 1;
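
A quick numeric check of the extension rule above, assuming a hypothetical n_keys = 1000: n_buckets_ext = 1000 / 4 + 4 - 1 = 253, which is enough chained buckets to cover the worst-case key distribution described in the comment.
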
/* Memory allocation */
- n_buckets = rte_align32pow2((p->n_entries + n_entries_per_bucket - 1) /
- n_entries_per_bucket);
- n_buckets_ext = (p->n_entries_ext + n_entries_per_bucket - 1) /
- n_entries_per_bucket;
- bucket_size_cl = (sizeof(struct rte_bucket_4_16) + n_entries_per_bucket
- * entry_size + RTE_CACHE_LINE_SIZE - 1) / RTE_CACHE_LINE_SIZE;
- stack_size_cl = (n_buckets_ext * sizeof(uint32_t) + RTE_CACHE_LINE_SIZE - 1)
- / RTE_CACHE_LINE_SIZE;
+ bucket_size = RTE_CACHE_LINE_ROUNDUP(sizeof(struct rte_bucket_4_16) +
+ KEYS_PER_BUCKET * entry_size);
+ stack_size = RTE_CACHE_LINE_ROUNDUP(n_buckets_ext * sizeof(uint32_t));
total_size = sizeof(struct rte_table_hash) +
- ((n_buckets + n_buckets_ext) * bucket_size_cl + stack_size_cl) *
- RTE_CACHE_LINE_SIZE;
+ (p->n_buckets + n_buckets_ext) * bucket_size + stack_size;
+ if (total_size > SIZE_MAX) {
+ RTE_LOG(ERR, TABLE, "%s: Cannot allocate %" PRIu64 " bytes "
+ "for hash table %s\n",
+ __func__, total_size, p->name);
+ return NULL;
+ }
- f = rte_zmalloc_socket("TABLE", total_size, RTE_CACHE_LINE_SIZE, socket_id);
+ f = rte_zmalloc_socket(p->name,
+ (size_t)total_size,
+ RTE_CACHE_LINE_SIZE,
+ socket_id);
if (f == NULL) {
- RTE_LOG(ERR, TABLE,
- "%s: Cannot allocate %u bytes for hash table\n",
- __func__, total_size);
+ RTE_LOG(ERR, TABLE, "%s: Cannot allocate %" PRIu64 " bytes "
+ "for hash table %s\n",
+ __func__, total_size, p->name);
return NULL;
}
- RTE_LOG(INFO, TABLE,
- "%s: Hash table memory footprint is %u bytes\n",
- __func__, total_size);
+ RTE_LOG(INFO, TABLE, "%s: Hash table %s memory footprint "
+ "is %" PRIu64 " bytes\n",
+ __func__, p->name, total_size);
/* Memory initialization */
- f->n_buckets = n_buckets;
- f->n_entries_per_bucket = n_entries_per_bucket;
- f->key_size = key_size;
+ f->n_buckets = p->n_buckets;
+ f->key_size = KEY_SIZE;
f->entry_size = entry_size;
- f->bucket_size = bucket_size_cl * RTE_CACHE_LINE_SIZE;
- f->signature_offset = p->signature_offset;
+ f->bucket_size = bucket_size;
f->key_offset = p->key_offset;
f->f_hash = p->f_hash;
f->seed = p->seed;
@@ -388,10 +437,7 @@ rte_table_hash_create_key16_ext(void *params,
f->n_buckets_ext = n_buckets_ext;
f->stack_pos = n_buckets_ext;
f->stack = (uint32_t *)
- &f->memory[(n_buckets + n_buckets_ext) * f->bucket_size];
-
- for (i = 0; i < n_buckets_ext; i++)
- f->stack[i] = i;
+ &f->memory[(p->n_buckets + n_buckets_ext) * f->bucket_size];
if (p->key_mask != NULL) {
f->key_mask[0] = (((uint64_t *)p->key_mask)[0]);
@@ -401,6 +447,9 @@ rte_table_hash_create_key16_ext(void *params,
f->key_mask[1] = 0xFFFFFFFFFFFFFFFFLLU;
}
+ for (i = 0; i < n_buckets_ext; i++)
+ f->stack[i] = i;
+
return f;
}
@@ -432,20 +481,20 @@ rte_table_hash_entry_add_key16_ext(
uint64_t signature;
uint32_t bucket_index, i;
- signature = f->f_hash(key, f->key_size, f->seed);
+ signature = f->f_hash(key, f->key_mask, f->key_size, f->seed);
bucket_index = signature & (f->n_buckets - 1);
bucket0 = (struct rte_bucket_4_16 *)
- &f->memory[bucket_index * f->bucket_size];
+ &f->memory[bucket_index * f->bucket_size];
signature |= RTE_BUCKET_ENTRY_VALID;
/* Key is present in the bucket */
for (bucket = bucket0; bucket != NULL; bucket = bucket->next)
for (i = 0; i < 4; i++) {
uint64_t bucket_signature = bucket->signature[i];
- uint8_t *bucket_key = (uint8_t *) bucket->key[i];
+ uint8_t *bucket_key = (uint8_t *) &bucket->key[i];
if ((bucket_signature == signature) &&
- (memcmp(key, bucket_key, f->key_size) == 0)) {
+ (keycmp(bucket_key, key, f->key_mask) == 0)) {
uint8_t *bucket_data = &bucket->data[i *
f->entry_size];
@@ -458,17 +507,17 @@ rte_table_hash_entry_add_key16_ext(
/* Key is not present in the bucket */
for (bucket_prev = NULL, bucket = bucket0; bucket != NULL;
- bucket_prev = bucket, bucket = bucket->next)
+ bucket_prev = bucket, bucket = bucket->next)
for (i = 0; i < 4; i++) {
uint64_t bucket_signature = bucket->signature[i];
- uint8_t *bucket_key = (uint8_t *) bucket->key[i];
+ uint8_t *bucket_key = (uint8_t *) &bucket->key[i];
if (bucket_signature == 0) {
uint8_t *bucket_data = &bucket->data[i *
f->entry_size];
bucket->signature[i] = signature;
- memcpy(bucket_key, key, f->key_size);
+ keycpy(bucket_key, key, f->key_mask);
memcpy(bucket_data, entry, f->entry_size);
*key_found = 0;
*entry_ptr = (void *) bucket_data;
@@ -487,7 +536,7 @@ rte_table_hash_entry_add_key16_ext(
bucket_prev->next_valid = 1;
bucket->signature[0] = signature;
- memcpy(bucket->key[0], key, f->key_size);
+ keycpy(&bucket->key[0], key, f->key_mask);
memcpy(&bucket->data[0], entry, f->entry_size);
*key_found = 0;
*entry_ptr = (void *) &bucket->data[0];
@@ -509,7 +558,7 @@ rte_table_hash_entry_delete_key16_ext(
uint64_t signature;
uint32_t bucket_index, i;
- signature = f->f_hash(key, f->key_size, f->seed);
+ signature = f->f_hash(key, f->key_mask, f->key_size, f->seed);
bucket_index = signature & (f->n_buckets - 1);
bucket0 = (struct rte_bucket_4_16 *)
&f->memory[bucket_index * f->bucket_size];
@@ -520,18 +569,17 @@ rte_table_hash_entry_delete_key16_ext(
bucket_prev = bucket, bucket = bucket->next)
for (i = 0; i < 4; i++) {
uint64_t bucket_signature = bucket->signature[i];
- uint8_t *bucket_key = (uint8_t *) bucket->key[i];
+ uint8_t *bucket_key = (uint8_t *) &bucket->key[i];
if ((bucket_signature == signature) &&
- (memcmp(key, bucket_key, f->key_size) == 0)) {
+ (keycmp(bucket_key, key, f->key_mask) == 0)) {
uint8_t *bucket_data = &bucket->data[i *
f->entry_size];
bucket->signature[i] = 0;
*key_found = 1;
if (entry)
- memcpy(entry, bucket_data,
- f->entry_size);
+ memcpy(entry, bucket_data, f->entry_size);
if ((bucket->signature[0] == 0) &&
(bucket->signature[1] == 0) &&
@@ -558,26 +606,28 @@ rte_table_hash_entry_delete_key16_ext(
return 0;
}
-#define lookup_key16_cmp(key_in, bucket, pos) \
+#define lookup_key16_cmp(key_in, bucket, pos, f) \
{ \
- uint64_t xor[4][2], or[4], signature[4]; \
+ uint64_t xor[4][2], or[4], signature[4], k[2]; \
\
+ k[0] = key_in[0] & f->key_mask[0]; \
+ k[1] = key_in[1] & f->key_mask[1]; \
signature[0] = (~bucket->signature[0]) & 1; \
signature[1] = (~bucket->signature[1]) & 1; \
signature[2] = (~bucket->signature[2]) & 1; \
signature[3] = (~bucket->signature[3]) & 1; \
\
- xor[0][0] = key_in[0] ^ bucket->key[0][0]; \
- xor[0][1] = key_in[1] ^ bucket->key[0][1]; \
+ xor[0][0] = k[0] ^ bucket->key[0][0]; \
+ xor[0][1] = k[1] ^ bucket->key[0][1]; \
\
- xor[1][0] = key_in[0] ^ bucket->key[1][0]; \
- xor[1][1] = key_in[1] ^ bucket->key[1][1]; \
+ xor[1][0] = k[0] ^ bucket->key[1][0]; \
+ xor[1][1] = k[1] ^ bucket->key[1][1]; \
\
- xor[2][0] = key_in[0] ^ bucket->key[2][0]; \
- xor[2][1] = key_in[1] ^ bucket->key[2][1]; \
+ xor[2][0] = k[0] ^ bucket->key[2][0]; \
+ xor[2][1] = k[1] ^ bucket->key[2][1]; \
\
- xor[3][0] = key_in[0] ^ bucket->key[3][0]; \
- xor[3][1] = key_in[1] ^ bucket->key[3][1]; \
+ xor[3][0] = k[0] ^ bucket->key[3][0]; \
+ xor[3][1] = k[1] ^ bucket->key[3][1]; \
\
or[0] = xor[0][0] | xor[0][1] | signature[0]; \
or[1] = xor[1][0] | xor[1][1] | signature[1]; \
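
The comparison macro above is branchless: for each bucket entry it ORs the XOR of every masked key word with the inverted valid bit, so the per-entry result is zero only for a valid entry whose key matches on all masked bits. A minimal sketch of that test for a single 16-byte entry, with hypothetical values:

    #include <stdint.h>
    #include <assert.h>

    int main(void)
    {
            uint64_t entry_key[2] = { 0xaabbULL, 0xccddULL }; /* stored (pre-masked) key */
            uint64_t entry_sig    = 1;                        /* RTE_BUCKET_ENTRY_VALID set */
            uint64_t k[2]         = { 0xaabbULL, 0xccddULL }; /* lookup key, already masked */

            uint64_t invalid = (~entry_sig) & 1;              /* 0 when the entry is valid */
            uint64_t or_ = (k[0] ^ entry_key[0]) |
                           (k[1] ^ entry_key[1]) | invalid;

            assert(or_ == 0); /* zero <=> valid entry with a full key match */
            return 0;
    }
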
@@ -610,30 +660,12 @@ rte_table_hash_entry_delete_key16_ext(
#define lookup1_stage1(mbuf1, bucket1, f) \
{ \
- uint64_t signature; \
- uint32_t bucket_index; \
- \
- signature = RTE_MBUF_METADATA_UINT32(mbuf1, f->signature_offset);\
- bucket_index = signature & (f->n_buckets - 1); \
- bucket1 = (struct rte_bucket_4_16 *) \
- &f->memory[bucket_index * f->bucket_size]; \
- rte_prefetch0(bucket1); \
- rte_prefetch0((void *)(((uintptr_t) bucket1) + RTE_CACHE_LINE_SIZE));\
-}
-
-#define lookup1_stage1_dosig(mbuf1, bucket1, f) \
-{ \
uint64_t *key; \
uint64_t signature = 0; \
uint32_t bucket_index; \
- uint64_t hash_key_buffer[2]; \
\
key = RTE_MBUF_METADATA_UINT64_PTR(mbuf1, f->key_offset);\
- \
- hash_key_buffer[0] = key[0] & f->key_mask[0]; \
- hash_key_buffer[1] = key[1] & f->key_mask[1]; \
- signature = f->f_hash(hash_key_buffer, \
- RTE_TABLE_HASH_KEY_SIZE, f->seed); \
+ signature = f->f_hash(key, f->key_mask, KEY_SIZE, f->seed); \
\
bucket_index = signature & (f->n_buckets - 1); \
bucket1 = (struct rte_bucket_4_16 *) \
@@ -648,14 +680,10 @@ rte_table_hash_entry_delete_key16_ext(
void *a; \
uint64_t pkt_mask; \
uint64_t *key; \
- uint64_t hash_key_buffer[2]; \
uint32_t pos; \
\
key = RTE_MBUF_METADATA_UINT64_PTR(mbuf2, f->key_offset);\
- hash_key_buffer[0] = key[0] & f->key_mask[0]; \
- hash_key_buffer[1] = key[1] & f->key_mask[1]; \
- \
- lookup_key16_cmp(hash_key_buffer, bucket2, pos); \
+ lookup_key16_cmp(key, bucket2, pos, f); \
\
pkt_mask = (bucket2->signature[pos] & 1LLU) << pkt2_index;\
pkts_mask_out |= pkt_mask; \
@@ -673,14 +701,10 @@ rte_table_hash_entry_delete_key16_ext(
void *a; \
uint64_t pkt_mask, bucket_mask; \
uint64_t *key; \
- uint64_t hash_key_buffer[2]; \
uint32_t pos; \
\
key = RTE_MBUF_METADATA_UINT64_PTR(mbuf2, f->key_offset);\
- hash_key_buffer[0] = key[0] & f->key_mask[0]; \
- hash_key_buffer[1] = key[1] & f->key_mask[1]; \
- \
- lookup_key16_cmp(hash_key_buffer, bucket2, pos); \
+ lookup_key16_cmp(key, bucket2, pos, f); \
\
pkt_mask = (bucket2->signature[pos] & 1LLU) << pkt2_index;\
pkts_mask_out |= pkt_mask; \
@@ -703,15 +727,11 @@ rte_table_hash_entry_delete_key16_ext(
void *a; \
uint64_t pkt_mask, bucket_mask; \
uint64_t *key; \
- uint64_t hash_key_buffer[2]; \
uint32_t pos; \
\
bucket = buckets[pkt_index]; \
key = keys[pkt_index]; \
- hash_key_buffer[0] = key[0] & f->key_mask[0]; \
- hash_key_buffer[1] = key[1] & f->key_mask[1]; \
- \
- lookup_key16_cmp(hash_key_buffer, bucket, pos); \
+ lookup_key16_cmp(key, bucket, pos, f); \
\
pkt_mask = (bucket->signature[pos] & 1LLU) << pkt_index;\
pkts_mask_out |= pkt_mask; \
@@ -775,36 +795,12 @@ rte_table_hash_entry_delete_key16_ext(
#define lookup2_stage1(mbuf10, mbuf11, bucket10, bucket11, f) \
{ \
- uint64_t signature10, signature11; \
- uint32_t bucket10_index, bucket11_index; \
- \
- signature10 = RTE_MBUF_METADATA_UINT32(mbuf10, f->signature_offset);\
- bucket10_index = signature10 & (f->n_buckets - 1); \
- bucket10 = (struct rte_bucket_4_16 *) \
- &f->memory[bucket10_index * f->bucket_size]; \
- rte_prefetch0(bucket10); \
- rte_prefetch0((void *)(((uintptr_t) bucket10) + RTE_CACHE_LINE_SIZE));\
- \
- signature11 = RTE_MBUF_METADATA_UINT32(mbuf11, f->signature_offset);\
- bucket11_index = signature11 & (f->n_buckets - 1); \
- bucket11 = (struct rte_bucket_4_16 *) \
- &f->memory[bucket11_index * f->bucket_size]; \
- rte_prefetch0(bucket11); \
- rte_prefetch0((void *)(((uintptr_t) bucket11) + RTE_CACHE_LINE_SIZE));\
-}
-
-#define lookup2_stage1_dosig(mbuf10, mbuf11, bucket10, bucket11, f) \
-{ \
uint64_t *key10, *key11; \
- uint64_t hash_offset_buffer[2]; \
uint64_t signature10, signature11; \
uint32_t bucket10_index, bucket11_index; \
\
key10 = RTE_MBUF_METADATA_UINT64_PTR(mbuf10, f->key_offset);\
- hash_offset_buffer[0] = key10[0] & f->key_mask[0]; \
- hash_offset_buffer[1] = key10[1] & f->key_mask[1]; \
- signature10 = f->f_hash(hash_offset_buffer, \
- RTE_TABLE_HASH_KEY_SIZE, f->seed);\
+ signature10 = f->f_hash(key10, f->key_mask, KEY_SIZE, f->seed);\
bucket10_index = signature10 & (f->n_buckets - 1); \
bucket10 = (struct rte_bucket_4_16 *) \
&f->memory[bucket10_index * f->bucket_size]; \
@@ -812,10 +808,7 @@ rte_table_hash_entry_delete_key16_ext(
rte_prefetch0((void *)(((uintptr_t) bucket10) + RTE_CACHE_LINE_SIZE));\
\
key11 = RTE_MBUF_METADATA_UINT64_PTR(mbuf11, f->key_offset);\
- hash_offset_buffer[0] = key11[0] & f->key_mask[0]; \
- hash_offset_buffer[1] = key11[1] & f->key_mask[1]; \
- signature11 = f->f_hash(hash_offset_buffer, \
- RTE_TABLE_HASH_KEY_SIZE, f->seed);\
+ signature11 = f->f_hash(key11, f->key_mask, KEY_SIZE, f->seed);\
bucket11_index = signature11 & (f->n_buckets - 1); \
bucket11 = (struct rte_bucket_4_16 *) \
&f->memory[bucket11_index * f->bucket_size]; \
@@ -829,19 +822,13 @@ rte_table_hash_entry_delete_key16_ext(
void *a20, *a21; \
uint64_t pkt20_mask, pkt21_mask; \
uint64_t *key20, *key21; \
- uint64_t hash_key_buffer20[2]; \
- uint64_t hash_key_buffer21[2]; \
uint32_t pos20, pos21; \
\
key20 = RTE_MBUF_METADATA_UINT64_PTR(mbuf20, f->key_offset);\
key21 = RTE_MBUF_METADATA_UINT64_PTR(mbuf21, f->key_offset);\
- hash_key_buffer20[0] = key20[0] & f->key_mask[0]; \
- hash_key_buffer20[1] = key20[1] & f->key_mask[1]; \
- hash_key_buffer21[0] = key21[0] & f->key_mask[0]; \
- hash_key_buffer21[1] = key21[1] & f->key_mask[1]; \
\
- lookup_key16_cmp(hash_key_buffer20, bucket20, pos20); \
- lookup_key16_cmp(hash_key_buffer21, bucket21, pos21); \
+ lookup_key16_cmp(key20, bucket20, pos20, f); \
+ lookup_key16_cmp(key21, bucket21, pos21, f); \
\
pkt20_mask = (bucket20->signature[pos20] & 1LLU) << pkt20_index;\
pkt21_mask = (bucket21->signature[pos21] & 1LLU) << pkt21_index;\
@@ -864,19 +851,13 @@ rte_table_hash_entry_delete_key16_ext(
void *a20, *a21; \
uint64_t pkt20_mask, pkt21_mask, bucket20_mask, bucket21_mask;\
uint64_t *key20, *key21; \
- uint64_t hash_key_buffer20[2]; \
- uint64_t hash_key_buffer21[2]; \
uint32_t pos20, pos21; \
\
key20 = RTE_MBUF_METADATA_UINT64_PTR(mbuf20, f->key_offset);\
key21 = RTE_MBUF_METADATA_UINT64_PTR(mbuf21, f->key_offset);\
- hash_key_buffer20[0] = key20[0] & f->key_mask[0]; \
- hash_key_buffer20[1] = key20[1] & f->key_mask[1]; \
- hash_key_buffer21[0] = key21[0] & f->key_mask[0]; \
- hash_key_buffer21[1] = key21[1] & f->key_mask[1]; \
\
- lookup_key16_cmp(hash_key_buffer20, bucket20, pos20); \
- lookup_key16_cmp(hash_key_buffer21, bucket21, pos21); \
+ lookup_key16_cmp(key20, bucket20, pos20, f); \
+ lookup_key16_cmp(key21, bucket21, pos21, f); \
\
pkt20_mask = (bucket20->signature[pos20] & 1LLU) << pkt20_index;\
pkt21_mask = (bucket21->signature[pos21] & 1LLU) << pkt21_index;\
@@ -916,6 +897,7 @@ rte_table_hash_lookup_key16_lru(
uint64_t pkts_mask_out = 0;
__rte_unused uint32_t n_pkts_in = __builtin_popcountll(pkts_mask);
+
RTE_TABLE_HASH_KEY16_STATS_PKTS_IN_ADD(f, n_pkts_in);
/* Cannot run the pipeline with less than 5 packets */
@@ -932,8 +914,8 @@ rte_table_hash_lookup_key16_lru(
}
*lookup_hit_mask = pkts_mask_out;
- RTE_TABLE_HASH_KEY16_STATS_PKTS_LOOKUP_MISS(f,
- n_pkts_in - __builtin_popcountll(pkts_mask_out));
+ RTE_TABLE_HASH_KEY16_STATS_PKTS_LOOKUP_MISS(f, n_pkts_in -
+ __builtin_popcountll(pkts_mask_out));
return 0;
}
@@ -1026,136 +1008,7 @@ rte_table_hash_lookup_key16_lru(
RTE_TABLE_HASH_KEY16_STATS_PKTS_LOOKUP_MISS(f, n_pkts_in -
__builtin_popcountll(pkts_mask_out));
return 0;
-} /* rte_table_hash_lookup_key16_lru() */
-
-static int
-rte_table_hash_lookup_key16_lru_dosig(
- void *table,
- struct rte_mbuf **pkts,
- uint64_t pkts_mask,
- uint64_t *lookup_hit_mask,
- void **entries)
-{
- struct rte_table_hash *f = (struct rte_table_hash *) table;
- struct rte_bucket_4_16 *bucket10, *bucket11, *bucket20, *bucket21;
- struct rte_mbuf *mbuf00, *mbuf01, *mbuf10, *mbuf11, *mbuf20, *mbuf21;
- uint32_t pkt00_index, pkt01_index, pkt10_index;
- uint32_t pkt11_index, pkt20_index, pkt21_index;
- uint64_t pkts_mask_out = 0;
-
- __rte_unused uint32_t n_pkts_in = __builtin_popcountll(pkts_mask);
-
- RTE_TABLE_HASH_KEY16_STATS_PKTS_IN_ADD(f, n_pkts_in);
-
- /* Cannot run the pipeline with less than 5 packets */
- if (__builtin_popcountll(pkts_mask) < 5) {
- for ( ; pkts_mask; ) {
- struct rte_bucket_4_16 *bucket;
- struct rte_mbuf *mbuf;
- uint32_t pkt_index;
-
- lookup1_stage0(pkt_index, mbuf, pkts, pkts_mask, f);
- lookup1_stage1_dosig(mbuf, bucket, f);
- lookup1_stage2_lru(pkt_index, mbuf, bucket,
- pkts_mask_out, entries, f);
- }
-
- *lookup_hit_mask = pkts_mask_out;
- RTE_TABLE_HASH_KEY16_STATS_PKTS_LOOKUP_MISS(f, n_pkts_in -
- __builtin_popcountll(pkts_mask_out));
- return 0;
- }
-
- /*
- * Pipeline fill
- *
- */
- /* Pipeline stage 0 */
- lookup2_stage0(pkt00_index, pkt01_index, mbuf00, mbuf01, pkts,
- pkts_mask, f);
-
- /* Pipeline feed */
- mbuf10 = mbuf00;
- mbuf11 = mbuf01;
- pkt10_index = pkt00_index;
- pkt11_index = pkt01_index;
-
- /* Pipeline stage 0 */
- lookup2_stage0(pkt00_index, pkt01_index, mbuf00, mbuf01, pkts,
- pkts_mask, f);
-
- /* Pipeline stage 1 */
- lookup2_stage1_dosig(mbuf10, mbuf11, bucket10, bucket11, f);
-
- /*
- * Pipeline run
- *
- */
- for ( ; pkts_mask; ) {
- /* Pipeline feed */
- bucket20 = bucket10;
- bucket21 = bucket11;
- mbuf20 = mbuf10;
- mbuf21 = mbuf11;
- mbuf10 = mbuf00;
- mbuf11 = mbuf01;
- pkt20_index = pkt10_index;
- pkt21_index = pkt11_index;
- pkt10_index = pkt00_index;
- pkt11_index = pkt01_index;
-
- /* Pipeline stage 0 */
- lookup2_stage0_with_odd_support(pkt00_index, pkt01_index,
- mbuf00, mbuf01, pkts, pkts_mask, f);
-
- /* Pipeline stage 1 */
- lookup2_stage1_dosig(mbuf10, mbuf11, bucket10, bucket11, f);
-
- /* Pipeline stage 2 */
- lookup2_stage2_lru(pkt20_index, pkt21_index, mbuf20, mbuf21,
- bucket20, bucket21, pkts_mask_out, entries, f);
- }
-
- /*
- * Pipeline flush
- *
- */
- /* Pipeline feed */
- bucket20 = bucket10;
- bucket21 = bucket11;
- mbuf20 = mbuf10;
- mbuf21 = mbuf11;
- mbuf10 = mbuf00;
- mbuf11 = mbuf01;
- pkt20_index = pkt10_index;
- pkt21_index = pkt11_index;
- pkt10_index = pkt00_index;
- pkt11_index = pkt01_index;
-
- /* Pipeline stage 1 */
- lookup2_stage1_dosig(mbuf10, mbuf11, bucket10, bucket11, f);
-
- /* Pipeline stage 2 */
- lookup2_stage2_lru(pkt20_index, pkt21_index, mbuf20, mbuf21,
- bucket20, bucket21, pkts_mask_out, entries, f);
-
- /* Pipeline feed */
- bucket20 = bucket10;
- bucket21 = bucket11;
- mbuf20 = mbuf10;
- mbuf21 = mbuf11;
- pkt20_index = pkt10_index;
- pkt21_index = pkt11_index;
-
- /* Pipeline stage 2 */
- lookup2_stage2_lru(pkt20_index, pkt21_index, mbuf20, mbuf21,
- bucket20, bucket21, pkts_mask_out, entries, f);
-
- *lookup_hit_mask = pkts_mask_out;
- RTE_TABLE_HASH_KEY16_STATS_PKTS_LOOKUP_MISS(f, n_pkts_in -
- __builtin_popcountll(pkts_mask_out));
- return 0;
-} /* rte_table_hash_lookup_key16_lru_dosig() */
+} /* lookup LRU */
static int
rte_table_hash_lookup_key16_ext(
@@ -1175,6 +1028,7 @@ rte_table_hash_lookup_key16_ext(
uint64_t *keys[RTE_PORT_IN_BURST_SIZE_MAX];
__rte_unused uint32_t n_pkts_in = __builtin_popcountll(pkts_mask);
+
RTE_TABLE_HASH_KEY16_STATS_PKTS_IN_ADD(f, n_pkts_in);
/* Cannot run the pipeline with less than 5 packets */
@@ -1306,159 +1160,7 @@ grind_next_buckets:
RTE_TABLE_HASH_KEY16_STATS_PKTS_LOOKUP_MISS(f, n_pkts_in -
__builtin_popcountll(pkts_mask_out));
return 0;
-} /* rte_table_hash_lookup_key16_ext() */
-
-static int
-rte_table_hash_lookup_key16_ext_dosig(
- void *table,
- struct rte_mbuf **pkts,
- uint64_t pkts_mask,
- uint64_t *lookup_hit_mask,
- void **entries)
-{
- struct rte_table_hash *f = (struct rte_table_hash *) table;
- struct rte_bucket_4_16 *bucket10, *bucket11, *bucket20, *bucket21;
- struct rte_mbuf *mbuf00, *mbuf01, *mbuf10, *mbuf11, *mbuf20, *mbuf21;
- uint32_t pkt00_index, pkt01_index, pkt10_index;
- uint32_t pkt11_index, pkt20_index, pkt21_index;
- uint64_t pkts_mask_out = 0, buckets_mask = 0;
- struct rte_bucket_4_16 *buckets[RTE_PORT_IN_BURST_SIZE_MAX];
- uint64_t *keys[RTE_PORT_IN_BURST_SIZE_MAX];
-
- __rte_unused uint32_t n_pkts_in = __builtin_popcountll(pkts_mask);
-
- RTE_TABLE_HASH_KEY16_STATS_PKTS_IN_ADD(f, n_pkts_in);
-
- /* Cannot run the pipeline with less than 5 packets */
- if (__builtin_popcountll(pkts_mask) < 5) {
- for ( ; pkts_mask; ) {
- struct rte_bucket_4_16 *bucket;
- struct rte_mbuf *mbuf;
- uint32_t pkt_index;
-
- lookup1_stage0(pkt_index, mbuf, pkts, pkts_mask, f);
- lookup1_stage1_dosig(mbuf, bucket, f);
- lookup1_stage2_ext(pkt_index, mbuf, bucket,
- pkts_mask_out, entries, buckets_mask,
- buckets, keys, f);
- }
-
- goto grind_next_buckets;
- }
-
- /*
- * Pipeline fill
- *
- */
- /* Pipeline stage 0 */
- lookup2_stage0(pkt00_index, pkt01_index, mbuf00, mbuf01, pkts,
- pkts_mask, f);
-
- /* Pipeline feed */
- mbuf10 = mbuf00;
- mbuf11 = mbuf01;
- pkt10_index = pkt00_index;
- pkt11_index = pkt01_index;
-
- /* Pipeline stage 0 */
- lookup2_stage0(pkt00_index, pkt01_index, mbuf00, mbuf01, pkts,
- pkts_mask, f);
-
- /* Pipeline stage 1 */
- lookup2_stage1_dosig(mbuf10, mbuf11, bucket10, bucket11, f);
-
- /*
- * Pipeline run
- *
- */
- for ( ; pkts_mask; ) {
- /* Pipeline feed */
- bucket20 = bucket10;
- bucket21 = bucket11;
- mbuf20 = mbuf10;
- mbuf21 = mbuf11;
- mbuf10 = mbuf00;
- mbuf11 = mbuf01;
- pkt20_index = pkt10_index;
- pkt21_index = pkt11_index;
- pkt10_index = pkt00_index;
- pkt11_index = pkt01_index;
-
- /* Pipeline stage 0 */
- lookup2_stage0_with_odd_support(pkt00_index, pkt01_index,
- mbuf00, mbuf01, pkts, pkts_mask, f);
-
- /* Pipeline stage 1 */
- lookup2_stage1_dosig(mbuf10, mbuf11, bucket10, bucket11, f);
-
- /* Pipeline stage 2 */
- lookup2_stage2_ext(pkt20_index, pkt21_index, mbuf20, mbuf21,
- bucket20, bucket21, pkts_mask_out, entries,
- buckets_mask, buckets, keys, f);
- }
-
- /*
- * Pipeline flush
- *
- */
- /* Pipeline feed */
- bucket20 = bucket10;
- bucket21 = bucket11;
- mbuf20 = mbuf10;
- mbuf21 = mbuf11;
- mbuf10 = mbuf00;
- mbuf11 = mbuf01;
- pkt20_index = pkt10_index;
- pkt21_index = pkt11_index;
- pkt10_index = pkt00_index;
- pkt11_index = pkt01_index;
-
- /* Pipeline stage 1 */
- lookup2_stage1_dosig(mbuf10, mbuf11, bucket10, bucket11, f);
-
- /* Pipeline stage 2 */
- lookup2_stage2_ext(pkt20_index, pkt21_index, mbuf20, mbuf21,
- bucket20, bucket21, pkts_mask_out, entries,
- buckets_mask, buckets, keys, f);
-
- /* Pipeline feed */
- bucket20 = bucket10;
- bucket21 = bucket11;
- mbuf20 = mbuf10;
- mbuf21 = mbuf11;
- pkt20_index = pkt10_index;
- pkt21_index = pkt11_index;
-
- /* Pipeline stage 2 */
- lookup2_stage2_ext(pkt20_index, pkt21_index, mbuf20, mbuf21,
- bucket20, bucket21, pkts_mask_out, entries,
- buckets_mask, buckets, keys, f);
-
-grind_next_buckets:
- /* Grind next buckets */
- for ( ; buckets_mask; ) {
- uint64_t buckets_mask_next = 0;
-
- for ( ; buckets_mask; ) {
- uint64_t pkt_mask;
- uint32_t pkt_index;
-
- pkt_index = __builtin_ctzll(buckets_mask);
- pkt_mask = 1LLU << pkt_index;
- buckets_mask &= ~pkt_mask;
-
- lookup_grinder(pkt_index, buckets, keys, pkts_mask_out,
- entries, buckets_mask_next, f);
- }
-
- buckets_mask = buckets_mask_next;
- }
-
- *lookup_hit_mask = pkts_mask_out;
- RTE_TABLE_HASH_KEY16_STATS_PKTS_LOOKUP_MISS(f, n_pkts_in -
- __builtin_popcountll(pkts_mask_out));
- return 0;
-} /* rte_table_hash_lookup_key16_ext_dosig() */
+} /* lookup EXT */
static int
rte_table_hash_key16_stats_read(void *table, struct rte_table_stats *stats, int clear)
@@ -1485,15 +1187,6 @@ struct rte_table_ops rte_table_hash_key16_lru_ops = {
.f_stats = rte_table_hash_key16_stats_read,
};
-struct rte_table_ops rte_table_hash_key16_lru_dosig_ops = {
- .f_create = rte_table_hash_create_key16_lru,
- .f_free = rte_table_hash_free_key16_lru,
- .f_add = rte_table_hash_entry_add_key16_lru,
- .f_delete = rte_table_hash_entry_delete_key16_lru,
- .f_lookup = rte_table_hash_lookup_key16_lru_dosig,
- .f_stats = rte_table_hash_key16_stats_read,
-};
-
struct rte_table_ops rte_table_hash_key16_ext_ops = {
.f_create = rte_table_hash_create_key16_ext,
.f_free = rte_table_hash_free_key16_ext,
@@ -1504,12 +1197,3 @@ struct rte_table_ops rte_table_hash_key16_ext_ops = {
.f_lookup = rte_table_hash_lookup_key16_ext,
.f_stats = rte_table_hash_key16_stats_read,
};
-
-struct rte_table_ops rte_table_hash_key16_ext_dosig_ops = {
- .f_create = rte_table_hash_create_key16_ext,
- .f_free = rte_table_hash_free_key16_ext,
- .f_add = rte_table_hash_entry_add_key16_ext,
- .f_delete = rte_table_hash_entry_delete_key16_ext,
- .f_lookup = rte_table_hash_lookup_key16_ext_dosig,
- .f_stats = rte_table_hash_key16_stats_read,
-};
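
With the dosig variants removed, tables are created through rte_table_hash_key16_lru_ops or rte_table_hash_key16_ext_ops using the unified parameter structure validated by check_params_create() above. A minimal usage sketch, assuming that structure carries the fields referenced in this file (name, key_size, key_offset, key_mask, n_keys, n_buckets, f_hash, seed); the callback my_hash and the table name are hypothetical:

    #include <stdint.h>
    #include <rte_table_hash.h>

    /* Hypothetical hash callback with the (key, key_mask, key_size, seed)
     * signature used in this file; a toy mix, not a real hash function. */
    static uint64_t my_hash(void *key, void *key_mask, uint32_t key_size,
            uint64_t seed)
    {
            uint64_t *k = key, *m = key_mask;

            (void)key_size;
            return seed ^ (k[0] & m[0]) ^ (k[1] & m[1]);
    }

    static void *create_key16_lru_table(int socket_id, uint32_t entry_size)
    {
            struct rte_table_hash_params p = {
                    .name = "flow_table",   /* hypothetical table name */
                    .key_size = 16,         /* must equal KEY_SIZE for the key16 ops */
                    .key_offset = 0,        /* key location in packet metadata */
                    .key_mask = NULL,       /* NULL -> match on all 16 key bytes */
                    .n_keys = 1 << 16,
                    .n_buckets = 1 << 14,   /* must be a power of two */
                    .f_hash = my_hash,
                    .seed = 0,
            };

            return rte_table_hash_key16_lru_ops.f_create(&p, socket_id,
                    entry_size);
    }
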
diff --git a/lib/librte_table/rte_table_hash_key32.c b/lib/librte_table/rte_table_hash_key32.c
index 31fe6fda..d4364d62 100644
--- a/lib/librte_table/rte_table_hash_key32.c
+++ b/lib/librte_table/rte_table_hash_key32.c
@@ -1,34 +1,34 @@
/*-
- * BSD LICENSE
+ * BSD LICENSE
*
- * Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
- * All rights reserved.
+ * Copyright(c) 2010-2017 Intel Corporation. All rights reserved.
+ * All rights reserved.
*
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
*
- * * Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in
- * the documentation and/or other materials provided with the
- * distribution.
- * * Neither the name of Intel Corporation nor the names of its
- * contributors may be used to endorse or promote products derived
- * from this software without specific prior written permission.
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
*
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#include <string.h>
#include <stdio.h>
@@ -42,7 +42,9 @@
#include "rte_table_hash.h"
#include "rte_lru.h"
-#define RTE_TABLE_HASH_KEY_SIZE 32
+#define KEY_SIZE 32
+
+#define KEYS_PER_BUCKET 4
#define RTE_BUCKET_ENTRY_VALID 0x1LLU
@@ -79,12 +81,11 @@ struct rte_table_hash {
/* Input parameters */
uint32_t n_buckets;
- uint32_t n_entries_per_bucket;
uint32_t key_size;
uint32_t entry_size;
uint32_t bucket_size;
- uint32_t signature_offset;
uint32_t key_offset;
+ uint64_t key_mask[4];
rte_table_hash_op_hash f_hash;
uint64_t seed;
@@ -98,10 +99,52 @@ struct rte_table_hash {
};
static int
-check_params_create_lru(struct rte_table_hash_key32_lru_params *params) {
- /* n_entries */
- if (params->n_entries == 0) {
- RTE_LOG(ERR, TABLE, "%s: n_entries is zero\n", __func__);
+keycmp(void *a, void *b, void *b_mask)
+{
+ uint64_t *a64 = a, *b64 = b, *b_mask64 = b_mask;
+
+ return (a64[0] != (b64[0] & b_mask64[0])) ||
+ (a64[1] != (b64[1] & b_mask64[1])) ||
+ (a64[2] != (b64[2] & b_mask64[2])) ||
+ (a64[3] != (b64[3] & b_mask64[3]));
+}
+
+static void
+keycpy(void *dst, void *src, void *src_mask)
+{
+ uint64_t *dst64 = dst, *src64 = src, *src_mask64 = src_mask;
+
+ dst64[0] = src64[0] & src_mask64[0];
+ dst64[1] = src64[1] & src_mask64[1];
+ dst64[2] = src64[2] & src_mask64[2];
+ dst64[3] = src64[3] & src_mask64[3];
+}
+
+static int
+check_params_create(struct rte_table_hash_params *params)
+{
+ /* name */
+ if (params->name == NULL) {
+ RTE_LOG(ERR, TABLE, "%s: name invalid value\n", __func__);
+ return -EINVAL;
+ }
+
+ /* key_size */
+ if (params->key_size != KEY_SIZE) {
+ RTE_LOG(ERR, TABLE, "%s: key_size invalid value\n", __func__);
+ return -EINVAL;
+ }
+
+ /* n_keys */
+ if (params->n_keys == 0) {
+ RTE_LOG(ERR, TABLE, "%s: n_keys is zero\n", __func__);
+ return -EINVAL;
+ }
+
+ /* n_buckets */
+ if ((params->n_buckets == 0) ||
+ (!rte_is_power_of_2(params->n_buckets))) {
+ RTE_LOG(ERR, TABLE, "%s: n_buckets invalid value\n", __func__);
return -EINVAL;
}
@@ -120,51 +163,83 @@ rte_table_hash_create_key32_lru(void *params,
int socket_id,
uint32_t entry_size)
{
- struct rte_table_hash_key32_lru_params *p =
- (struct rte_table_hash_key32_lru_params *) params;
+ struct rte_table_hash_params *p = params;
struct rte_table_hash *f;
- uint32_t n_buckets, n_entries_per_bucket, key_size, bucket_size_cl;
- uint32_t total_size, i;
+ uint64_t bucket_size, total_size;
+ uint32_t n_buckets, i;
/* Check input parameters */
- if ((check_params_create_lru(p) != 0) ||
+ if ((check_params_create(p) != 0) ||
((sizeof(struct rte_table_hash) % RTE_CACHE_LINE_SIZE) != 0) ||
- ((sizeof(struct rte_bucket_4_32) % 64) != 0)) {
+ ((sizeof(struct rte_bucket_4_32) % 64) != 0))
return NULL;
- }
- n_entries_per_bucket = 4;
- key_size = 32;
+
+ /*
+ * Table dimensioning
+ *
+ * Objective: Pick the number of buckets (n_buckets) so that there is a chance
+ * to store n_keys keys in the table.
+ *
+ * Note: Since the buckets do not get extended, it is not possible to
+ * guarantee that n_keys keys can be stored in the table at any time. In the
+ * worst case scenario when all the n_keys fall into the same bucket, only
+ * a maximum of KEYS_PER_BUCKET keys will be stored in the table. This case
+ * defeats the purpose of the hash table. It indicates unsuitable f_hash or
+ * n_keys to n_buckets ratio.
+ *
+ * MIN(n_buckets) = (n_keys + KEYS_PER_BUCKET - 1) / KEYS_PER_BUCKET
+ */
+ n_buckets = rte_align32pow2(
+ (p->n_keys + KEYS_PER_BUCKET - 1) / KEYS_PER_BUCKET);
+ n_buckets = RTE_MAX(n_buckets, p->n_buckets);
/* Memory allocation */
- n_buckets = rte_align32pow2((p->n_entries + n_entries_per_bucket - 1) /
- n_entries_per_bucket);
- bucket_size_cl = (sizeof(struct rte_bucket_4_32) + n_entries_per_bucket
- * entry_size + RTE_CACHE_LINE_SIZE - 1) / RTE_CACHE_LINE_SIZE;
- total_size = sizeof(struct rte_table_hash) + n_buckets *
- bucket_size_cl * RTE_CACHE_LINE_SIZE;
-
- f = rte_zmalloc_socket("TABLE", total_size, RTE_CACHE_LINE_SIZE, socket_id);
+ bucket_size = RTE_CACHE_LINE_ROUNDUP(sizeof(struct rte_bucket_4_32) +
+ KEYS_PER_BUCKET * entry_size);
+ total_size = sizeof(struct rte_table_hash) + n_buckets * bucket_size;
+ if (total_size > SIZE_MAX) {
+ RTE_LOG(ERR, TABLE, "%s: Cannot allocate %" PRIu64 " bytes "
+ "for hash table %s\n",
+ __func__, total_size, p->name);
+ return NULL;
+ }
+
+ f = rte_zmalloc_socket(p->name,
+ (size_t)total_size,
+ RTE_CACHE_LINE_SIZE,
+ socket_id);
if (f == NULL) {
- RTE_LOG(ERR, TABLE,
- "%s: Cannot allocate %u bytes for hash table\n",
- __func__, total_size);
+ RTE_LOG(ERR, TABLE, "%s: Cannot allocate %" PRIu64 " bytes "
+ "for hash table %s\n",
+ __func__, total_size, p->name);
return NULL;
}
RTE_LOG(INFO, TABLE,
- "%s: Hash table memory footprint is %u bytes\n", __func__,
- total_size);
+ "%s: Hash table %s memory footprint "
+ "is %" PRIu64 " bytes\n",
+ __func__, p->name, total_size);
/* Memory initialization */
f->n_buckets = n_buckets;
- f->n_entries_per_bucket = n_entries_per_bucket;
- f->key_size = key_size;
+ f->key_size = KEY_SIZE;
f->entry_size = entry_size;
- f->bucket_size = bucket_size_cl * RTE_CACHE_LINE_SIZE;
- f->signature_offset = p->signature_offset;
+ f->bucket_size = bucket_size;
f->key_offset = p->key_offset;
f->f_hash = p->f_hash;
f->seed = p->seed;
+ if (p->key_mask != NULL) {
+ f->key_mask[0] = ((uint64_t *)p->key_mask)[0];
+ f->key_mask[1] = ((uint64_t *)p->key_mask)[1];
+ f->key_mask[2] = ((uint64_t *)p->key_mask)[2];
+ f->key_mask[3] = ((uint64_t *)p->key_mask)[3];
+ } else {
+ f->key_mask[0] = 0xFFFFFFFFFFFFFFFFLLU;
+ f->key_mask[1] = 0xFFFFFFFFFFFFFFFFLLU;
+ f->key_mask[2] = 0xFFFFFFFFFFFFFFFFLLU;
+ f->key_mask[3] = 0xFFFFFFFFFFFFFFFFLLU;
+ }
+
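
A NULL key_mask therefore degenerates to an exact match on all 32 key bytes; supplying a mask lets selected bytes be ignored consistently when entries are added, deleted, and looked up. A hypothetical mask that compares only the first 24 bytes of the key:

    /* hypothetical 32-byte mask: match bytes 0-23, ignore bytes 24-31 */
    static uint8_t key32_mask[32] = {
            0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
            0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
            0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
            0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    };
    /* then set params.key_mask = key32_mask when creating the table */
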
for (i = 0; i < n_buckets; i++) {
struct rte_bucket_4_32 *bucket;
@@ -204,7 +279,7 @@ rte_table_hash_entry_add_key32_lru(
uint64_t signature, pos;
uint32_t bucket_index, i;
- signature = f->f_hash(key, f->key_size, f->seed);
+ signature = f->f_hash(key, f->key_mask, f->key_size, f->seed);
bucket_index = signature & (f->n_buckets - 1);
bucket = (struct rte_bucket_4_32 *)
&f->memory[bucket_index * f->bucket_size];
@@ -213,10 +288,10 @@ rte_table_hash_entry_add_key32_lru(
/* Key is present in the bucket */
for (i = 0; i < 4; i++) {
uint64_t bucket_signature = bucket->signature[i];
- uint8_t *bucket_key = (uint8_t *) bucket->key[i];
+ uint8_t *bucket_key = (uint8_t *) &bucket->key[i];
if ((bucket_signature == signature) &&
- (memcmp(key, bucket_key, f->key_size) == 0)) {
+ (keycmp(bucket_key, key, f->key_mask) == 0)) {
uint8_t *bucket_data = &bucket->data[i * f->entry_size];
memcpy(bucket_data, entry, f->entry_size);
@@ -230,13 +305,13 @@ rte_table_hash_entry_add_key32_lru(
/* Key is not present in the bucket */
for (i = 0; i < 4; i++) {
uint64_t bucket_signature = bucket->signature[i];
- uint8_t *bucket_key = (uint8_t *) bucket->key[i];
+ uint8_t *bucket_key = (uint8_t *) &bucket->key[i];
if (bucket_signature == 0) {
uint8_t *bucket_data = &bucket->data[i * f->entry_size];
bucket->signature[i] = signature;
- memcpy(bucket_key, key, f->key_size);
+ keycpy(bucket_key, key, f->key_mask);
memcpy(bucket_data, entry, f->entry_size);
lru_update(bucket, i);
*key_found = 0;
@@ -249,10 +324,10 @@ rte_table_hash_entry_add_key32_lru(
/* Bucket full: replace LRU entry */
pos = lru_pos(bucket);
bucket->signature[pos] = signature;
- memcpy(bucket->key[pos], key, f->key_size);
+ keycpy(&bucket->key[pos], key, f->key_mask);
memcpy(&bucket->data[pos * f->entry_size], entry, f->entry_size);
lru_update(bucket, pos);
- *key_found = 0;
+ *key_found = 0;
*entry_ptr = (void *) &bucket->data[pos * f->entry_size];
return 0;
@@ -270,7 +345,7 @@ rte_table_hash_entry_delete_key32_lru(
uint64_t signature;
uint32_t bucket_index, i;
- signature = f->f_hash(key, f->key_size, f->seed);
+ signature = f->f_hash(key, f->key_mask, f->key_size, f->seed);
bucket_index = signature & (f->n_buckets - 1);
bucket = (struct rte_bucket_4_32 *)
&f->memory[bucket_index * f->bucket_size];
@@ -279,10 +354,10 @@ rte_table_hash_entry_delete_key32_lru(
/* Key is present in the bucket */
for (i = 0; i < 4; i++) {
uint64_t bucket_signature = bucket->signature[i];
- uint8_t *bucket_key = (uint8_t *) bucket->key[i];
+ uint8_t *bucket_key = (uint8_t *) &bucket->key[i];
if ((bucket_signature == signature) &&
- (memcmp(key, bucket_key, f->key_size) == 0)) {
+ (keycmp(bucket_key, key, f->key_mask) == 0)) {
uint8_t *bucket_data = &bucket->data[i * f->entry_size];
bucket->signature[i] = 0;
@@ -299,81 +374,72 @@ rte_table_hash_entry_delete_key32_lru(
return 0;
}
-static int
-check_params_create_ext(struct rte_table_hash_key32_ext_params *params) {
- /* n_entries */
- if (params->n_entries == 0) {
- RTE_LOG(ERR, TABLE, "%s: n_entries is zero\n", __func__);
- return -EINVAL;
- }
-
- /* n_entries_ext */
- if (params->n_entries_ext == 0) {
- RTE_LOG(ERR, TABLE, "%s: n_entries_ext is zero\n", __func__);
- return -EINVAL;
- }
-
- /* f_hash */
- if (params->f_hash == NULL) {
- RTE_LOG(ERR, TABLE, "%s: f_hash function pointer is NULL\n",
- __func__);
- return -EINVAL;
- }
-
- return 0;
-}
-
static void *
rte_table_hash_create_key32_ext(void *params,
int socket_id,
uint32_t entry_size)
{
- struct rte_table_hash_key32_ext_params *p =
- params;
+ struct rte_table_hash_params *p = params;
struct rte_table_hash *f;
- uint32_t n_buckets, n_buckets_ext, n_entries_per_bucket;
- uint32_t key_size, bucket_size_cl, stack_size_cl, total_size, i;
+ uint64_t bucket_size, stack_size, total_size;
+ uint32_t n_buckets_ext, i;
/* Check input parameters */
- if ((check_params_create_ext(p) != 0) ||
+ if ((check_params_create(p) != 0) ||
((sizeof(struct rte_table_hash) % RTE_CACHE_LINE_SIZE) != 0) ||
((sizeof(struct rte_bucket_4_32) % 64) != 0))
return NULL;
- n_entries_per_bucket = 4;
- key_size = 32;
+ /*
+ * Table dimensioning
+ *
+ * Objective: Pick the number of bucket extensions (n_buckets_ext) so that
+ * it is guaranteed that n_keys keys can be stored in the table at any time.
+ *
+ * The worst case scenario takes place when all the n_keys keys fall into
+ * the same bucket. Actually, due to the KEYS_PER_BUCKET scheme, the worst
+ * case takes place when (n_keys - KEYS_PER_BUCKET + 1) keys fall into the
+ * same bucket, while the remaining (KEYS_PER_BUCKET - 1) keys each fall
+ * into a different bucket. This case defeats the purpose of the hash table.
+ * It indicates unsuitable f_hash or n_keys to n_buckets ratio.
+ *
+ * n_buckets_ext = n_keys / KEYS_PER_BUCKET + KEYS_PER_BUCKET - 1
+ */
+ n_buckets_ext = p->n_keys / KEYS_PER_BUCKET + KEYS_PER_BUCKET - 1;
/* Memory allocation */
- n_buckets = rte_align32pow2((p->n_entries + n_entries_per_bucket - 1) /
- n_entries_per_bucket);
- n_buckets_ext = (p->n_entries_ext + n_entries_per_bucket - 1) /
- n_entries_per_bucket;
- bucket_size_cl = (sizeof(struct rte_bucket_4_32) + n_entries_per_bucket
- * entry_size + RTE_CACHE_LINE_SIZE - 1) / RTE_CACHE_LINE_SIZE;
- stack_size_cl = (n_buckets_ext * sizeof(uint32_t) + RTE_CACHE_LINE_SIZE - 1)
- / RTE_CACHE_LINE_SIZE;
+ bucket_size = RTE_CACHE_LINE_ROUNDUP(sizeof(struct rte_bucket_4_32) +
+ KEYS_PER_BUCKET * entry_size);
+ stack_size = RTE_CACHE_LINE_ROUNDUP(n_buckets_ext * sizeof(uint32_t));
total_size = sizeof(struct rte_table_hash) +
- ((n_buckets + n_buckets_ext) * bucket_size_cl + stack_size_cl) *
- RTE_CACHE_LINE_SIZE;
+ (p->n_buckets + n_buckets_ext) * bucket_size + stack_size;
+ if (total_size > SIZE_MAX) {
+ RTE_LOG(ERR, TABLE, "%s: Cannot allocate %" PRIu64 " bytes "
+ "for hash table %s\n",
+ __func__, total_size, p->name);
+ return NULL;
+ }
- f = rte_zmalloc_socket("TABLE", total_size, RTE_CACHE_LINE_SIZE, socket_id);
+ f = rte_zmalloc_socket(p->name,
+ (size_t)total_size,
+ RTE_CACHE_LINE_SIZE,
+ socket_id);
if (f == NULL) {
- RTE_LOG(ERR, TABLE,
- "%s: Cannot allocate %u bytes for hash table\n",
- __func__, total_size);
+ RTE_LOG(ERR, TABLE, "%s: Cannot allocate %" PRIu64 " bytes "
+ "for hash table %s\n",
+ __func__, total_size, p->name);
return NULL;
}
RTE_LOG(INFO, TABLE,
- "%s: Hash table memory footprint is %u bytes\n", __func__,
- total_size);
+ "%s: Hash table %s memory footprint "
+ "is %" PRIu64" bytes\n",
+ __func__, p->name, total_size);
/* Memory initialization */
- f->n_buckets = n_buckets;
- f->n_entries_per_bucket = n_entries_per_bucket;
- f->key_size = key_size;
+ f->n_buckets = p->n_buckets;
+ f->key_size = KEY_SIZE;
f->entry_size = entry_size;
- f->bucket_size = bucket_size_cl * RTE_CACHE_LINE_SIZE;
- f->signature_offset = p->signature_offset;
+ f->bucket_size = bucket_size;
f->key_offset = p->key_offset;
f->f_hash = p->f_hash;
f->seed = p->seed;
@@ -381,7 +447,19 @@ rte_table_hash_create_key32_ext(void *params,
f->n_buckets_ext = n_buckets_ext;
f->stack_pos = n_buckets_ext;
f->stack = (uint32_t *)
- &f->memory[(n_buckets + n_buckets_ext) * f->bucket_size];
+ &f->memory[(p->n_buckets + n_buckets_ext) * f->bucket_size];
+
+ if (p->key_mask != NULL) {
+ f->key_mask[0] = (((uint64_t *)p->key_mask)[0]);
+ f->key_mask[1] = (((uint64_t *)p->key_mask)[1]);
+ f->key_mask[2] = (((uint64_t *)p->key_mask)[2]);
+ f->key_mask[3] = (((uint64_t *)p->key_mask)[3]);
+ } else {
+ f->key_mask[0] = 0xFFFFFFFFFFFFFFFFLLU;
+ f->key_mask[1] = 0xFFFFFFFFFFFFFFFFLLU;
+ f->key_mask[2] = 0xFFFFFFFFFFFFFFFFLLU;
+ f->key_mask[3] = 0xFFFFFFFFFFFFFFFFLLU;
+ }
for (i = 0; i < n_buckets_ext; i++)
f->stack[i] = i;
@@ -417,7 +495,7 @@ rte_table_hash_entry_add_key32_ext(
uint64_t signature;
uint32_t bucket_index, i;
- signature = f->f_hash(key, f->key_size, f->seed);
+ signature = f->f_hash(key, f->key_mask, f->key_size, f->seed);
bucket_index = signature & (f->n_buckets - 1);
bucket0 = (struct rte_bucket_4_32 *)
&f->memory[bucket_index * f->bucket_size];
@@ -427,10 +505,10 @@ rte_table_hash_entry_add_key32_ext(
for (bucket = bucket0; bucket != NULL; bucket = bucket->next) {
for (i = 0; i < 4; i++) {
uint64_t bucket_signature = bucket->signature[i];
- uint8_t *bucket_key = (uint8_t *) bucket->key[i];
+ uint8_t *bucket_key = (uint8_t *) &bucket->key[i];
if ((bucket_signature == signature) &&
- (memcmp(key, bucket_key, f->key_size) == 0)) {
+ (keycmp(bucket_key, key, f->key_mask) == 0)) {
uint8_t *bucket_data = &bucket->data[i *
f->entry_size];
@@ -448,14 +526,14 @@ rte_table_hash_entry_add_key32_ext(
bucket_prev = bucket, bucket = bucket->next)
for (i = 0; i < 4; i++) {
uint64_t bucket_signature = bucket->signature[i];
- uint8_t *bucket_key = (uint8_t *) bucket->key[i];
+ uint8_t *bucket_key = (uint8_t *) &bucket->key[i];
if (bucket_signature == 0) {
uint8_t *bucket_data = &bucket->data[i *
f->entry_size];
bucket->signature[i] = signature;
- memcpy(bucket_key, key, f->key_size);
+ keycpy(bucket_key, key, f->key_mask);
memcpy(bucket_data, entry, f->entry_size);
*key_found = 0;
*entry_ptr = (void *) bucket_data;
@@ -475,7 +553,7 @@ rte_table_hash_entry_add_key32_ext(
bucket_prev->next_valid = 1;
bucket->signature[0] = signature;
- memcpy(bucket->key[0], key, f->key_size);
+ keycpy(&bucket->key[0], key, f->key_mask);
memcpy(&bucket->data[0], entry, f->entry_size);
*key_found = 0;
*entry_ptr = (void *) &bucket->data[0];
@@ -497,7 +575,7 @@ rte_table_hash_entry_delete_key32_ext(
uint64_t signature;
uint32_t bucket_index, i;
- signature = f->f_hash(key, f->key_size, f->seed);
+ signature = f->f_hash(key, f->key_mask, f->key_size, f->seed);
bucket_index = signature & (f->n_buckets - 1);
bucket0 = (struct rte_bucket_4_32 *)
&f->memory[bucket_index * f->bucket_size];
@@ -508,24 +586,23 @@ rte_table_hash_entry_delete_key32_ext(
bucket_prev = bucket, bucket = bucket->next)
for (i = 0; i < 4; i++) {
uint64_t bucket_signature = bucket->signature[i];
- uint8_t *bucket_key = (uint8_t *) bucket->key[i];
+ uint8_t *bucket_key = (uint8_t *) &bucket->key[i];
if ((bucket_signature == signature) &&
- (memcmp(key, bucket_key, f->key_size) == 0)) {
+ (keycmp(bucket_key, key, f->key_mask) == 0)) {
uint8_t *bucket_data = &bucket->data[i *
f->entry_size];
bucket->signature[i] = 0;
*key_found = 1;
if (entry)
- memcpy(entry, bucket_data,
- f->entry_size);
+ memcpy(entry, bucket_data, f->entry_size);
if ((bucket->signature[0] == 0) &&
- (bucket->signature[1] == 0) &&
- (bucket->signature[2] == 0) &&
- (bucket->signature[3] == 0) &&
- (bucket_prev != NULL)) {
+ (bucket->signature[1] == 0) &&
+ (bucket->signature[2] == 0) &&
+ (bucket->signature[3] == 0) &&
+ (bucket_prev != NULL)) {
bucket_prev->next = bucket->next;
bucket_prev->next_valid =
bucket->next_valid;
@@ -546,34 +623,39 @@ rte_table_hash_entry_delete_key32_ext(
return 0;
}
-#define lookup_key32_cmp(key_in, bucket, pos) \
+#define lookup_key32_cmp(key_in, bucket, pos, f) \
{ \
- uint64_t xor[4][4], or[4], signature[4]; \
+ uint64_t xor[4][4], or[4], signature[4], k[4]; \
+ \
+ k[0] = key_in[0] & f->key_mask[0]; \
+ k[1] = key_in[1] & f->key_mask[1]; \
+ k[2] = key_in[2] & f->key_mask[2]; \
+ k[3] = key_in[3] & f->key_mask[3]; \
\
signature[0] = ((~bucket->signature[0]) & 1); \
signature[1] = ((~bucket->signature[1]) & 1); \
signature[2] = ((~bucket->signature[2]) & 1); \
signature[3] = ((~bucket->signature[3]) & 1); \
\
- xor[0][0] = key_in[0] ^ bucket->key[0][0]; \
- xor[0][1] = key_in[1] ^ bucket->key[0][1]; \
- xor[0][2] = key_in[2] ^ bucket->key[0][2]; \
- xor[0][3] = key_in[3] ^ bucket->key[0][3]; \
+ xor[0][0] = k[0] ^ bucket->key[0][0]; \
+ xor[0][1] = k[1] ^ bucket->key[0][1]; \
+ xor[0][2] = k[2] ^ bucket->key[0][2]; \
+ xor[0][3] = k[3] ^ bucket->key[0][3]; \
\
- xor[1][0] = key_in[0] ^ bucket->key[1][0]; \
- xor[1][1] = key_in[1] ^ bucket->key[1][1]; \
- xor[1][2] = key_in[2] ^ bucket->key[1][2]; \
- xor[1][3] = key_in[3] ^ bucket->key[1][3]; \
+ xor[1][0] = k[0] ^ bucket->key[1][0]; \
+ xor[1][1] = k[1] ^ bucket->key[1][1]; \
+ xor[1][2] = k[2] ^ bucket->key[1][2]; \
+ xor[1][3] = k[3] ^ bucket->key[1][3]; \
\
- xor[2][0] = key_in[0] ^ bucket->key[2][0]; \
- xor[2][1] = key_in[1] ^ bucket->key[2][1]; \
- xor[2][2] = key_in[2] ^ bucket->key[2][2]; \
- xor[2][3] = key_in[3] ^ bucket->key[2][3]; \
+ xor[2][0] = k[0] ^ bucket->key[2][0]; \
+ xor[2][1] = k[1] ^ bucket->key[2][1]; \
+ xor[2][2] = k[2] ^ bucket->key[2][2]; \
+ xor[2][3] = k[3] ^ bucket->key[2][3]; \
\
- xor[3][0] = key_in[0] ^ bucket->key[3][0]; \
- xor[3][1] = key_in[1] ^ bucket->key[3][1]; \
- xor[3][2] = key_in[2] ^ bucket->key[3][2]; \
- xor[3][3] = key_in[3] ^ bucket->key[3][3]; \
+ xor[3][0] = k[0] ^ bucket->key[3][0]; \
+ xor[3][1] = k[1] ^ bucket->key[3][1]; \
+ xor[3][2] = k[2] ^ bucket->key[3][2]; \
+ xor[3][3] = k[3] ^ bucket->key[3][3]; \
\
or[0] = xor[0][0] | xor[0][1] | xor[0][2] | xor[0][3] | signature[0];\
or[1] = xor[1][0] | xor[1][1] | xor[1][2] | xor[1][3] | signature[1];\
@@ -604,12 +686,15 @@ rte_table_hash_entry_delete_key32_ext(
rte_prefetch0(RTE_MBUF_METADATA_UINT8_PTR(mbuf0, key_offset));\
}
-#define lookup1_stage1(mbuf1, bucket1, f) \
+#define lookup1_stage1(mbuf1, bucket1, f) \
{ \
+ uint64_t *key; \
uint64_t signature; \
uint32_t bucket_index; \
\
- signature = RTE_MBUF_METADATA_UINT32(mbuf1, f->signature_offset);\
+ key = RTE_MBUF_METADATA_UINT64_PTR(mbuf1, f->key_offset); \
+ signature = f->f_hash(key, f->key_mask, KEY_SIZE, f->seed); \
+ \
bucket_index = signature & (f->n_buckets - 1); \
bucket1 = (struct rte_bucket_4_32 *) \
&f->memory[bucket_index * f->bucket_size]; \
@@ -627,8 +712,7 @@ rte_table_hash_entry_delete_key32_ext(
uint32_t pos; \
\
key = RTE_MBUF_METADATA_UINT64_PTR(mbuf2, f->key_offset);\
- \
- lookup_key32_cmp(key, bucket2, pos); \
+ lookup_key32_cmp(key, bucket2, pos, f); \
\
pkt_mask = (bucket2->signature[pos] & 1LLU) << pkt2_index;\
pkts_mask_out |= pkt_mask; \
@@ -649,8 +733,7 @@ rte_table_hash_entry_delete_key32_ext(
uint32_t pos; \
\
key = RTE_MBUF_METADATA_UINT64_PTR(mbuf2, f->key_offset);\
- \
- lookup_key32_cmp(key, bucket2, pos); \
+ lookup_key32_cmp(key, bucket2, pos, f); \
\
pkt_mask = (bucket2->signature[pos] & 1LLU) << pkt2_index;\
pkts_mask_out |= pkt_mask; \
@@ -678,7 +761,7 @@ rte_table_hash_entry_delete_key32_ext(
bucket = buckets[pkt_index]; \
key = keys[pkt_index]; \
\
- lookup_key32_cmp(key, bucket, pos); \
+ lookup_key32_cmp(key, bucket, pos, f); \
\
pkt_mask = (bucket->signature[pos] & 1LLU) << pkt_index;\
pkts_mask_out |= pkt_mask; \
@@ -745,22 +828,27 @@ rte_table_hash_entry_delete_key32_ext(
#define lookup2_stage1(mbuf10, mbuf11, bucket10, bucket11, f) \
{ \
- uint64_t signature10, signature11; \
- uint32_t bucket10_index, bucket11_index; \
+ uint64_t *key10, *key11; \
+ uint64_t signature10, signature11; \
+ uint32_t bucket10_index, bucket11_index; \
\
- signature10 = RTE_MBUF_METADATA_UINT32(mbuf10, f->signature_offset);\
- bucket10_index = signature10 & (f->n_buckets - 1); \
+ key10 = RTE_MBUF_METADATA_UINT64_PTR(mbuf10, f->key_offset); \
+ signature10 = f->f_hash(key10, f->key_mask, KEY_SIZE, f->seed); \
+ \
+ bucket10_index = signature10 & (f->n_buckets - 1); \
bucket10 = (struct rte_bucket_4_32 *) \
&f->memory[bucket10_index * f->bucket_size]; \
- rte_prefetch0(bucket10); \
+ rte_prefetch0(bucket10); \
rte_prefetch0((void *)(((uintptr_t) bucket10) + RTE_CACHE_LINE_SIZE));\
rte_prefetch0((void *)(((uintptr_t) bucket10) + 2 * RTE_CACHE_LINE_SIZE));\
\
- signature11 = RTE_MBUF_METADATA_UINT32(mbuf11, f->signature_offset);\
- bucket11_index = signature11 & (f->n_buckets - 1); \
+ key11 = RTE_MBUF_METADATA_UINT64_PTR(mbuf11, f->key_offset); \
+ signature11 = f->f_hash(key11, f->key_mask, KEY_SIZE, f->seed);\
+ \
+ bucket11_index = signature11 & (f->n_buckets - 1); \
bucket11 = (struct rte_bucket_4_32 *) \
&f->memory[bucket11_index * f->bucket_size]; \
- rte_prefetch0(bucket11); \
+ rte_prefetch0(bucket11); \
rte_prefetch0((void *)(((uintptr_t) bucket11) + RTE_CACHE_LINE_SIZE));\
rte_prefetch0((void *)(((uintptr_t) bucket11) + 2 * RTE_CACHE_LINE_SIZE));\
}
@@ -776,8 +864,8 @@ rte_table_hash_entry_delete_key32_ext(
key20 = RTE_MBUF_METADATA_UINT64_PTR(mbuf20, f->key_offset);\
key21 = RTE_MBUF_METADATA_UINT64_PTR(mbuf21, f->key_offset);\
\
- lookup_key32_cmp(key20, bucket20, pos20); \
- lookup_key32_cmp(key21, bucket21, pos21); \
+ lookup_key32_cmp(key20, bucket20, pos20, f); \
+ lookup_key32_cmp(key21, bucket21, pos21, f); \
\
pkt20_mask = (bucket20->signature[pos20] & 1LLU) << pkt20_index;\
pkt21_mask = (bucket21->signature[pos21] & 1LLU) << pkt21_index;\
@@ -805,8 +893,8 @@ rte_table_hash_entry_delete_key32_ext(
key20 = RTE_MBUF_METADATA_UINT64_PTR(mbuf20, f->key_offset);\
key21 = RTE_MBUF_METADATA_UINT64_PTR(mbuf21, f->key_offset);\
\
- lookup_key32_cmp(key20, bucket20, pos20); \
- lookup_key32_cmp(key21, bucket21, pos21); \
+ lookup_key32_cmp(key20, bucket20, pos20, f); \
+ lookup_key32_cmp(key21, bucket21, pos21, f); \
\
pkt20_mask = (bucket20->signature[pos20] & 1LLU) << pkt20_index;\
pkt21_mask = (bucket21->signature[pos21] & 1LLU) << pkt21_index;\
diff --git a/lib/librte_table/rte_table_hash_key8.c b/lib/librte_table/rte_table_hash_key8.c
index 5f0c6566..94373043 100644
--- a/lib/librte_table/rte_table_hash_key8.c
+++ b/lib/librte_table/rte_table_hash_key8.c
@@ -1,34 +1,34 @@
/*-
- * BSD LICENSE
+ * BSD LICENSE
*
- * Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
- * All rights reserved.
+ * Copyright(c) 2010-2017 Intel Corporation. All rights reserved.
+ * All rights reserved.
*
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
*
- * * Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in
- * the documentation and/or other materials provided with the
- * distribution.
- * * Neither the name of Intel Corporation nor the names of its
- * contributors may be used to endorse or promote products derived
- * from this software without specific prior written permission.
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
*
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#include <string.h>
#include <stdio.h>
@@ -42,7 +42,9 @@
#include "rte_table_hash.h"
#include "rte_lru.h"
-#define RTE_TABLE_HASH_KEY_SIZE 8
+#define KEY_SIZE 8
+
+#define KEYS_PER_BUCKET 4
#ifdef RTE_TABLE_STATS_COLLECT
@@ -76,11 +78,9 @@ struct rte_table_hash {
/* Input parameters */
uint32_t n_buckets;
- uint32_t n_entries_per_bucket;
uint32_t key_size;
uint32_t entry_size;
uint32_t bucket_size;
- uint32_t signature_offset;
uint32_t key_offset;
uint64_t key_mask;
rte_table_hash_op_hash f_hash;
@@ -96,10 +96,46 @@ struct rte_table_hash {
};
static int
-check_params_create_lru(struct rte_table_hash_key8_lru_params *params) {
- /* n_entries */
- if (params->n_entries == 0) {
- RTE_LOG(ERR, TABLE, "%s: n_entries is zero\n", __func__);
+keycmp(void *a, void *b, void *b_mask)
+{
+ uint64_t *a64 = a, *b64 = b, *b_mask64 = b_mask;
+
+ return a64[0] != (b64[0] & b_mask64[0]);
+}
+
+static void
+keycpy(void *dst, void *src, void *src_mask)
+{
+ uint64_t *dst64 = dst, *src64 = src, *src_mask64 = src_mask;
+
+ dst64[0] = src64[0] & src_mask64[0];
+}
+
+static int
+check_params_create(struct rte_table_hash_params *params)
+{
+ /* name */
+ if (params->name == NULL) {
+ RTE_LOG(ERR, TABLE, "%s: name invalid value\n", __func__);
+ return -EINVAL;
+ }
+
+ /* key_size */
+ if (params->key_size != KEY_SIZE) {
+ RTE_LOG(ERR, TABLE, "%s: key_size invalid value\n", __func__);
+ return -EINVAL;
+ }
+
+ /* n_keys */
+ if (params->n_keys == 0) {
+ RTE_LOG(ERR, TABLE, "%s: n_keys is zero\n", __func__);
+ return -EINVAL;
+ }
+
+ /* n_buckets */
+ if ((params->n_buckets == 0) ||
+ (!rte_is_power_of_2(params->n_buckets))) {
+ RTE_LOG(ERR, TABLE, "%s: n_buckets invalid value\n", __func__);
return -EINVAL;
}
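The unified parameter check above is what every key8 table variant now goes through: a single struct rte_table_hash_params replaces the per-variant LRU/EXT parameter structures. A hedged usage sketch, using only the fields this patch dereferences (name, key_size, key_offset, key_mask, n_keys, n_buckets, f_hash, seed); the metadata offset, entry struct and hash function are illustrative placeholders, not part of the patch, and NULL key_mask is assumed to mean "match all key bits" (that behaviour is explicit in the generic LRU code further below):

    #include <rte_table_hash.h>

    #define FLOW_KEY_OFFSET 32               /* assumption: app-defined mbuf metadata offset */

    struct flow_entry { uint32_t port_id; }; /* assumption: application entry layout */

    /* Placeholder hash with the new rte_table_hash_op_hash shape
     * (key, key_mask, key_size, seed); not a good hash, just the right prototype. */
    static uint64_t
    flow_hash_k8(void *key, void *key_mask, uint32_t key_size, uint64_t seed)
    {
    	(void)key_size;
    	return (((uint64_t *)key)[0] & ((uint64_t *)key_mask)[0]) ^ seed;
    }

    static void *
    flow_table_create(int socket_id)
    {
    	struct rte_table_hash_params params = {
    		.name = "flow_table",
    		.key_size = 8,
    		.key_offset = FLOW_KEY_OFFSET,
    		.key_mask = NULL,        /* assumption: NULL => match all key bits */
    		.n_keys = 1 << 20,
    		.n_buckets = 1 << 18,
    		.f_hash = flow_hash_k8,
    		.seed = 0,
    	};

    	return rte_table_hash_key8_lru_ops.f_create(&params, socket_id,
    		sizeof(struct flow_entry));
    }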
@@ -116,47 +152,68 @@ check_params_create_lru(struct rte_table_hash_key8_lru_params *params) {
static void *
rte_table_hash_create_key8_lru(void *params, int socket_id, uint32_t entry_size)
{
- struct rte_table_hash_key8_lru_params *p =
- (struct rte_table_hash_key8_lru_params *) params;
+ struct rte_table_hash_params *p = params;
struct rte_table_hash *f;
- uint32_t n_buckets, n_entries_per_bucket, key_size, bucket_size_cl;
- uint32_t total_size, i;
+ uint64_t bucket_size, total_size;
+ uint32_t n_buckets, i;
/* Check input parameters */
- if ((check_params_create_lru(p) != 0) ||
+ if ((check_params_create(p) != 0) ||
((sizeof(struct rte_table_hash) % RTE_CACHE_LINE_SIZE) != 0) ||
- ((sizeof(struct rte_bucket_4_8) % 64) != 0)) {
+ ((sizeof(struct rte_bucket_4_8) % 64) != 0))
return NULL;
- }
- n_entries_per_bucket = 4;
- key_size = 8;
+
+ /*
+ * Table dimensioning
+ *
+	 * Objective: Pick the number of buckets (n_buckets) so that there is a chance
+ * to store n_keys keys in the table.
+ *
+ * Note: Since the buckets do not get extended, it is not possible to
+ * guarantee that n_keys keys can be stored in the table at any time. In the
+ * worst case scenario when all the n_keys fall into the same bucket, only
+ * a maximum of KEYS_PER_BUCKET keys will be stored in the table. This case
+ * defeats the purpose of the hash table. It indicates unsuitable f_hash or
+ * n_keys to n_buckets ratio.
+ *
+ * MIN(n_buckets) = (n_keys + KEYS_PER_BUCKET - 1) / KEYS_PER_BUCKET
+ */
+ n_buckets = rte_align32pow2(
+ (p->n_keys + KEYS_PER_BUCKET - 1) / KEYS_PER_BUCKET);
+ n_buckets = RTE_MAX(n_buckets, p->n_buckets);
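As a concrete check of the dimensioning formula above (illustration only, no new behaviour):

    /* n_keys = 1 << 20, KEYS_PER_BUCKET = 4:
     *   (1048576 + 4 - 1) / 4      = 262144
     *   rte_align32pow2(262144)    = 262144 (already 2^18)
     *   n_buckets                  = RTE_MAX(262144, p->n_buckets)
     */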
/* Memory allocation */
- n_buckets = rte_align32pow2((p->n_entries + n_entries_per_bucket - 1) /
- n_entries_per_bucket);
- bucket_size_cl = (sizeof(struct rte_bucket_4_8) + n_entries_per_bucket *
- entry_size + RTE_CACHE_LINE_SIZE - 1) / RTE_CACHE_LINE_SIZE;
- total_size = sizeof(struct rte_table_hash) + n_buckets *
- bucket_size_cl * RTE_CACHE_LINE_SIZE;
-
- f = rte_zmalloc_socket("TABLE", total_size, RTE_CACHE_LINE_SIZE, socket_id);
+ bucket_size = RTE_CACHE_LINE_ROUNDUP(sizeof(struct rte_bucket_4_8) +
+ KEYS_PER_BUCKET * entry_size);
+ total_size = sizeof(struct rte_table_hash) + n_buckets * bucket_size;
+
+ if (total_size > SIZE_MAX) {
+ RTE_LOG(ERR, TABLE, "%s: Cannot allocate %" PRIu64 " bytes"
+ " for hash table %s\n",
+ __func__, total_size, p->name);
+ return NULL;
+ }
+
+ f = rte_zmalloc_socket(p->name,
+ (size_t)total_size,
+ RTE_CACHE_LINE_SIZE,
+ socket_id);
if (f == NULL) {
- RTE_LOG(ERR, TABLE,
- "%s: Cannot allocate %u bytes for hash table\n",
- __func__, total_size);
+ RTE_LOG(ERR, TABLE, "%s: Cannot allocate %" PRIu64 " bytes"
+ " for hash table %s\n",
+ __func__, total_size, p->name);
return NULL;
}
- RTE_LOG(INFO, TABLE,
- "%s: Hash table memory footprint is %u bytes\n",
- __func__, total_size);
+
+ RTE_LOG(INFO, TABLE, "%s: Hash table %s memory footprint "
+ "is %" PRIu64 " bytes\n",
+ __func__, p->name, total_size);
/* Memory initialization */
f->n_buckets = n_buckets;
- f->n_entries_per_bucket = n_entries_per_bucket;
- f->key_size = key_size;
+ f->key_size = KEY_SIZE;
f->entry_size = entry_size;
- f->bucket_size = bucket_size_cl * RTE_CACHE_LINE_SIZE;
- f->signature_offset = p->signature_offset;
+ f->bucket_size = bucket_size;
f->key_offset = p->key_offset;
f->f_hash = p->f_hash;
f->seed = p->seed;
@@ -205,7 +262,7 @@ rte_table_hash_entry_add_key8_lru(
uint64_t signature, mask, pos;
uint32_t bucket_index, i;
- signature = f->f_hash(key, f->key_size, f->seed);
+ signature = f->f_hash(key, &f->key_mask, f->key_size, f->seed);
bucket_index = signature & (f->n_buckets - 1);
bucket = (struct rte_bucket_4_8 *)
&f->memory[bucket_index * f->bucket_size];
@@ -213,10 +270,10 @@ rte_table_hash_entry_add_key8_lru(
/* Key is present in the bucket */
for (i = 0, mask = 1LLU; i < 4; i++, mask <<= 1) {
uint64_t bucket_signature = bucket->signature;
- uint64_t bucket_key = bucket->key[i];
+ uint64_t *bucket_key = &bucket->key[i];
if ((bucket_signature & mask) &&
- (*((uint64_t *) key) == bucket_key)) {
+ (keycmp(bucket_key, key, &f->key_mask) == 0)) {
uint8_t *bucket_data = &bucket->data[i * f->entry_size];
memcpy(bucket_data, entry, f->entry_size);
@@ -235,7 +292,7 @@ rte_table_hash_entry_add_key8_lru(
uint8_t *bucket_data = &bucket->data[i * f->entry_size];
bucket->signature |= mask;
- bucket->key[i] = *((uint64_t *) key);
+ keycpy(&bucket->key[i], key, &f->key_mask);
memcpy(bucket_data, entry, f->entry_size);
lru_update(bucket, i);
*key_found = 0;
@@ -247,10 +304,10 @@ rte_table_hash_entry_add_key8_lru(
/* Bucket full: replace LRU entry */
pos = lru_pos(bucket);
- bucket->key[pos] = *((uint64_t *) key);
+ keycpy(&bucket->key[pos], key, &f->key_mask);
memcpy(&bucket->data[pos * f->entry_size], entry, f->entry_size);
lru_update(bucket, pos);
- *key_found = 0;
+ *key_found = 0;
*entry_ptr = (void *) &bucket->data[pos * f->entry_size];
return 0;
@@ -268,7 +325,7 @@ rte_table_hash_entry_delete_key8_lru(
uint64_t signature, mask;
uint32_t bucket_index, i;
- signature = f->f_hash(key, f->key_size, f->seed);
+ signature = f->f_hash(key, &f->key_mask, f->key_size, f->seed);
bucket_index = signature & (f->n_buckets - 1);
bucket = (struct rte_bucket_4_8 *)
&f->memory[bucket_index * f->bucket_size];
@@ -276,10 +333,10 @@ rte_table_hash_entry_delete_key8_lru(
/* Key is present in the bucket */
for (i = 0, mask = 1LLU; i < 4; i++, mask <<= 1) {
uint64_t bucket_signature = bucket->signature;
- uint64_t bucket_key = bucket->key[i];
+ uint64_t *bucket_key = &bucket->key[i];
if ((bucket_signature & mask) &&
- (*((uint64_t *) key) == bucket_key)) {
+ (keycmp(bucket_key, key, &f->key_mask) == 0)) {
uint8_t *bucket_data = &bucket->data[i * f->entry_size];
bucket->signature &= ~mask;
@@ -296,79 +353,71 @@ rte_table_hash_entry_delete_key8_lru(
return 0;
}
-static int
-check_params_create_ext(struct rte_table_hash_key8_ext_params *params) {
- /* n_entries */
- if (params->n_entries == 0) {
- RTE_LOG(ERR, TABLE, "%s: n_entries is zero\n", __func__);
- return -EINVAL;
- }
-
- /* n_entries_ext */
- if (params->n_entries_ext == 0) {
- RTE_LOG(ERR, TABLE, "%s: n_entries_ext is zero\n", __func__);
- return -EINVAL;
- }
-
- /* f_hash */
- if (params->f_hash == NULL) {
- RTE_LOG(ERR, TABLE, "%s: f_hash function pointer is NULL\n",
- __func__);
- return -EINVAL;
- }
-
- return 0;
-}
-
static void *
rte_table_hash_create_key8_ext(void *params, int socket_id, uint32_t entry_size)
{
- struct rte_table_hash_key8_ext_params *p =
- (struct rte_table_hash_key8_ext_params *) params;
+ struct rte_table_hash_params *p = params;
struct rte_table_hash *f;
- uint32_t n_buckets, n_buckets_ext, n_entries_per_bucket, key_size;
- uint32_t bucket_size_cl, stack_size_cl, total_size, i;
+ uint64_t bucket_size, stack_size, total_size;
+ uint32_t n_buckets_ext, i;
/* Check input parameters */
- if ((check_params_create_ext(p) != 0) ||
+ if ((check_params_create(p) != 0) ||
((sizeof(struct rte_table_hash) % RTE_CACHE_LINE_SIZE) != 0) ||
((sizeof(struct rte_bucket_4_8) % 64) != 0))
return NULL;
- n_entries_per_bucket = 4;
- key_size = 8;
+ /*
+ * Table dimensioning
+ *
+ * Objective: Pick the number of bucket extensions (n_buckets_ext) so that
+ * it is guaranteed that n_keys keys can be stored in the table at any time.
+ *
+ * The worst case scenario takes place when all the n_keys keys fall into
+ * the same bucket. Actually, due to the KEYS_PER_BUCKET scheme, the worst
+ * case takes place when (n_keys - KEYS_PER_BUCKET + 1) keys fall into the
+ * same bucket, while the remaining (KEYS_PER_BUCKET - 1) keys each fall
+ * into a different bucket. This case defeats the purpose of the hash table.
+ * It indicates unsuitable f_hash or n_keys to n_buckets ratio.
+ *
+ * n_buckets_ext = n_keys / KEYS_PER_BUCKET + KEYS_PER_BUCKET - 1
+ */
+ n_buckets_ext = p->n_keys / KEYS_PER_BUCKET + KEYS_PER_BUCKET - 1;
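For comparison with the LRU sizing, a worked instance of the extension formula above (illustration only):

    /* n_keys = 1 << 20, KEYS_PER_BUCKET = 4:
     *   n_buckets_ext = 1048576 / 4 + 4 - 1 = 262147
     * With 4 keys per extension bucket, this is enough to absorb every key
     * that overflows its home bucket, even in the worst case described above.
     */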
/* Memory allocation */
- n_buckets = rte_align32pow2((p->n_entries + n_entries_per_bucket - 1) /
- n_entries_per_bucket);
- n_buckets_ext = (p->n_entries_ext + n_entries_per_bucket - 1) /
- n_entries_per_bucket;
- bucket_size_cl = (sizeof(struct rte_bucket_4_8) + n_entries_per_bucket *
- entry_size + RTE_CACHE_LINE_SIZE - 1) / RTE_CACHE_LINE_SIZE;
- stack_size_cl = (n_buckets_ext * sizeof(uint32_t) + RTE_CACHE_LINE_SIZE - 1)
- / RTE_CACHE_LINE_SIZE;
- total_size = sizeof(struct rte_table_hash) + ((n_buckets +
- n_buckets_ext) * bucket_size_cl + stack_size_cl) *
- RTE_CACHE_LINE_SIZE;
-
- f = rte_zmalloc_socket("TABLE", total_size, RTE_CACHE_LINE_SIZE, socket_id);
+ bucket_size = RTE_CACHE_LINE_ROUNDUP(sizeof(struct rte_bucket_4_8) +
+ KEYS_PER_BUCKET * entry_size);
+ stack_size = RTE_CACHE_LINE_ROUNDUP(n_buckets_ext * sizeof(uint32_t));
+ total_size = sizeof(struct rte_table_hash) +
+ (p->n_buckets + n_buckets_ext) * bucket_size + stack_size;
+
+ if (total_size > SIZE_MAX) {
+ RTE_LOG(ERR, TABLE, "%s: Cannot allocate %" PRIu64 " bytes "
+ "for hash table %s\n",
+ __func__, total_size, p->name);
+ return NULL;
+ }
+
+ f = rte_zmalloc_socket(p->name,
+ (size_t)total_size,
+ RTE_CACHE_LINE_SIZE,
+ socket_id);
if (f == NULL) {
RTE_LOG(ERR, TABLE,
- "%s: Cannot allocate %u bytes for hash table\n",
- __func__, total_size);
+ "%s: Cannot allocate %" PRIu64 " bytes "
+ "for hash table %s\n",
+ __func__, total_size, p->name);
return NULL;
}
- RTE_LOG(INFO, TABLE,
- "%s: Hash table memory footprint is %u bytes\n",
- __func__, total_size);
+ RTE_LOG(INFO, TABLE, "%s: Hash table %s memory footprint "
+ "is %" PRIu64 " bytes\n",
+ __func__, p->name, total_size);
/* Memory initialization */
- f->n_buckets = n_buckets;
- f->n_entries_per_bucket = n_entries_per_bucket;
- f->key_size = key_size;
+ f->n_buckets = p->n_buckets;
+ f->key_size = KEY_SIZE;
f->entry_size = entry_size;
- f->bucket_size = bucket_size_cl * RTE_CACHE_LINE_SIZE;
- f->signature_offset = p->signature_offset;
+ f->bucket_size = bucket_size;
f->key_offset = p->key_offset;
f->f_hash = p->f_hash;
f->seed = p->seed;
@@ -376,7 +425,7 @@ rte_table_hash_create_key8_ext(void *params, int socket_id, uint32_t entry_size)
f->n_buckets_ext = n_buckets_ext;
f->stack_pos = n_buckets_ext;
f->stack = (uint32_t *)
- &f->memory[(n_buckets + n_buckets_ext) * f->bucket_size];
+ &f->memory[(p->n_buckets + n_buckets_ext) * f->bucket_size];
if (p->key_mask != NULL)
f->key_mask = ((uint64_t *)p->key_mask)[0];
@@ -417,7 +466,7 @@ rte_table_hash_entry_add_key8_ext(
uint64_t signature;
uint32_t bucket_index, i;
- signature = f->f_hash(key, f->key_size, f->seed);
+ signature = f->f_hash(key, &f->key_mask, f->key_size, f->seed);
bucket_index = signature & (f->n_buckets - 1);
bucket0 = (struct rte_bucket_4_8 *)
&f->memory[bucket_index * f->bucket_size];
@@ -428,10 +477,10 @@ rte_table_hash_entry_add_key8_ext(
for (i = 0, mask = 1LLU; i < 4; i++, mask <<= 1) {
uint64_t bucket_signature = bucket->signature;
- uint64_t bucket_key = bucket->key[i];
+ uint64_t *bucket_key = &bucket->key[i];
if ((bucket_signature & mask) &&
- (*((uint64_t *) key) == bucket_key)) {
+ (keycmp(bucket_key, key, &f->key_mask) == 0)) {
uint8_t *bucket_data = &bucket->data[i *
f->entry_size];
@@ -456,7 +505,7 @@ rte_table_hash_entry_add_key8_ext(
f->entry_size];
bucket->signature |= mask;
- bucket->key[i] = *((uint64_t *) key);
+ keycpy(&bucket->key[i], key, &f->key_mask);
memcpy(bucket_data, entry, f->entry_size);
*key_found = 0;
*entry_ptr = (void *) bucket_data;
@@ -476,7 +525,7 @@ rte_table_hash_entry_add_key8_ext(
bucket_prev->next_valid = 1;
bucket->signature = 1;
- bucket->key[0] = *((uint64_t *) key);
+ keycpy(&bucket->key[0], key, &f->key_mask);
memcpy(&bucket->data[0], entry, f->entry_size);
*key_found = 0;
*entry_ptr = (void *) &bucket->data[0];
@@ -498,7 +547,7 @@ rte_table_hash_entry_delete_key8_ext(
uint64_t signature;
uint32_t bucket_index, i;
- signature = f->f_hash(key, f->key_size, f->seed);
+ signature = f->f_hash(key, &f->key_mask, f->key_size, f->seed);
bucket_index = signature & (f->n_buckets - 1);
bucket0 = (struct rte_bucket_4_8 *)
&f->memory[bucket_index * f->bucket_size];
@@ -510,10 +559,10 @@ rte_table_hash_entry_delete_key8_ext(
for (i = 0, mask = 1LLU; i < 4; i++, mask <<= 1) {
uint64_t bucket_signature = bucket->signature;
- uint64_t bucket_key = bucket->key[i];
+ uint64_t *bucket_key = &bucket->key[i];
if ((bucket_signature & mask) &&
- (*((uint64_t *) key) == bucket_key)) {
+ (keycmp(bucket_key, key, &f->key_mask) == 0)) {
uint8_t *bucket_data = &bucket->data[i *
f->entry_size];
@@ -546,16 +595,17 @@ rte_table_hash_entry_delete_key8_ext(
return 0;
}
-#define lookup_key8_cmp(key_in, bucket, pos) \
+#define lookup_key8_cmp(key_in, bucket, pos, f) \
{ \
- uint64_t xor[4], signature; \
+ uint64_t xor[4], signature, k; \
\
signature = ~bucket->signature; \
\
- xor[0] = (key_in[0] ^ bucket->key[0]) | (signature & 1);\
- xor[1] = (key_in[0] ^ bucket->key[1]) | (signature & 2);\
- xor[2] = (key_in[0] ^ bucket->key[2]) | (signature & 4);\
- xor[3] = (key_in[0] ^ bucket->key[3]) | (signature & 8);\
+ k = key_in[0] & f->key_mask; \
+ xor[0] = (k ^ bucket->key[0]) | (signature & 1); \
+ xor[1] = (k ^ bucket->key[1]) | (signature & 2); \
+ xor[2] = (k ^ bucket->key[2]) | (signature & 4); \
+ xor[3] = (k ^ bucket->key[3]) | (signature & 8); \
\
pos = 4; \
if (xor[0] == 0) \
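The comparison macro above ORs in the inverted signature bit so that empty slots can never compare equal; the "if (xor[i] == 0) pos = i;" chain that follows (the rest of the macro lies outside this hunk) then picks the matching slot without branching on the key data, leaving pos at 4 on a miss. A standalone sketch of the same trick, assuming it sits in the key8 implementation file so struct rte_bucket_4_8 is visible:

    /* Sketch only: returns 0..3 for the matching slot, 4 for a miss. */
    static inline uint32_t
    key8_match_pos(uint64_t key, const struct rte_bucket_4_8 *b, uint64_t key_mask)
    {
    	uint64_t sig = ~b->signature;            /* bit i set => slot i is empty */
    	uint64_t k = key & key_mask;
    	uint64_t xor0 = (k ^ b->key[0]) | (sig & 1);
    	uint64_t xor1 = (k ^ b->key[1]) | (sig & 2);
    	uint64_t xor2 = (k ^ b->key[2]) | (sig & 4);
    	uint64_t xor3 = (k ^ b->key[3]) | (sig & 8);
    	uint32_t pos = 4;

    	if (xor0 == 0)
    		pos = 0;
    	if (xor1 == 0)
    		pos = 1;
    	if (xor2 == 0)
    		pos = 2;
    	if (xor3 == 0)
    		pos = 3;
    	return pos;
    }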
@@ -583,27 +633,12 @@ rte_table_hash_entry_delete_key8_ext(
#define lookup1_stage1(mbuf1, bucket1, f) \
{ \
- uint64_t signature; \
- uint32_t bucket_index; \
- \
- signature = RTE_MBUF_METADATA_UINT32(mbuf1, f->signature_offset);\
- bucket_index = signature & (f->n_buckets - 1); \
- bucket1 = (struct rte_bucket_4_8 *) \
- &f->memory[bucket_index * f->bucket_size]; \
- rte_prefetch0(bucket1); \
-}
-
-#define lookup1_stage1_dosig(mbuf1, bucket1, f) \
-{ \
uint64_t *key; \
uint64_t signature; \
uint32_t bucket_index; \
- uint64_t hash_key_buffer; \
\
key = RTE_MBUF_METADATA_UINT64_PTR(mbuf1, f->key_offset);\
- hash_key_buffer = *key & f->key_mask; \
- signature = f->f_hash(&hash_key_buffer, \
- RTE_TABLE_HASH_KEY_SIZE, f->seed); \
+ signature = f->f_hash(key, &f->key_mask, KEY_SIZE, f->seed); \
bucket_index = signature & (f->n_buckets - 1); \
bucket1 = (struct rte_bucket_4_8 *) \
&f->memory[bucket_index * f->bucket_size]; \
@@ -617,12 +652,9 @@ rte_table_hash_entry_delete_key8_ext(
uint64_t pkt_mask; \
uint64_t *key; \
uint32_t pos; \
- uint64_t hash_key_buffer; \
\
key = RTE_MBUF_METADATA_UINT64_PTR(mbuf2, f->key_offset);\
- hash_key_buffer = key[0] & f->key_mask; \
- \
- lookup_key8_cmp((&hash_key_buffer), bucket2, pos); \
+ lookup_key8_cmp(key, bucket2, pos, f); \
\
pkt_mask = ((bucket2->signature >> pos) & 1LLU) << pkt2_index;\
pkts_mask_out |= pkt_mask; \
@@ -641,12 +673,9 @@ rte_table_hash_entry_delete_key8_ext(
uint64_t pkt_mask, bucket_mask; \
uint64_t *key; \
uint32_t pos; \
- uint64_t hash_key_buffer; \
\
key = RTE_MBUF_METADATA_UINT64_PTR(mbuf2, f->key_offset);\
- hash_key_buffer = *key & f->key_mask; \
- \
- lookup_key8_cmp((&hash_key_buffer), bucket2, pos); \
+ lookup_key8_cmp(key, bucket2, pos, f); \
\
pkt_mask = ((bucket2->signature >> pos) & 1LLU) << pkt2_index;\
pkts_mask_out |= pkt_mask; \
@@ -670,13 +699,10 @@ rte_table_hash_entry_delete_key8_ext(
uint64_t pkt_mask, bucket_mask; \
uint64_t *key; \
uint32_t pos; \
- uint64_t hash_key_buffer; \
\
bucket = buckets[pkt_index]; \
key = keys[pkt_index]; \
- hash_key_buffer = (*key) & f->key_mask; \
- \
- lookup_key8_cmp((&hash_key_buffer), bucket, pos); \
+ lookup_key8_cmp(key, bucket, pos, f); \
\
pkt_mask = ((bucket->signature >> pos) & 1LLU) << pkt_index;\
pkts_mask_out |= pkt_mask; \
@@ -738,29 +764,9 @@ rte_table_hash_entry_delete_key8_ext(
rte_prefetch0(RTE_MBUF_METADATA_UINT8_PTR(mbuf01, key_offset));\
}
-#define lookup2_stage1(mbuf10, mbuf11, bucket10, bucket11, f) \
-{ \
- uint64_t signature10, signature11; \
- uint32_t bucket10_index, bucket11_index; \
- \
- signature10 = RTE_MBUF_METADATA_UINT32(mbuf10, f->signature_offset);\
- bucket10_index = signature10 & (f->n_buckets - 1); \
- bucket10 = (struct rte_bucket_4_8 *) \
- &f->memory[bucket10_index * f->bucket_size]; \
- rte_prefetch0(bucket10); \
- \
- signature11 = RTE_MBUF_METADATA_UINT32(mbuf11, f->signature_offset);\
- bucket11_index = signature11 & (f->n_buckets - 1); \
- bucket11 = (struct rte_bucket_4_8 *) \
- &f->memory[bucket11_index * f->bucket_size]; \
- rte_prefetch0(bucket11); \
-}
-
-#define lookup2_stage1_dosig(mbuf10, mbuf11, bucket10, bucket11, f)\
+#define lookup2_stage1(mbuf10, mbuf11, bucket10, bucket11, f)\
{ \
uint64_t *key10, *key11; \
- uint64_t hash_offset_buffer10; \
- uint64_t hash_offset_buffer11; \
uint64_t signature10, signature11; \
uint32_t bucket10_index, bucket11_index; \
rte_table_hash_op_hash f_hash = f->f_hash; \
@@ -769,18 +775,14 @@ rte_table_hash_entry_delete_key8_ext(
\
key10 = RTE_MBUF_METADATA_UINT64_PTR(mbuf10, key_offset);\
key11 = RTE_MBUF_METADATA_UINT64_PTR(mbuf11, key_offset);\
- hash_offset_buffer10 = *key10 & f->key_mask; \
- hash_offset_buffer11 = *key11 & f->key_mask; \
\
- signature10 = f_hash(&hash_offset_buffer10, \
- RTE_TABLE_HASH_KEY_SIZE, seed); \
+ signature10 = f_hash(key10, &f->key_mask, KEY_SIZE, seed); \
bucket10_index = signature10 & (f->n_buckets - 1); \
bucket10 = (struct rte_bucket_4_8 *) \
&f->memory[bucket10_index * f->bucket_size]; \
rte_prefetch0(bucket10); \
\
- signature11 = f_hash(&hash_offset_buffer11, \
- RTE_TABLE_HASH_KEY_SIZE, seed); \
+ signature11 = f_hash(key11, &f->key_mask, KEY_SIZE, seed); \
bucket11_index = signature11 & (f->n_buckets - 1); \
bucket11 = (struct rte_bucket_4_8 *) \
&f->memory[bucket11_index * f->bucket_size]; \
@@ -793,17 +795,13 @@ rte_table_hash_entry_delete_key8_ext(
void *a20, *a21; \
uint64_t pkt20_mask, pkt21_mask; \
uint64_t *key20, *key21; \
- uint64_t hash_offset_buffer20; \
- uint64_t hash_offset_buffer21; \
uint32_t pos20, pos21; \
\
key20 = RTE_MBUF_METADATA_UINT64_PTR(mbuf20, f->key_offset);\
key21 = RTE_MBUF_METADATA_UINT64_PTR(mbuf21, f->key_offset);\
- hash_offset_buffer20 = *key20 & f->key_mask; \
- hash_offset_buffer21 = *key21 & f->key_mask; \
\
- lookup_key8_cmp((&hash_offset_buffer20), bucket20, pos20);\
- lookup_key8_cmp((&hash_offset_buffer21), bucket21, pos21);\
+ lookup_key8_cmp(key20, bucket20, pos20, f); \
+ lookup_key8_cmp(key21, bucket21, pos21, f); \
\
pkt20_mask = ((bucket20->signature >> pos20) & 1LLU) << pkt20_index;\
pkt21_mask = ((bucket21->signature >> pos21) & 1LLU) << pkt21_index;\
@@ -826,17 +824,13 @@ rte_table_hash_entry_delete_key8_ext(
void *a20, *a21; \
uint64_t pkt20_mask, pkt21_mask, bucket20_mask, bucket21_mask;\
uint64_t *key20, *key21; \
- uint64_t hash_offset_buffer20; \
- uint64_t hash_offset_buffer21; \
uint32_t pos20, pos21; \
\
key20 = RTE_MBUF_METADATA_UINT64_PTR(mbuf20, f->key_offset);\
key21 = RTE_MBUF_METADATA_UINT64_PTR(mbuf21, f->key_offset);\
- hash_offset_buffer20 = *key20 & f->key_mask; \
- hash_offset_buffer21 = *key21 & f->key_mask; \
\
- lookup_key8_cmp((&hash_offset_buffer20), bucket20, pos20);\
- lookup_key8_cmp((&hash_offset_buffer21), bucket21, pos21);\
+ lookup_key8_cmp(key20, bucket20, pos20, f); \
+ lookup_key8_cmp(key21, bucket21, pos21, f); \
\
pkt20_mask = ((bucket20->signature >> pos20) & 1LLU) << pkt20_index;\
pkt21_mask = ((bucket21->signature >> pos21) & 1LLU) << pkt21_index;\
@@ -871,8 +865,8 @@ rte_table_hash_lookup_key8_lru(
struct rte_table_hash *f = (struct rte_table_hash *) table;
struct rte_bucket_4_8 *bucket10, *bucket11, *bucket20, *bucket21;
struct rte_mbuf *mbuf00, *mbuf01, *mbuf10, *mbuf11, *mbuf20, *mbuf21;
- uint32_t pkt00_index, pkt01_index, pkt10_index,
- pkt11_index, pkt20_index, pkt21_index;
+ uint32_t pkt00_index, pkt01_index, pkt10_index;
+ uint32_t pkt11_index, pkt20_index, pkt21_index;
uint64_t pkts_mask_out = 0;
__rte_unused uint32_t n_pkts_in = __builtin_popcountll(pkts_mask);
@@ -888,7 +882,7 @@ rte_table_hash_lookup_key8_lru(
lookup1_stage0(pkt_index, mbuf, pkts, pkts_mask, f);
lookup1_stage1(mbuf, bucket, f);
lookup1_stage2_lru(pkt_index, mbuf, bucket,
- pkts_mask_out, entries, f);
+ pkts_mask_out, entries, f);
}
*lookup_hit_mask = pkts_mask_out;
@@ -984,133 +978,7 @@ rte_table_hash_lookup_key8_lru(
*lookup_hit_mask = pkts_mask_out;
RTE_TABLE_HASH_KEY8_STATS_PKTS_LOOKUP_MISS(f, n_pkts_in - __builtin_popcountll(pkts_mask_out));
return 0;
-} /* rte_table_hash_lookup_key8_lru() */
-
-static int
-rte_table_hash_lookup_key8_lru_dosig(
- void *table,
- struct rte_mbuf **pkts,
- uint64_t pkts_mask,
- uint64_t *lookup_hit_mask,
- void **entries)
-{
- struct rte_table_hash *f = (struct rte_table_hash *) table;
- struct rte_bucket_4_8 *bucket10, *bucket11, *bucket20, *bucket21;
- struct rte_mbuf *mbuf00, *mbuf01, *mbuf10, *mbuf11, *mbuf20, *mbuf21;
- uint32_t pkt00_index, pkt01_index, pkt10_index;
- uint32_t pkt11_index, pkt20_index, pkt21_index;
- uint64_t pkts_mask_out = 0;
-
- __rte_unused uint32_t n_pkts_in = __builtin_popcountll(pkts_mask);
- RTE_TABLE_HASH_KEY8_STATS_PKTS_IN_ADD(f, n_pkts_in);
-
- /* Cannot run the pipeline with less than 5 packets */
- if (__builtin_popcountll(pkts_mask) < 5) {
- for ( ; pkts_mask; ) {
- struct rte_bucket_4_8 *bucket;
- struct rte_mbuf *mbuf;
- uint32_t pkt_index;
-
- lookup1_stage0(pkt_index, mbuf, pkts, pkts_mask, f);
- lookup1_stage1_dosig(mbuf, bucket, f);
- lookup1_stage2_lru(pkt_index, mbuf, bucket,
- pkts_mask_out, entries, f);
- }
-
- *lookup_hit_mask = pkts_mask_out;
- RTE_TABLE_HASH_KEY8_STATS_PKTS_LOOKUP_MISS(f, n_pkts_in - __builtin_popcountll(pkts_mask_out));
- return 0;
- }
-
- /*
- * Pipeline fill
- *
- */
- /* Pipeline stage 0 */
- lookup2_stage0(pkt00_index, pkt01_index, mbuf00, mbuf01, pkts,
- pkts_mask, f);
-
- /* Pipeline feed */
- mbuf10 = mbuf00;
- mbuf11 = mbuf01;
- pkt10_index = pkt00_index;
- pkt11_index = pkt01_index;
-
- /* Pipeline stage 0 */
- lookup2_stage0(pkt00_index, pkt01_index, mbuf00, mbuf01, pkts,
- pkts_mask, f);
-
- /* Pipeline stage 1 */
- lookup2_stage1_dosig(mbuf10, mbuf11, bucket10, bucket11, f);
-
- /*
- * Pipeline run
- *
- */
- for ( ; pkts_mask; ) {
- /* Pipeline feed */
- bucket20 = bucket10;
- bucket21 = bucket11;
- mbuf20 = mbuf10;
- mbuf21 = mbuf11;
- mbuf10 = mbuf00;
- mbuf11 = mbuf01;
- pkt20_index = pkt10_index;
- pkt21_index = pkt11_index;
- pkt10_index = pkt00_index;
- pkt11_index = pkt01_index;
-
- /* Pipeline stage 0 */
- lookup2_stage0_with_odd_support(pkt00_index, pkt01_index,
- mbuf00, mbuf01, pkts, pkts_mask, f);
-
- /* Pipeline stage 1 */
- lookup2_stage1_dosig(mbuf10, mbuf11, bucket10, bucket11, f);
-
- /* Pipeline stage 2 */
- lookup2_stage2_lru(pkt20_index, pkt21_index, mbuf20, mbuf21,
- bucket20, bucket21, pkts_mask_out, entries, f);
- }
-
- /*
- * Pipeline flush
- *
- */
- /* Pipeline feed */
- bucket20 = bucket10;
- bucket21 = bucket11;
- mbuf20 = mbuf10;
- mbuf21 = mbuf11;
- mbuf10 = mbuf00;
- mbuf11 = mbuf01;
- pkt20_index = pkt10_index;
- pkt21_index = pkt11_index;
- pkt10_index = pkt00_index;
- pkt11_index = pkt01_index;
-
- /* Pipeline stage 1 */
- lookup2_stage1_dosig(mbuf10, mbuf11, bucket10, bucket11, f);
-
- /* Pipeline stage 2 */
- lookup2_stage2_lru(pkt20_index, pkt21_index, mbuf20, mbuf21,
- bucket20, bucket21, pkts_mask_out, entries, f);
-
- /* Pipeline feed */
- bucket20 = bucket10;
- bucket21 = bucket11;
- mbuf20 = mbuf10;
- mbuf21 = mbuf11;
- pkt20_index = pkt10_index;
- pkt21_index = pkt11_index;
-
- /* Pipeline stage 2 */
- lookup2_stage2_lru(pkt20_index, pkt21_index, mbuf20, mbuf21,
- bucket20, bucket21, pkts_mask_out, entries, f);
-
- *lookup_hit_mask = pkts_mask_out;
- RTE_TABLE_HASH_KEY8_STATS_PKTS_LOOKUP_MISS(f, n_pkts_in - __builtin_popcountll(pkts_mask_out));
- return 0;
-} /* rte_table_hash_lookup_key8_lru_dosig() */
+} /* lookup LRU */
static int
rte_table_hash_lookup_key8_ext(
@@ -1142,8 +1010,8 @@ rte_table_hash_lookup_key8_ext(
lookup1_stage0(pkt_index, mbuf, pkts, pkts_mask, f);
lookup1_stage1(mbuf, bucket, f);
lookup1_stage2_ext(pkt_index, mbuf, bucket,
- pkts_mask_out, entries, buckets_mask, buckets,
- keys, f);
+ pkts_mask_out, entries, buckets_mask,
+ buckets, keys, f);
}
goto grind_next_buckets;
@@ -1260,157 +1128,7 @@ grind_next_buckets:
*lookup_hit_mask = pkts_mask_out;
RTE_TABLE_HASH_KEY8_STATS_PKTS_LOOKUP_MISS(f, n_pkts_in - __builtin_popcountll(pkts_mask_out));
return 0;
-} /* rte_table_hash_lookup_key8_ext() */
-
-static int
-rte_table_hash_lookup_key8_ext_dosig(
- void *table,
- struct rte_mbuf **pkts,
- uint64_t pkts_mask,
- uint64_t *lookup_hit_mask,
- void **entries)
-{
- struct rte_table_hash *f = (struct rte_table_hash *) table;
- struct rte_bucket_4_8 *bucket10, *bucket11, *bucket20, *bucket21;
- struct rte_mbuf *mbuf00, *mbuf01, *mbuf10, *mbuf11, *mbuf20, *mbuf21;
- uint32_t pkt00_index, pkt01_index, pkt10_index;
- uint32_t pkt11_index, pkt20_index, pkt21_index;
- uint64_t pkts_mask_out = 0, buckets_mask = 0;
- struct rte_bucket_4_8 *buckets[RTE_PORT_IN_BURST_SIZE_MAX];
- uint64_t *keys[RTE_PORT_IN_BURST_SIZE_MAX];
-
- __rte_unused uint32_t n_pkts_in = __builtin_popcountll(pkts_mask);
- RTE_TABLE_HASH_KEY8_STATS_PKTS_IN_ADD(f, n_pkts_in);
-
- /* Cannot run the pipeline with less than 5 packets */
- if (__builtin_popcountll(pkts_mask) < 5) {
- for ( ; pkts_mask; ) {
- struct rte_bucket_4_8 *bucket;
- struct rte_mbuf *mbuf;
- uint32_t pkt_index;
-
- lookup1_stage0(pkt_index, mbuf, pkts, pkts_mask, f);
- lookup1_stage1_dosig(mbuf, bucket, f);
- lookup1_stage2_ext(pkt_index, mbuf, bucket,
- pkts_mask_out, entries, buckets_mask,
- buckets, keys, f);
- }
-
- goto grind_next_buckets;
- }
-
- /*
- * Pipeline fill
- *
- */
- /* Pipeline stage 0 */
- lookup2_stage0(pkt00_index, pkt01_index, mbuf00, mbuf01, pkts,
- pkts_mask, f);
-
- /* Pipeline feed */
- mbuf10 = mbuf00;
- mbuf11 = mbuf01;
- pkt10_index = pkt00_index;
- pkt11_index = pkt01_index;
-
- /* Pipeline stage 0 */
- lookup2_stage0(pkt00_index, pkt01_index, mbuf00, mbuf01, pkts,
- pkts_mask, f);
-
- /* Pipeline stage 1 */
- lookup2_stage1_dosig(mbuf10, mbuf11, bucket10, bucket11, f);
-
- /*
- * Pipeline run
- *
- */
- for ( ; pkts_mask; ) {
- /* Pipeline feed */
- bucket20 = bucket10;
- bucket21 = bucket11;
- mbuf20 = mbuf10;
- mbuf21 = mbuf11;
- mbuf10 = mbuf00;
- mbuf11 = mbuf01;
- pkt20_index = pkt10_index;
- pkt21_index = pkt11_index;
- pkt10_index = pkt00_index;
- pkt11_index = pkt01_index;
-
- /* Pipeline stage 0 */
- lookup2_stage0_with_odd_support(pkt00_index, pkt01_index,
- mbuf00, mbuf01, pkts, pkts_mask, f);
-
- /* Pipeline stage 1 */
- lookup2_stage1_dosig(mbuf10, mbuf11, bucket10, bucket11, f);
-
- /* Pipeline stage 2 */
- lookup2_stage2_ext(pkt20_index, pkt21_index, mbuf20, mbuf21,
- bucket20, bucket21, pkts_mask_out, entries,
- buckets_mask, buckets, keys, f);
- }
-
- /*
- * Pipeline flush
- *
- */
- /* Pipeline feed */
- bucket20 = bucket10;
- bucket21 = bucket11;
- mbuf20 = mbuf10;
- mbuf21 = mbuf11;
- mbuf10 = mbuf00;
- mbuf11 = mbuf01;
- pkt20_index = pkt10_index;
- pkt21_index = pkt11_index;
- pkt10_index = pkt00_index;
- pkt11_index = pkt01_index;
-
- /* Pipeline stage 1 */
- lookup2_stage1_dosig(mbuf10, mbuf11, bucket10, bucket11, f);
-
- /* Pipeline stage 2 */
- lookup2_stage2_ext(pkt20_index, pkt21_index, mbuf20, mbuf21,
- bucket20, bucket21, pkts_mask_out, entries,
- buckets_mask, buckets, keys, f);
-
- /* Pipeline feed */
- bucket20 = bucket10;
- bucket21 = bucket11;
- mbuf20 = mbuf10;
- mbuf21 = mbuf11;
- pkt20_index = pkt10_index;
- pkt21_index = pkt11_index;
-
- /* Pipeline stage 2 */
- lookup2_stage2_ext(pkt20_index, pkt21_index, mbuf20, mbuf21,
- bucket20, bucket21, pkts_mask_out, entries,
- buckets_mask, buckets, keys, f);
-
-grind_next_buckets:
- /* Grind next buckets */
- for ( ; buckets_mask; ) {
- uint64_t buckets_mask_next = 0;
-
- for ( ; buckets_mask; ) {
- uint64_t pkt_mask;
- uint32_t pkt_index;
-
- pkt_index = __builtin_ctzll(buckets_mask);
- pkt_mask = 1LLU << pkt_index;
- buckets_mask &= ~pkt_mask;
-
- lookup_grinder(pkt_index, buckets, keys, pkts_mask_out,
- entries, buckets_mask_next, f);
- }
-
- buckets_mask = buckets_mask_next;
- }
-
- *lookup_hit_mask = pkts_mask_out;
- RTE_TABLE_HASH_KEY8_STATS_PKTS_LOOKUP_MISS(f, n_pkts_in - __builtin_popcountll(pkts_mask_out));
- return 0;
-} /* rte_table_hash_lookup_key8_dosig_ext() */
+} /* lookup EXT */
static int
rte_table_hash_key8_stats_read(void *table, struct rte_table_stats *stats, int clear)
@@ -1437,17 +1155,6 @@ struct rte_table_ops rte_table_hash_key8_lru_ops = {
.f_stats = rte_table_hash_key8_stats_read,
};
-struct rte_table_ops rte_table_hash_key8_lru_dosig_ops = {
- .f_create = rte_table_hash_create_key8_lru,
- .f_free = rte_table_hash_free_key8_lru,
- .f_add = rte_table_hash_entry_add_key8_lru,
- .f_delete = rte_table_hash_entry_delete_key8_lru,
- .f_add_bulk = NULL,
- .f_delete_bulk = NULL,
- .f_lookup = rte_table_hash_lookup_key8_lru_dosig,
- .f_stats = rte_table_hash_key8_stats_read,
-};
-
struct rte_table_ops rte_table_hash_key8_ext_ops = {
.f_create = rte_table_hash_create_key8_ext,
.f_free = rte_table_hash_free_key8_ext,
@@ -1458,14 +1165,3 @@ struct rte_table_ops rte_table_hash_key8_ext_ops = {
.f_lookup = rte_table_hash_lookup_key8_ext,
.f_stats = rte_table_hash_key8_stats_read,
};
-
-struct rte_table_ops rte_table_hash_key8_ext_dosig_ops = {
- .f_create = rte_table_hash_create_key8_ext,
- .f_free = rte_table_hash_free_key8_ext,
- .f_add = rte_table_hash_entry_add_key8_ext,
- .f_delete = rte_table_hash_entry_delete_key8_ext,
- .f_add_bulk = NULL,
- .f_delete_bulk = NULL,
- .f_lookup = rte_table_hash_lookup_key8_ext_dosig,
- .f_stats = rte_table_hash_key8_stats_read,
-};
diff --git a/lib/librte_table/rte_table_hash_lru.c b/lib/librte_table/rte_table_hash_lru.c
index 5a4864e2..a07392fd 100644
--- a/lib/librte_table/rte_table_hash_lru.c
+++ b/lib/librte_table/rte_table_hash_lru.c
@@ -1,34 +1,34 @@
/*-
- * BSD LICENSE
+ * BSD LICENSE
*
- * Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
- * All rights reserved.
+ * Copyright(c) 2010-2017 Intel Corporation. All rights reserved.
+ * All rights reserved.
*
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
*
- * * Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in
- * the documentation and/or other materials provided with the
- * distribution.
- * * Neither the name of Intel Corporation nor the names of its
- * contributors may be used to endorse or promote products derived
- * from this software without specific prior written permission.
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
*
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#include <string.h>
@@ -86,7 +86,6 @@ struct rte_table_hash {
uint32_t n_buckets;
rte_table_hash_op_hash f_hash;
uint64_t seed;
- uint32_t signature_offset;
uint32_t key_offset;
/* Internal */
@@ -99,6 +98,7 @@ struct rte_table_hash {
struct grinder grinders[RTE_PORT_IN_BURST_SIZE_MAX];
/* Tables */
+ uint64_t *key_mask;
struct bucket *buckets;
uint8_t *key_mem;
uint8_t *data_mem;
@@ -109,29 +109,53 @@ struct rte_table_hash {
};
static int
-check_params_create(struct rte_table_hash_lru_params *params)
+keycmp(void *a, void *b, void *b_mask, uint32_t n_bytes)
{
- uint32_t n_buckets_min;
+ uint64_t *a64 = a, *b64 = b, *b_mask64 = b_mask;
+ uint32_t i;
+
+ for (i = 0; i < n_bytes / sizeof(uint64_t); i++)
+ if (a64[i] != (b64[i] & b_mask64[i]))
+ return 1;
+
+ return 0;
+}
+
+static void
+keycpy(void *dst, void *src, void *src_mask, uint32_t n_bytes)
+{
+ uint64_t *dst64 = dst, *src64 = src, *src_mask64 = src_mask;
+ uint32_t i;
+
+ for (i = 0; i < n_bytes / sizeof(uint64_t); i++)
+ dst64[i] = src64[i] & src_mask64[i];
+}
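keycmp()/keycpy() above generalize the masked compare and copy to any key size that passes the parameter check (a power of two, at least 8 bytes), walking the key one 64-bit word at a time; the stored key is always written pre-masked, so only the lookup side is masked at compare time. A small illustration, not part of the patch:

    /* With a 16-byte key and a mask that clears the low 32 bits of the
     * second word, the two keys below compare equal:
     *
     *   uint64_t stored[2] = { 0x1111222233334444ULL, 0xaaaabbbb00000000ULL };
     *   uint64_t lookup[2] = { 0x1111222233334444ULL, 0xaaaabbbbdeadbeefULL };
     *   uint64_t mask[2]   = { 0xffffffffffffffffULL, 0xffffffff00000000ULL };
     *
     *   keycmp(stored, lookup, mask, 16) == 0   (lookup is masked, stored is not)
     */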
+
+static int
+check_params_create(struct rte_table_hash_params *params)
+{
+ /* name */
+ if (params->name == NULL) {
+ RTE_LOG(ERR, TABLE, "%s: name invalid value\n", __func__);
+ return -EINVAL;
+ }
/* key_size */
- if ((params->key_size == 0) ||
+ if ((params->key_size < sizeof(uint64_t)) ||
(!rte_is_power_of_2(params->key_size))) {
RTE_LOG(ERR, TABLE, "%s: key_size invalid value\n", __func__);
return -EINVAL;
}
/* n_keys */
- if ((params->n_keys == 0) ||
- (!rte_is_power_of_2(params->n_keys))) {
+ if (params->n_keys == 0) {
RTE_LOG(ERR, TABLE, "%s: n_keys invalid value\n", __func__);
return -EINVAL;
}
/* n_buckets */
- n_buckets_min = (params->n_keys + KEYS_PER_BUCKET - 1) / params->n_keys;
if ((params->n_buckets == 0) ||
- (!rte_is_power_of_2(params->n_keys)) ||
- (params->n_buckets < n_buckets_min)) {
+ (!rte_is_power_of_2(params->n_buckets))) {
RTE_LOG(ERR, TABLE, "%s: n_buckets invalid value\n", __func__);
return -EINVAL;
}
@@ -148,13 +172,13 @@ check_params_create(struct rte_table_hash_lru_params *params)
static void *
rte_table_hash_lru_create(void *params, int socket_id, uint32_t entry_size)
{
- struct rte_table_hash_lru_params *p =
- params;
+ struct rte_table_hash_params *p = params;
struct rte_table_hash *t;
- uint32_t total_size, table_meta_sz;
- uint32_t bucket_sz, key_sz, key_stack_sz, data_sz;
- uint32_t bucket_offset, key_offset, key_stack_offset, data_offset;
- uint32_t i;
+ uint64_t table_meta_sz, key_mask_sz, bucket_sz, key_sz, key_stack_sz;
+ uint64_t data_sz, total_size;
+ uint64_t key_mask_offset, bucket_offset, key_offset, key_stack_offset;
+ uint64_t data_offset;
+ uint32_t n_buckets, i;
/* Check input parameters */
if ((check_params_create(p) != 0) ||
@@ -164,33 +188,65 @@ rte_table_hash_lru_create(void *params, int socket_id, uint32_t entry_size)
return NULL;
}
+ /*
+ * Table dimensioning
+ *
+	 * Objective: Pick the number of buckets (n_buckets) so that there is a chance
+ * to store n_keys keys in the table.
+ *
+ * Note: Since the buckets do not get extended, it is not possible to
+ * guarantee that n_keys keys can be stored in the table at any time. In the
+ * worst case scenario when all the n_keys fall into the same bucket, only
+ * a maximum of KEYS_PER_BUCKET keys will be stored in the table. This case
+ * defeats the purpose of the hash table. It indicates unsuitable f_hash or
+ * n_keys to n_buckets ratio.
+ *
+ * MIN(n_buckets) = (n_keys + KEYS_PER_BUCKET - 1) / KEYS_PER_BUCKET
+ */
+ n_buckets = rte_align32pow2(
+ (p->n_keys + KEYS_PER_BUCKET - 1) / KEYS_PER_BUCKET);
+ n_buckets = RTE_MAX(n_buckets, p->n_buckets);
+
/* Memory allocation */
table_meta_sz = RTE_CACHE_LINE_ROUNDUP(sizeof(struct rte_table_hash));
- bucket_sz = RTE_CACHE_LINE_ROUNDUP(p->n_buckets * sizeof(struct bucket));
+ key_mask_sz = RTE_CACHE_LINE_ROUNDUP(p->key_size);
+ bucket_sz = RTE_CACHE_LINE_ROUNDUP(n_buckets * sizeof(struct bucket));
key_sz = RTE_CACHE_LINE_ROUNDUP(p->n_keys * p->key_size);
key_stack_sz = RTE_CACHE_LINE_ROUNDUP(p->n_keys * sizeof(uint32_t));
data_sz = RTE_CACHE_LINE_ROUNDUP(p->n_keys * entry_size);
- total_size = table_meta_sz + bucket_sz + key_sz + key_stack_sz +
- data_sz;
+ total_size = table_meta_sz + key_mask_sz + bucket_sz + key_sz +
+ key_stack_sz + data_sz;
+
+ if (total_size > SIZE_MAX) {
+ RTE_LOG(ERR, TABLE,
+ "%s: Cannot allocate %" PRIu64 " bytes for hash "
+ "table %s\n",
+ __func__, total_size, p->name);
+ return NULL;
+ }
- t = rte_zmalloc_socket("TABLE", total_size, RTE_CACHE_LINE_SIZE, socket_id);
+ t = rte_zmalloc_socket(p->name,
+ (size_t)total_size,
+ RTE_CACHE_LINE_SIZE,
+ socket_id);
if (t == NULL) {
RTE_LOG(ERR, TABLE,
- "%s: Cannot allocate %u bytes for hash table\n",
- __func__, total_size);
+ "%s: Cannot allocate %" PRIu64 " bytes for hash "
+ "table %s\n",
+ __func__, total_size, p->name);
return NULL;
}
- RTE_LOG(INFO, TABLE, "%s (%u-byte key): Hash table memory footprint is "
- "%u bytes\n", __func__, p->key_size, total_size);
+ RTE_LOG(INFO, TABLE, "%s (%u-byte key): Hash table %s memory footprint"
+ " is %" PRIu64 " bytes\n",
+ __func__, p->key_size, p->name, total_size);
/* Memory initialization */
t->key_size = p->key_size;
t->entry_size = entry_size;
t->n_keys = p->n_keys;
- t->n_buckets = p->n_buckets;
+ t->n_buckets = n_buckets;
t->f_hash = p->f_hash;
t->seed = p->seed;
- t->signature_offset = p->signature_offset;
t->key_offset = p->key_offset;
/* Internal */
@@ -199,16 +255,24 @@ rte_table_hash_lru_create(void *params, int socket_id, uint32_t entry_size)
t->data_size_shl = __builtin_ctzl(entry_size);
/* Tables */
- bucket_offset = 0;
+ key_mask_offset = 0;
+ bucket_offset = key_mask_offset + key_mask_sz;
key_offset = bucket_offset + bucket_sz;
key_stack_offset = key_offset + key_sz;
data_offset = key_stack_offset + key_stack_sz;
+ t->key_mask = (uint64_t *) &t->memory[key_mask_offset];
t->buckets = (struct bucket *) &t->memory[bucket_offset];
t->key_mem = &t->memory[key_offset];
t->key_stack = (uint32_t *) &t->memory[key_stack_offset];
t->data_mem = &t->memory[data_offset];
+ /* Key mask */
+ if (p->key_mask == NULL)
+ memset(t->key_mask, 0xFF, p->key_size);
+ else
+ memcpy(t->key_mask, p->key_mask, p->key_size);
+
/* Key stack */
for (i = 0; i < t->n_keys; i++)
t->key_stack[i] = t->n_keys - 1 - i;
@@ -246,7 +310,7 @@ rte_table_hash_lru_entry_add(void *table, void *key, void *entry,
uint64_t sig;
uint32_t bkt_index, i;
- sig = t->f_hash(key, t->key_size, t->seed);
+ sig = t->f_hash(key, t->key_mask, t->key_size, t->seed);
bkt_index = sig & t->bucket_mask;
bkt = &t->buckets[bkt_index];
sig = (sig >> 16) | 1LLU;
@@ -258,8 +322,8 @@ rte_table_hash_lru_entry_add(void *table, void *key, void *entry,
uint8_t *bkt_key = &t->key_mem[bkt_key_index <<
t->key_size_shl];
- if ((sig == bkt_sig) && (memcmp(key, bkt_key, t->key_size)
- == 0)) {
+ if ((sig == bkt_sig) && (keycmp(bkt_key, key, t->key_mask,
+ t->key_size) == 0)) {
uint8_t *data = &t->data_mem[bkt_key_index <<
t->data_size_shl];
@@ -292,7 +356,7 @@ rte_table_hash_lru_entry_add(void *table, void *key, void *entry,
bkt->sig[i] = (uint16_t) sig;
bkt->key_pos[i] = bkt_key_index;
- memcpy(bkt_key, key, t->key_size);
+ keycpy(bkt_key, key, t->key_mask, t->key_size);
memcpy(data, entry, t->entry_size);
lru_update(bkt, i);
@@ -311,7 +375,7 @@ rte_table_hash_lru_entry_add(void *table, void *key, void *entry,
uint8_t *data = &t->data_mem[bkt_key_index << t->data_size_shl];
bkt->sig[pos] = (uint16_t) sig;
- memcpy(bkt_key, key, t->key_size);
+ keycpy(bkt_key, key, t->key_mask, t->key_size);
memcpy(data, entry, t->entry_size);
lru_update(bkt, pos);
@@ -330,7 +394,7 @@ rte_table_hash_lru_entry_delete(void *table, void *key, int *key_found,
uint64_t sig;
uint32_t bkt_index, i;
- sig = t->f_hash(key, t->key_size, t->seed);
+ sig = t->f_hash(key, t->key_mask, t->key_size, t->seed);
bkt_index = sig & t->bucket_mask;
bkt = &t->buckets[bkt_index];
sig = (sig >> 16) | 1LLU;
@@ -343,14 +407,15 @@ rte_table_hash_lru_entry_delete(void *table, void *key, int *key_found,
t->key_size_shl];
if ((sig == bkt_sig) &&
- (memcmp(key, bkt_key, t->key_size) == 0)) {
+ (keycmp(bkt_key, key, t->key_mask, t->key_size) == 0)) {
uint8_t *data = &t->data_mem[bkt_key_index <<
t->data_size_shl];
bkt->sig[i] = 0;
t->key_stack[t->key_stack_tos++] = bkt_key_index;
*key_found = 1;
- memcpy(entry, data, t->entry_size);
+ if (entry)
+ memcpy(entry, data, t->entry_size);
return 0;
}
}
@@ -365,8 +430,7 @@ static int rte_table_hash_lru_lookup_unoptimized(
struct rte_mbuf **pkts,
uint64_t pkts_mask,
uint64_t *lookup_hit_mask,
- void **entries,
- int dosig)
+ void **entries)
{
struct rte_table_hash *t = (struct rte_table_hash *) table;
uint64_t pkts_mask_out = 0;
@@ -387,11 +451,7 @@ static int rte_table_hash_lru_lookup_unoptimized(
pkt = pkts[pkt_index];
key = RTE_MBUF_METADATA_UINT8_PTR(pkt, t->key_offset);
- if (dosig)
- sig = (uint64_t) t->f_hash(key, t->key_size, t->seed);
- else
- sig = RTE_MBUF_METADATA_UINT32(pkt,
- t->signature_offset);
+ sig = (uint64_t) t->f_hash(key, t->key_mask, t->key_size, t->seed);
bkt_index = sig & t->bucket_mask;
bkt = &t->buckets[bkt_index];
@@ -404,7 +464,7 @@ static int rte_table_hash_lru_lookup_unoptimized(
uint8_t *bkt_key = &t->key_mem[bkt_key_index <<
t->key_size_shl];
- if ((sig == bkt_sig) && (memcmp(key, bkt_key,
+ if ((sig == bkt_sig) && (keycmp(bkt_key, key, t->key_mask,
t->key_size) == 0)) {
uint8_t *data = &t->data_mem[bkt_key_index <<
t->data_size_shl];
@@ -502,74 +562,75 @@ static int rte_table_hash_lru_lookup_unoptimized(
match_pos = (LUT_MATCH_POS >> (mask_all << 1)) & 3; \
}
-#define lookup_cmp_key(mbuf, key, match_key, f) \
-{ \
+#define lookup_cmp_key(mbuf, key, match_key, f) \
+{ \
uint64_t *pkt_key = RTE_MBUF_METADATA_UINT64_PTR(mbuf, f->key_offset);\
- uint64_t *bkt_key = (uint64_t *) key; \
- \
- switch (f->key_size) { \
- case 8: \
- { \
- uint64_t xor = pkt_key[0] ^ bkt_key[0]; \
- match_key = 0; \
- if (xor == 0) \
- match_key = 1; \
- } \
- break; \
- \
- case 16: \
- { \
- uint64_t xor[2], or; \
- \
- xor[0] = pkt_key[0] ^ bkt_key[0]; \
- xor[1] = pkt_key[1] ^ bkt_key[1]; \
- or = xor[0] | xor[1]; \
- match_key = 0; \
- if (or == 0) \
- match_key = 1; \
- } \
- break; \
- \
- case 32: \
- { \
- uint64_t xor[4], or; \
- \
- xor[0] = pkt_key[0] ^ bkt_key[0]; \
- xor[1] = pkt_key[1] ^ bkt_key[1]; \
- xor[2] = pkt_key[2] ^ bkt_key[2]; \
- xor[3] = pkt_key[3] ^ bkt_key[3]; \
- or = xor[0] | xor[1] | xor[2] | xor[3]; \
- match_key = 0; \
- if (or == 0) \
- match_key = 1; \
- } \
- break; \
- \
- case 64: \
- { \
- uint64_t xor[8], or; \
- \
- xor[0] = pkt_key[0] ^ bkt_key[0]; \
- xor[1] = pkt_key[1] ^ bkt_key[1]; \
- xor[2] = pkt_key[2] ^ bkt_key[2]; \
- xor[3] = pkt_key[3] ^ bkt_key[3]; \
- xor[4] = pkt_key[4] ^ bkt_key[4]; \
- xor[5] = pkt_key[5] ^ bkt_key[5]; \
- xor[6] = pkt_key[6] ^ bkt_key[6]; \
- xor[7] = pkt_key[7] ^ bkt_key[7]; \
- or = xor[0] | xor[1] | xor[2] | xor[3] | \
- xor[4] | xor[5] | xor[6] | xor[7]; \
- match_key = 0; \
- if (or == 0) \
- match_key = 1; \
- } \
- break; \
- \
- default: \
- match_key = 0; \
- if (memcmp(pkt_key, bkt_key, f->key_size) == 0) \
- match_key = 1; \
- } \
+ uint64_t *bkt_key = (uint64_t *) key; \
+ uint64_t *key_mask = f->key_mask; \
+ \
+ switch (f->key_size) { \
+ case 8: \
+ { \
+ uint64_t xor = (pkt_key[0] & key_mask[0]) ^ bkt_key[0]; \
+ match_key = 0; \
+ if (xor == 0) \
+ match_key = 1; \
+ } \
+ break; \
+ \
+ case 16: \
+ { \
+ uint64_t xor[2], or; \
+ \
+ xor[0] = (pkt_key[0] & key_mask[0]) ^ bkt_key[0]; \
+ xor[1] = (pkt_key[1] & key_mask[1]) ^ bkt_key[1]; \
+ or = xor[0] | xor[1]; \
+ match_key = 0; \
+ if (or == 0) \
+ match_key = 1; \
+ } \
+ break; \
+ \
+ case 32: \
+ { \
+ uint64_t xor[4], or; \
+ \
+ xor[0] = (pkt_key[0] & key_mask[0]) ^ bkt_key[0]; \
+ xor[1] = (pkt_key[1] & key_mask[1]) ^ bkt_key[1]; \
+ xor[2] = (pkt_key[2] & key_mask[2]) ^ bkt_key[2]; \
+ xor[3] = (pkt_key[3] & key_mask[3]) ^ bkt_key[3]; \
+ or = xor[0] | xor[1] | xor[2] | xor[3]; \
+ match_key = 0; \
+ if (or == 0) \
+ match_key = 1; \
+ } \
+ break; \
+ \
+ case 64: \
+ { \
+ uint64_t xor[8], or; \
+ \
+ xor[0] = (pkt_key[0] & key_mask[0]) ^ bkt_key[0]; \
+ xor[1] = (pkt_key[1] & key_mask[1]) ^ bkt_key[1]; \
+ xor[2] = (pkt_key[2] & key_mask[2]) ^ bkt_key[2]; \
+ xor[3] = (pkt_key[3] & key_mask[3]) ^ bkt_key[3]; \
+ xor[4] = (pkt_key[4] & key_mask[4]) ^ bkt_key[4]; \
+ xor[5] = (pkt_key[5] & key_mask[5]) ^ bkt_key[5]; \
+ xor[6] = (pkt_key[6] & key_mask[6]) ^ bkt_key[6]; \
+ xor[7] = (pkt_key[7] & key_mask[7]) ^ bkt_key[7]; \
+ or = xor[0] | xor[1] | xor[2] | xor[3] | \
+ xor[4] | xor[5] | xor[6] | xor[7]; \
+ match_key = 0; \
+ if (or == 0) \
+ match_key = 1; \
+ } \
+ break; \
+ \
+ default: \
+ match_key = 0; \
+ if (keycmp(bkt_key, pkt_key, key_mask, f->key_size) == 0) \
+ match_key = 1; \
+ } \
}
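With the signature offset gone, lookup_cmp_key() now masks the packet key with f->key_mask before each unrolled XOR compare; the bucket key needs no masking because keycpy() stored it pre-masked. The 16-byte case as a plain function, for illustration:

    /* Sketch only: returns 1 on match, 0 otherwise. */
    static inline int
    match_key16(const uint64_t *pkt_key, const uint64_t *bkt_key,
    	const uint64_t *key_mask)
    {
    	uint64_t xor0 = (pkt_key[0] & key_mask[0]) ^ bkt_key[0];
    	uint64_t xor1 = (pkt_key[1] & key_mask[1]) ^ bkt_key[1];

    	return (xor0 | xor1) == 0;
    }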
#define lookup2_stage0(t, g, pkts, pkts_mask, pkt00_index, pkt01_index)\
@@ -616,38 +677,7 @@ static int rte_table_hash_lru_lookup_unoptimized(
rte_prefetch0(RTE_MBUF_METADATA_UINT8_PTR(mbuf01, key_offset));\
}
-#define lookup2_stage1(t, g, pkts, pkt10_index, pkt11_index) \
-{ \
- struct grinder *g10, *g11; \
- uint64_t sig10, sig11, bkt10_index, bkt11_index; \
- struct rte_mbuf *mbuf10, *mbuf11; \
- struct bucket *bkt10, *bkt11, *buckets = t->buckets; \
- uint64_t bucket_mask = t->bucket_mask; \
- uint32_t signature_offset = t->signature_offset; \
- \
- mbuf10 = pkts[pkt10_index]; \
- sig10 = (uint64_t) RTE_MBUF_METADATA_UINT32(mbuf10, signature_offset);\
- bkt10_index = sig10 & bucket_mask; \
- bkt10 = &buckets[bkt10_index]; \
- \
- mbuf11 = pkts[pkt11_index]; \
- sig11 = (uint64_t) RTE_MBUF_METADATA_UINT32(mbuf11, signature_offset);\
- bkt11_index = sig11 & bucket_mask; \
- bkt11 = &buckets[bkt11_index]; \
- \
- rte_prefetch0(bkt10); \
- rte_prefetch0(bkt11); \
- \
- g10 = &g[pkt10_index]; \
- g10->sig = sig10; \
- g10->bkt = bkt10; \
- \
- g11 = &g[pkt11_index]; \
- g11->sig = sig11; \
- g11->bkt = bkt11; \
-}
-
-#define lookup2_stage1_dosig(t, g, pkts, pkt10_index, pkt11_index)\
+#define lookup2_stage1(t, g, pkts, pkt10_index, pkt11_index)\
{ \
struct grinder *g10, *g11; \
uint64_t sig10, sig11, bkt10_index, bkt11_index; \
@@ -662,13 +692,13 @@ static int rte_table_hash_lru_lookup_unoptimized(
\
mbuf10 = pkts[pkt10_index]; \
key10 = RTE_MBUF_METADATA_UINT8_PTR(mbuf10, key_offset);\
- sig10 = (uint64_t) f_hash(key10, key_size, seed); \
+ sig10 = (uint64_t) f_hash(key10, t->key_mask, key_size, seed);\
bkt10_index = sig10 & bucket_mask; \
bkt10 = &buckets[bkt10_index]; \
\
mbuf11 = pkts[pkt11_index]; \
key11 = RTE_MBUF_METADATA_UINT8_PTR(mbuf11, key_offset);\
- sig11 = (uint64_t) f_hash(key11, key_size, seed); \
+ sig11 = (uint64_t) f_hash(key11, t->key_mask, key_size, seed);\
bkt11_index = sig11 & bucket_mask; \
bkt11 = &buckets[bkt11_index]; \
\
@@ -819,7 +849,7 @@ static int rte_table_hash_lru_lookup(
/* Cannot run the pipeline with less than 7 packets */
if (__builtin_popcountll(pkts_mask) < 7)
return rte_table_hash_lru_lookup_unoptimized(table, pkts,
- pkts_mask, lookup_hit_mask, entries, 0);
+ pkts_mask, lookup_hit_mask, entries);
/* Pipeline stage 0 */
lookup2_stage0(t, g, pkts, pkts_mask, pkt00_index, pkt01_index);
@@ -923,140 +953,7 @@ static int rte_table_hash_lru_lookup(
uint64_t pkts_mask_out_slow = 0;
status = rte_table_hash_lru_lookup_unoptimized(table, pkts,
- pkts_mask_match_many, &pkts_mask_out_slow, entries, 0);
- pkts_mask_out |= pkts_mask_out_slow;
- }
-
- *lookup_hit_mask = pkts_mask_out;
- RTE_TABLE_HASH_LRU_STATS_PKTS_LOOKUP_MISS(t, n_pkts_in - __builtin_popcountll(pkts_mask_out));
- return status;
-}
-
-static int rte_table_hash_lru_lookup_dosig(
- void *table,
- struct rte_mbuf **pkts,
- uint64_t pkts_mask,
- uint64_t *lookup_hit_mask,
- void **entries)
-{
- struct rte_table_hash *t = (struct rte_table_hash *) table;
- struct grinder *g = t->grinders;
- uint64_t pkt00_index, pkt01_index, pkt10_index, pkt11_index;
- uint64_t pkt20_index, pkt21_index, pkt30_index, pkt31_index;
- uint64_t pkts_mask_out = 0, pkts_mask_match_many = 0;
- int status = 0;
-
- __rte_unused uint32_t n_pkts_in = __builtin_popcountll(pkts_mask);
- RTE_TABLE_HASH_LRU_STATS_PKTS_IN_ADD(t, n_pkts_in);
-
- /* Cannot run the pipeline with less than 7 packets */
- if (__builtin_popcountll(pkts_mask) < 7)
- return rte_table_hash_lru_lookup_unoptimized(table, pkts,
- pkts_mask, lookup_hit_mask, entries, 1);
-
- /* Pipeline stage 0 */
- lookup2_stage0(t, g, pkts, pkts_mask, pkt00_index, pkt01_index);
-
- /* Pipeline feed */
- pkt10_index = pkt00_index;
- pkt11_index = pkt01_index;
-
- /* Pipeline stage 0 */
- lookup2_stage0(t, g, pkts, pkts_mask, pkt00_index, pkt01_index);
-
- /* Pipeline stage 1 */
- lookup2_stage1_dosig(t, g, pkts, pkt10_index, pkt11_index);
-
- /* Pipeline feed */
- pkt20_index = pkt10_index;
- pkt21_index = pkt11_index;
- pkt10_index = pkt00_index;
- pkt11_index = pkt01_index;
-
- /* Pipeline stage 0 */
- lookup2_stage0(t, g, pkts, pkts_mask, pkt00_index, pkt01_index);
-
- /* Pipeline stage 1 */
- lookup2_stage1_dosig(t, g, pkts, pkt10_index, pkt11_index);
-
- /* Pipeline stage 2 */
- lookup2_stage2(t, g, pkt20_index, pkt21_index, pkts_mask_match_many);
-
- /*
- * Pipeline run
- *
- */
- for ( ; pkts_mask; ) {
- /* Pipeline feed */
- pkt30_index = pkt20_index;
- pkt31_index = pkt21_index;
- pkt20_index = pkt10_index;
- pkt21_index = pkt11_index;
- pkt10_index = pkt00_index;
- pkt11_index = pkt01_index;
-
- /* Pipeline stage 0 */
- lookup2_stage0_with_odd_support(t, g, pkts, pkts_mask,
- pkt00_index, pkt01_index);
-
- /* Pipeline stage 1 */
- lookup2_stage1_dosig(t, g, pkts, pkt10_index, pkt11_index);
-
- /* Pipeline stage 2 */
- lookup2_stage2(t, g, pkt20_index, pkt21_index,
- pkts_mask_match_many);
-
- /* Pipeline stage 3 */
- lookup2_stage3(t, g, pkts, pkt30_index, pkt31_index,
- pkts_mask_out, entries);
- }
-
- /* Pipeline feed */
- pkt30_index = pkt20_index;
- pkt31_index = pkt21_index;
- pkt20_index = pkt10_index;
- pkt21_index = pkt11_index;
- pkt10_index = pkt00_index;
- pkt11_index = pkt01_index;
-
- /* Pipeline stage 1 */
- lookup2_stage1_dosig(t, g, pkts, pkt10_index, pkt11_index);
-
- /* Pipeline stage 2 */
- lookup2_stage2(t, g, pkt20_index, pkt21_index, pkts_mask_match_many);
-
- /* Pipeline stage 3 */
- lookup2_stage3(t, g, pkts, pkt30_index, pkt31_index, pkts_mask_out,
- entries);
-
- /* Pipeline feed */
- pkt30_index = pkt20_index;
- pkt31_index = pkt21_index;
- pkt20_index = pkt10_index;
- pkt21_index = pkt11_index;
-
- /* Pipeline stage 2 */
- lookup2_stage2(t, g, pkt20_index, pkt21_index, pkts_mask_match_many);
-
- /* Pipeline stage 3 */
- lookup2_stage3(t, g, pkts, pkt30_index, pkt31_index, pkts_mask_out,
- entries);
-
- /* Pipeline feed */
- pkt30_index = pkt20_index;
- pkt31_index = pkt21_index;
-
- /* Pipeline stage 3 */
- lookup2_stage3(t, g, pkts, pkt30_index, pkt31_index, pkts_mask_out,
- entries);
-
- /* Slow path */
- pkts_mask_match_many &= ~pkts_mask_out;
- if (pkts_mask_match_many) {
- uint64_t pkts_mask_out_slow = 0;
-
- status = rte_table_hash_lru_lookup_unoptimized(table, pkts,
- pkts_mask_match_many, &pkts_mask_out_slow, entries, 1);
+ pkts_mask_match_many, &pkts_mask_out_slow, entries);
pkts_mask_out |= pkts_mask_out_slow;
}
@@ -1089,14 +986,3 @@ struct rte_table_ops rte_table_hash_lru_ops = {
.f_lookup = rte_table_hash_lru_lookup,
.f_stats = rte_table_hash_lru_stats_read,
};
-
-struct rte_table_ops rte_table_hash_lru_dosig_ops = {
- .f_create = rte_table_hash_lru_create,
- .f_free = rte_table_hash_lru_free,
- .f_add = rte_table_hash_lru_entry_add,
- .f_delete = rte_table_hash_lru_entry_delete,
- .f_add_bulk = NULL,
- .f_delete_bulk = NULL,
- .f_lookup = rte_table_hash_lru_lookup_dosig,
- .f_stats = rte_table_hash_lru_stats_read,
-};
diff --git a/lib/librte_table/rte_table_version.map b/lib/librte_table/rte_table_version.map
index e1eaa275..6237252b 100644
--- a/lib/librte_table/rte_table_version.map
+++ b/lib/librte_table/rte_table_version.map
@@ -1,19 +1,16 @@
-DPDK_2.0 {
+DPDK_17.11 {
global:
rte_table_acl_ops;
rte_table_array_ops;
- rte_table_hash_ext_dosig_ops;
+ rte_table_hash_cuckoo_ops;
rte_table_hash_ext_ops;
- rte_table_hash_key8_ext_dosig_ops;
- rte_table_hash_key8_ext_ops;
- rte_table_hash_key8_lru_dosig_ops;
- rte_table_hash_key8_lru_ops;
rte_table_hash_key16_ext_ops;
rte_table_hash_key16_lru_ops;
rte_table_hash_key32_ext_ops;
rte_table_hash_key32_lru_ops;
- rte_table_hash_lru_dosig_ops;
+ rte_table_hash_key8_ext_ops;
+ rte_table_hash_key8_lru_ops;
rte_table_hash_lru_ops;
rte_table_lpm_ipv6_ops;
rte_table_lpm_ops;
@@ -21,18 +18,3 @@ DPDK_2.0 {
local: *;
};
-
-DPDK_2.2 {
- global:
-
- rte_table_hash_key16_ext_dosig_ops;
- rte_table_hash_key16_lru_dosig_ops;
-
-};
-
-DPDK_16.07 {
- global:
-
- rte_table_hash_cuckoo_dosig_ops;
-
-} DPDK_2.0;
diff --git a/lib/librte_timer/Makefile b/lib/librte_timer/Makefile
index 03a15390..eb9c5624 100644
--- a/lib/librte_timer/Makefile
+++ b/lib/librte_timer/Makefile
@@ -35,6 +35,7 @@ include $(RTE_SDK)/mk/rte.vars.mk
LIB = librte_timer.a
CFLAGS += $(WERROR_FLAGS) -I$(SRCDIR) -O3
+LDLIBS += -lrte_eal
EXPORT_MAP := rte_timer_version.map
diff --git a/lib/librte_timer/rte_timer.c b/lib/librte_timer/rte_timer.c
index 5ee08408..28decc39 100644
--- a/lib/librte_timer/rte_timer.c
+++ b/lib/librte_timer/rte_timer.c
@@ -43,7 +43,6 @@
#include <rte_cycles.h>
#include <rte_per_lcore.h>
#include <rte_memory.h>
-#include <rte_memzone.h>
#include <rte_launch.h>
#include <rte_eal.h>
#include <rte_lcore.h>
@@ -432,7 +431,8 @@ rte_timer_reset(struct rte_timer *tim, uint64_t ticks,
uint64_t period;
if (unlikely((tim_lcore != (unsigned)LCORE_ID_ANY) &&
- !rte_lcore_is_enabled(tim_lcore)))
+ !(rte_lcore_is_enabled(tim_lcore) ||
+ rte_lcore_has_role(tim_lcore, ROLE_SERVICE))))
return -1;
if (type == PERIODICAL)
@@ -525,7 +525,7 @@ void rte_timer_manage(void)
return;
cur_time = rte_get_timer_cycles();
-#ifdef RTE_ARCH_X86_64
+#ifdef RTE_ARCH_64
/* on 64-bit the value cached in the pending_head.expired will be
* updated atomically, so we can consult that for a quick check here
* outside the lock */
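
The rte_timer.c changes above drop an unused include, let rte_timer_reset() accept a service lcore as the target core, and widen the lock-free fast path in rte_timer_manage() from x86-64 to all 64-bit architectures. A minimal sketch of arming a periodical timer on a service lcore, assuming a hypothetical on_tick() callback and arm_stats_timer() helper (the service core must still call rte_timer_manage() for the callback to fire):

    #include <rte_common.h>
    #include <rte_cycles.h>
    #include <rte_timer.h>

    static struct rte_timer stats_tim;

    /* hypothetical periodic callback */
    static void
    on_tick(struct rte_timer *tim __rte_unused, void *arg __rte_unused)
    {
        /* collect statistics, kick a watchdog, ... */
    }

    /* Arm a periodical timer on 'service_lcore', an lcore id the application
     * registered as a service core via the EAL service coremask option.
     * Before this change, rte_timer_reset() rejected such cores. */
    static int
    arm_stats_timer(unsigned int service_lcore)
    {
        rte_timer_init(&stats_tim);

        return rte_timer_reset(&stats_tim, rte_get_timer_hz(), PERIODICAL,
                               service_lcore, on_tick, NULL);
    }
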
diff --git a/lib/librte_vhost/Makefile b/lib/librte_vhost/Makefile
index 4a116fe3..be182798 100644
--- a/lib/librte_vhost/Makefile
+++ b/lib/librte_vhost/Makefile
@@ -45,10 +45,11 @@ LDLIBS += -lpthread
ifeq ($(CONFIG_RTE_LIBRTE_VHOST_NUMA),y)
LDLIBS += -lnuma
endif
+LDLIBS += -lrte_eal -lrte_mempool -lrte_mbuf -lrte_ethdev
# all source are stored in SRCS-y
-SRCS-$(CONFIG_RTE_LIBRTE_VHOST) := fd_man.c socket.c vhost.c vhost_user.c \
- virtio_net.c
+SRCS-$(CONFIG_RTE_LIBRTE_VHOST) := fd_man.c iotlb.c socket.c vhost.c \
+ vhost_user.c virtio_net.c
# install includes
SYMLINK-$(CONFIG_RTE_LIBRTE_VHOST)-include += rte_vhost.h
diff --git a/lib/librte_vhost/fd_man.c b/lib/librte_vhost/fd_man.c
index 2ceacc9a..4c6fed41 100644
--- a/lib/librte_vhost/fd_man.c
+++ b/lib/librte_vhost/fd_man.c
@@ -222,6 +222,7 @@ fdset_event_dispatch(void *arg)
int remove1, remove2;
int need_shrink;
struct fdset *pfdset = arg;
+ int val;
if (pfdset == NULL)
return NULL;
@@ -239,7 +240,9 @@ fdset_event_dispatch(void *arg)
numfds = pfdset->num;
pthread_mutex_unlock(&pfdset->fd_mutex);
- poll(pfdset->rwfds, numfds, 1000 /* millisecs */);
+ val = poll(pfdset->rwfds, numfds, 1000 /* millisecs */);
+ if (val < 0)
+ continue;
need_shrink = 0;
for (i = 0; i < numfds; i++) {
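
The fd_man.c hunk above stops the dispatch loop from acting on stale revents when poll() fails (for instance when interrupted by a signal). The same defensive pattern in isolation, as a minimal standalone sketch with a hypothetical poll_once() helper:

    #include <errno.h>
    #include <poll.h>

    /* Poll a descriptor set and report whether revents may be consumed.
     * On an interrupted call the caller should skip this iteration,
     * mirroring the 'continue' added to fdset_event_dispatch(). */
    static int
    poll_once(struct pollfd *fds, nfds_t nfds, int timeout_ms)
    {
        int val = poll(fds, nfds, timeout_ms);

        if (val < 0 && errno == EINTR)
            return 0;   /* interrupted: revents are not meaningful */

        return val;     /* <0 on real error, >=0 otherwise */
    }
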
diff --git a/lib/librte_vhost/iotlb.c b/lib/librte_vhost/iotlb.c
new file mode 100644
index 00000000..b74cc6a7
--- /dev/null
+++ b/lib/librte_vhost/iotlb.c
@@ -0,0 +1,350 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright (c) 2017 Red Hat, Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifdef RTE_LIBRTE_VHOST_NUMA
+#include <numaif.h>
+#endif
+
+#include <rte_tailq.h>
+
+#include "iotlb.h"
+#include "vhost.h"
+
+struct vhost_iotlb_entry {
+ TAILQ_ENTRY(vhost_iotlb_entry) next;
+
+ uint64_t iova;
+ uint64_t uaddr;
+ uint64_t size;
+ uint8_t perm;
+};
+
+#define IOTLB_CACHE_SIZE 2048
+
+static void
+vhost_user_iotlb_pending_remove_all(struct vhost_virtqueue *vq)
+{
+ struct vhost_iotlb_entry *node, *temp_node;
+
+ rte_rwlock_write_lock(&vq->iotlb_pending_lock);
+
+ TAILQ_FOREACH_SAFE(node, &vq->iotlb_pending_list, next, temp_node) {
+ TAILQ_REMOVE(&vq->iotlb_pending_list, node, next);
+ rte_mempool_put(vq->iotlb_pool, node);
+ }
+
+ rte_rwlock_write_unlock(&vq->iotlb_pending_lock);
+}
+
+bool
+vhost_user_iotlb_pending_miss(struct vhost_virtqueue *vq, uint64_t iova,
+ uint8_t perm)
+{
+ struct vhost_iotlb_entry *node;
+ bool found = false;
+
+ rte_rwlock_read_lock(&vq->iotlb_pending_lock);
+
+ TAILQ_FOREACH(node, &vq->iotlb_pending_list, next) {
+ if ((node->iova == iova) && (node->perm == perm)) {
+ found = true;
+ break;
+ }
+ }
+
+ rte_rwlock_read_unlock(&vq->iotlb_pending_lock);
+
+ return found;
+}
+
+void
+vhost_user_iotlb_pending_insert(struct vhost_virtqueue *vq,
+ uint64_t iova, uint8_t perm)
+{
+ struct vhost_iotlb_entry *node;
+ int ret;
+
+ ret = rte_mempool_get(vq->iotlb_pool, (void **)&node);
+ if (ret) {
+ RTE_LOG(INFO, VHOST_CONFIG,
+ "IOTLB pool empty, clear pending misses\n");
+ vhost_user_iotlb_pending_remove_all(vq);
+ ret = rte_mempool_get(vq->iotlb_pool, (void **)&node);
+ if (ret) {
+ RTE_LOG(ERR, VHOST_CONFIG, "IOTLB pool still empty, failure\n");
+ return;
+ }
+ }
+
+ node->iova = iova;
+ node->perm = perm;
+
+ rte_rwlock_write_lock(&vq->iotlb_pending_lock);
+
+ TAILQ_INSERT_TAIL(&vq->iotlb_pending_list, node, next);
+
+ rte_rwlock_write_unlock(&vq->iotlb_pending_lock);
+}
+
+static void
+vhost_user_iotlb_pending_remove(struct vhost_virtqueue *vq,
+ uint64_t iova, uint64_t size, uint8_t perm)
+{
+ struct vhost_iotlb_entry *node, *temp_node;
+
+ rte_rwlock_write_lock(&vq->iotlb_pending_lock);
+
+ TAILQ_FOREACH_SAFE(node, &vq->iotlb_pending_list, next, temp_node) {
+ if (node->iova < iova)
+ continue;
+ if (node->iova >= iova + size)
+ continue;
+ if ((node->perm & perm) != node->perm)
+ continue;
+ TAILQ_REMOVE(&vq->iotlb_pending_list, node, next);
+ rte_mempool_put(vq->iotlb_pool, node);
+ }
+
+ rte_rwlock_write_unlock(&vq->iotlb_pending_lock);
+}
+
+static void
+vhost_user_iotlb_cache_remove_all(struct vhost_virtqueue *vq)
+{
+ struct vhost_iotlb_entry *node, *temp_node;
+
+ rte_rwlock_write_lock(&vq->iotlb_lock);
+
+ TAILQ_FOREACH_SAFE(node, &vq->iotlb_list, next, temp_node) {
+ TAILQ_REMOVE(&vq->iotlb_list, node, next);
+ rte_mempool_put(vq->iotlb_pool, node);
+ }
+
+ vq->iotlb_cache_nr = 0;
+
+ rte_rwlock_write_unlock(&vq->iotlb_lock);
+}
+
+static void
+vhost_user_iotlb_cache_random_evict(struct vhost_virtqueue *vq)
+{
+ struct vhost_iotlb_entry *node, *temp_node;
+ int entry_idx;
+
+ rte_rwlock_write_lock(&vq->iotlb_lock);
+
+ entry_idx = rte_rand() % vq->iotlb_cache_nr;
+
+ TAILQ_FOREACH_SAFE(node, &vq->iotlb_list, next, temp_node) {
+ if (!entry_idx) {
+ TAILQ_REMOVE(&vq->iotlb_list, node, next);
+ rte_mempool_put(vq->iotlb_pool, node);
+ vq->iotlb_cache_nr--;
+ break;
+ }
+ entry_idx--;
+ }
+
+ rte_rwlock_write_unlock(&vq->iotlb_lock);
+}
+
+void
+vhost_user_iotlb_cache_insert(struct vhost_virtqueue *vq, uint64_t iova,
+ uint64_t uaddr, uint64_t size, uint8_t perm)
+{
+ struct vhost_iotlb_entry *node, *new_node;
+ int ret;
+
+ ret = rte_mempool_get(vq->iotlb_pool, (void **)&new_node);
+ if (ret) {
+ RTE_LOG(DEBUG, VHOST_CONFIG, "IOTLB pool empty, evict one entry\n");
+ vhost_user_iotlb_cache_random_evict(vq);
+ ret = rte_mempool_get(vq->iotlb_pool, (void **)&new_node);
+ if (ret) {
+ RTE_LOG(ERR, VHOST_CONFIG, "IOTLB pool still empty, failure\n");
+ return;
+ }
+ }
+
+ new_node->iova = iova;
+ new_node->uaddr = uaddr;
+ new_node->size = size;
+ new_node->perm = perm;
+
+ rte_rwlock_write_lock(&vq->iotlb_lock);
+
+ TAILQ_FOREACH(node, &vq->iotlb_list, next) {
+ /*
+ * Entries must be invalidated before being updated.
+ * So if iova already in list, assume identical.
+ */
+ if (node->iova == new_node->iova) {
+ rte_mempool_put(vq->iotlb_pool, new_node);
+ goto unlock;
+ } else if (node->iova > new_node->iova) {
+ TAILQ_INSERT_BEFORE(node, new_node, next);
+ vq->iotlb_cache_nr++;
+ goto unlock;
+ }
+ }
+
+ TAILQ_INSERT_TAIL(&vq->iotlb_list, new_node, next);
+ vq->iotlb_cache_nr++;
+
+unlock:
+ vhost_user_iotlb_pending_remove(vq, iova, size, perm);
+
+ rte_rwlock_write_unlock(&vq->iotlb_lock);
+
+}
+
+void
+vhost_user_iotlb_cache_remove(struct vhost_virtqueue *vq,
+ uint64_t iova, uint64_t size)
+{
+ struct vhost_iotlb_entry *node, *temp_node;
+
+ if (unlikely(!size))
+ return;
+
+ rte_rwlock_write_lock(&vq->iotlb_lock);
+
+ TAILQ_FOREACH_SAFE(node, &vq->iotlb_list, next, temp_node) {
+ /* Sorted list */
+ if (unlikely(iova + size < node->iova))
+ break;
+
+ if (iova < node->iova + node->size) {
+ TAILQ_REMOVE(&vq->iotlb_list, node, next);
+ rte_mempool_put(vq->iotlb_pool, node);
+ vq->iotlb_cache_nr--;
+ }
+ }
+
+ rte_rwlock_write_unlock(&vq->iotlb_lock);
+}
+
+uint64_t
+vhost_user_iotlb_cache_find(struct vhost_virtqueue *vq, uint64_t iova,
+ uint64_t *size, uint8_t perm)
+{
+ struct vhost_iotlb_entry *node;
+ uint64_t offset, vva = 0, mapped = 0;
+
+ if (unlikely(!*size))
+ goto out;
+
+ TAILQ_FOREACH(node, &vq->iotlb_list, next) {
+ /* List sorted by iova */
+ if (unlikely(iova < node->iova))
+ break;
+
+ if (iova >= node->iova + node->size)
+ continue;
+
+ if (unlikely((perm & node->perm) != perm)) {
+ vva = 0;
+ break;
+ }
+
+ offset = iova - node->iova;
+ if (!vva)
+ vva = node->uaddr + offset;
+
+ mapped += node->size - offset;
+ iova = node->iova + node->size;
+
+ if (mapped >= *size)
+ break;
+ }
+
+out:
+ /* Only part of the requested chunk is mapped */
+ if (unlikely(mapped < *size))
+ *size = mapped;
+
+ return vva;
+}
+
+int
+vhost_user_iotlb_init(struct virtio_net *dev, int vq_index)
+{
+ char pool_name[RTE_MEMPOOL_NAMESIZE];
+ struct vhost_virtqueue *vq = dev->virtqueue[vq_index];
+ int socket = 0;
+
+ if (vq->iotlb_pool) {
+ /*
+ * The cache has already been initialized,
+ * just drop all cached and pending entries.
+ */
+ vhost_user_iotlb_cache_remove_all(vq);
+ vhost_user_iotlb_pending_remove_all(vq);
+ }
+
+#ifdef RTE_LIBRTE_VHOST_NUMA
+ if (get_mempolicy(&socket, NULL, 0, vq, MPOL_F_NODE | MPOL_F_ADDR) != 0)
+ socket = 0;
+#endif
+
+ rte_rwlock_init(&vq->iotlb_lock);
+ rte_rwlock_init(&vq->iotlb_pending_lock);
+
+ TAILQ_INIT(&vq->iotlb_list);
+ TAILQ_INIT(&vq->iotlb_pending_list);
+
+ snprintf(pool_name, sizeof(pool_name), "iotlb_cache_%d_%d",
+ dev->vid, vq_index);
+
+ /* If already created, free it and recreate */
+ vq->iotlb_pool = rte_mempool_lookup(pool_name);
+ if (vq->iotlb_pool)
+ rte_mempool_free(vq->iotlb_pool);
+
+ vq->iotlb_pool = rte_mempool_create(pool_name,
+ IOTLB_CACHE_SIZE, sizeof(struct vhost_iotlb_entry), 0,
+ 0, 0, NULL, NULL, NULL, socket,
+ MEMPOOL_F_NO_CACHE_ALIGN |
+ MEMPOOL_F_SP_PUT |
+ MEMPOOL_F_SC_GET);
+ if (!vq->iotlb_pool) {
+ RTE_LOG(ERR, VHOST_CONFIG,
+ "Failed to create IOTLB cache pool (%s)\n",
+ pool_name);
+ return -1;
+ }
+
+ vq->iotlb_cache_nr = 0;
+
+ return 0;
+}
+
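
iotlb.c implements a per-virtqueue software IOTLB: a sorted cache of iova to uaddr translations backed by a mempool, plus a list of pending misses so the same miss is not reported to the frontend twice. A minimal sketch of how a caller combines these helpers for one translation attempt, assuming a hypothetical report_miss() hook and translate_or_miss() wrapper; the real backend uses vhost_user_iotlb_miss(), added later in this patch, and is expected to hold the IOTLB read lock here:

    #include <rte_common.h>

    #include "iotlb.h"
    #include "vhost.h"

    /* hypothetical hook: ask the frontend to send a VHOST_IOTLB_UPDATE */
    static void
    report_miss(uint64_t iova __rte_unused, uint8_t perm __rte_unused)
    {
        /* the backend calls vhost_user_iotlb_miss(dev, iova, perm) here */
    }

    static uint64_t
    translate_or_miss(struct vhost_virtqueue *vq, uint64_t iova,
                      uint64_t size, uint8_t perm)
    {
        uint64_t len = size;
        uint64_t vva;

        /* Fast path: the translation is already cached. */
        vva = vhost_user_iotlb_cache_find(vq, iova, &len, perm);
        if (vva && len == size)
            return vva;

        /* Miss: record it (unless already pending) and notify the master.
         * The reply lands in vhost_user_iotlb_cache_insert(), which also
         * clears the matching pending entry. The real __vhost_iova_to_vva()
         * drops the read lock around this block to avoid a deadlock with
         * QEMU; omitted here for brevity. */
        if (!vhost_user_iotlb_pending_miss(vq, iova + len, perm)) {
            vhost_user_iotlb_pending_insert(vq, iova + len, perm);
            report_miss(iova + len, perm);
        }

        return 0;
    }
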
diff --git a/lib/librte_vhost/iotlb.h b/lib/librte_vhost/iotlb.h
new file mode 100644
index 00000000..f1a050e4
--- /dev/null
+++ b/lib/librte_vhost/iotlb.h
@@ -0,0 +1,76 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright (c) 2017 Red Hat, Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+#ifndef _VHOST_IOTLB_H_
+#define _VHOST_IOTLB_H_
+
+#include <stdbool.h>
+
+#include "vhost.h"
+
+static __rte_always_inline void
+vhost_user_iotlb_rd_lock(struct vhost_virtqueue *vq)
+{
+ rte_rwlock_read_lock(&vq->iotlb_lock);
+}
+
+static __rte_always_inline void
+vhost_user_iotlb_rd_unlock(struct vhost_virtqueue *vq)
+{
+ rte_rwlock_read_unlock(&vq->iotlb_lock);
+}
+
+static __rte_always_inline void
+vhost_user_iotlb_wr_lock(struct vhost_virtqueue *vq)
+{
+ rte_rwlock_write_lock(&vq->iotlb_lock);
+}
+
+static __rte_always_inline void
+vhost_user_iotlb_wr_unlock(struct vhost_virtqueue *vq)
+{
+ rte_rwlock_write_unlock(&vq->iotlb_lock);
+}
+
+void vhost_user_iotlb_cache_insert(struct vhost_virtqueue *vq, uint64_t iova,
+ uint64_t uaddr, uint64_t size,
+ uint8_t perm);
+void vhost_user_iotlb_cache_remove(struct vhost_virtqueue *vq,
+ uint64_t iova, uint64_t size);
+uint64_t vhost_user_iotlb_cache_find(struct vhost_virtqueue *vq, uint64_t iova,
+ uint64_t *size, uint8_t perm);
+bool vhost_user_iotlb_pending_miss(struct vhost_virtqueue *vq, uint64_t iova,
+ uint8_t perm);
+void vhost_user_iotlb_pending_insert(struct vhost_virtqueue *vq, uint64_t iova,
+ uint8_t perm);
+int vhost_user_iotlb_init(struct virtio_net *dev, int vq_index);
+
+#endif /* _VHOST_IOTLB_H_ */
diff --git a/lib/librte_vhost/rte_vhost.h b/lib/librte_vhost/rte_vhost.h
index 8c974eb1..f6536449 100644
--- a/lib/librte_vhost/rte_vhost.h
+++ b/lib/librte_vhost/rte_vhost.h
@@ -56,6 +56,7 @@ extern "C" {
#define RTE_VHOST_USER_CLIENT (1ULL << 0)
#define RTE_VHOST_USER_NO_RECONNECT (1ULL << 1)
#define RTE_VHOST_USER_DEQUEUE_ZERO_COPY (1ULL << 2)
+#define RTE_VHOST_USER_IOMMU_SUPPORT (1ULL << 3)
/**
* Information relating to memory regions including offsets to
@@ -107,7 +108,10 @@ struct vhost_device_ops {
*/
int (*features_changed)(int vid, uint64_t features);
- void *reserved[4]; /**< Reserved for future extension */
+ int (*new_connection)(int vid);
+ void (*destroy_connection)(int vid);
+
+ void *reserved[2]; /**< Reserved for future extension */
};
/**
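
rte_vhost.h gains two optional callbacks in struct vhost_device_ops, invoked when a vhost-user connection is established and torn down, independently of the device reaching the running state. A sketch of an application wiring them up; the callback bodies are placeholders and the other callbacks are omitted for brevity:

    #include <rte_log.h>
    #include <rte_vhost.h>

    static int
    my_new_connection(int vid)
    {
        RTE_LOG(INFO, USER1, "vhost connection established, vid %d\n", vid);
        return 0;   /* returning a negative value rejects the connection */
    }

    static void
    my_destroy_connection(int vid)
    {
        RTE_LOG(INFO, USER1, "vhost connection closed, vid %d\n", vid);
    }

    static const struct vhost_device_ops my_ops = {
        /* new_device/destroy_device/... still apply and are provided
         * by a real backend as before */
        .new_connection = my_new_connection,
        .destroy_connection = my_destroy_connection,
    };

    static int
    setup_vhost(const char *path)
    {
        if (rte_vhost_driver_register(path, 0) < 0)
            return -1;
        if (rte_vhost_driver_callback_register(path, &my_ops) < 0)
            return -1;
        return rte_vhost_driver_start(path);
    }
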
diff --git a/lib/librte_vhost/socket.c b/lib/librte_vhost/socket.c
index 41aa3f9b..422da002 100644
--- a/lib/librte_vhost/socket.c
+++ b/lib/librte_vhost/socket.c
@@ -68,6 +68,7 @@ struct vhost_user_socket {
bool is_server;
bool reconnect;
bool dequeue_zero_copy;
+ bool iommu_support;
/*
* The "supported_features" indicates the feature bits the
@@ -217,9 +218,7 @@ vhost_user_add_connection(int fd, struct vhost_user_socket *vsocket)
vid = vhost_new_device();
if (vid == -1) {
- close(fd);
- free(conn);
- return;
+ goto err;
}
size = strnlen(vsocket->path, PATH_MAX);
@@ -230,24 +229,40 @@ vhost_user_add_connection(int fd, struct vhost_user_socket *vsocket)
RTE_LOG(INFO, VHOST_CONFIG, "new device, handle is %d\n", vid);
+ if (vsocket->notify_ops->new_connection) {
+ ret = vsocket->notify_ops->new_connection(vid);
+ if (ret < 0) {
+ RTE_LOG(ERR, VHOST_CONFIG,
+ "failed to add vhost user connection with fd %d\n",
+ fd);
+ goto err;
+ }
+ }
+
conn->connfd = fd;
conn->vsocket = vsocket;
conn->vid = vid;
ret = fdset_add(&vhost_user.fdset, fd, vhost_user_read_cb,
NULL, conn);
if (ret < 0) {
- conn->connfd = -1;
- free(conn);
- close(fd);
RTE_LOG(ERR, VHOST_CONFIG,
"failed to add fd %d into vhost server fdset\n",
fd);
- return;
+
+ if (vsocket->notify_ops->destroy_connection)
+ vsocket->notify_ops->destroy_connection(conn->vid);
+
+ goto err;
}
pthread_mutex_lock(&vsocket->conn_mutex);
TAILQ_INSERT_TAIL(&vsocket->conn_list, conn, next);
pthread_mutex_unlock(&vsocket->conn_mutex);
+ return;
+
+err:
+ free(conn);
+ close(fd);
}
/* call back when there is new vhost-user connection from client */
@@ -277,6 +292,9 @@ vhost_user_read_cb(int connfd, void *dat, int *remove)
*remove = 1;
vhost_destroy_device(conn->vid);
+ if (vsocket->notify_ops->destroy_connection)
+ vsocket->notify_ops->destroy_connection(conn->vid);
+
pthread_mutex_lock(&vsocket->conn_mutex);
TAILQ_REMOVE(&vsocket->conn_list, conn, next);
pthread_mutex_unlock(&vsocket->conn_mutex);
@@ -652,6 +670,11 @@ rte_vhost_driver_register(const char *path, uint64_t flags)
vsocket->supported_features = VIRTIO_NET_SUPPORTED_FEATURES;
vsocket->features = VIRTIO_NET_SUPPORTED_FEATURES;
+ if (!(flags & RTE_VHOST_USER_IOMMU_SUPPORT)) {
+ vsocket->supported_features &= ~(1ULL << VIRTIO_F_IOMMU_PLATFORM);
+ vsocket->features &= ~(1ULL << VIRTIO_F_IOMMU_PLATFORM);
+ }
+
if ((flags & RTE_VHOST_USER_CLIENT) != 0) {
vsocket->reconnect = !(flags & RTE_VHOST_USER_NO_RECONNECT);
if (vsocket->reconnect && reconn_tid == 0) {
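
socket.c masks VIRTIO_F_IOMMU_PLATFORM out of the advertised feature set unless the application opts in with the new RTE_VHOST_USER_IOMMU_SUPPORT flag, so IOMMU support stays off by default. A sketch of opting in and later checking whether the guest actually negotiated the feature; register_with_iommu(), guest_uses_iommu() and the locally defined feature bit are illustrative only:

    #include <rte_vhost.h>

    /* VIRTIO_F_IOMMU_PLATFORM is bit 33 of the virtio feature set; it is
     * spelled out here because rte_vhost.h does not export the define. */
    #define MY_VIRTIO_F_IOMMU_PLATFORM 33

    static int
    register_with_iommu(const char *path)
    {
        uint64_t flags = RTE_VHOST_USER_CLIENT | RTE_VHOST_USER_IOMMU_SUPPORT;

        if (rte_vhost_driver_register(path, flags) < 0)
            return -1;

        return rte_vhost_driver_start(path);
    }

    /* later, e.g. from the new_device() callback */
    static int
    guest_uses_iommu(int vid)
    {
        uint64_t features = 0;

        rte_vhost_get_negotiated_features(vid, &features);
        return !!(features & (1ULL << MY_VIRTIO_F_IOMMU_PLATFORM));
    }
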
diff --git a/lib/librte_vhost/vhost.c b/lib/librte_vhost/vhost.c
index 0b6aa1cc..4f8b73a0 100644
--- a/lib/librte_vhost/vhost.c
+++ b/lib/librte_vhost/vhost.c
@@ -47,11 +47,49 @@
#include <rte_memory.h>
#include <rte_malloc.h>
#include <rte_vhost.h>
+#include <rte_rwlock.h>
+#include "iotlb.h"
#include "vhost.h"
+#include "vhost_user.h"
struct virtio_net *vhost_devices[MAX_VHOST_DEVICE];
+/* Called with iotlb_lock read-locked */
+uint64_t
+__vhost_iova_to_vva(struct virtio_net *dev, struct vhost_virtqueue *vq,
+ uint64_t iova, uint64_t size, uint8_t perm)
+{
+ uint64_t vva, tmp_size;
+
+ if (unlikely(!size))
+ return 0;
+
+ tmp_size = size;
+
+ vva = vhost_user_iotlb_cache_find(vq, iova, &tmp_size, perm);
+ if (tmp_size == size)
+ return vva;
+
+ if (!vhost_user_iotlb_pending_miss(vq, iova + tmp_size, perm)) {
+ /*
+ * iotlb_lock is read-locked for a full burst,
+ * but it only protects the iotlb cache.
+ * In case of IOTLB miss, we might block on the socket,
+ * which could cause a deadlock with QEMU if an IOTLB update
+ * is being handled. We can safely unlock here to avoid it.
+ */
+ vhost_user_iotlb_rd_unlock(vq);
+
+ vhost_user_iotlb_pending_insert(vq, iova + tmp_size, perm);
+ vhost_user_iotlb_miss(dev, iova + tmp_size, perm);
+
+ vhost_user_iotlb_rd_lock(vq);
+ }
+
+ return 0;
+}
+
struct virtio_net *
get_device(int vid)
{
@@ -102,40 +140,108 @@ free_device(struct virtio_net *dev)
vq = dev->virtqueue[i];
rte_free(vq->shadow_used_ring);
-
+ rte_free(vq->batch_copy_elems);
+ rte_mempool_free(vq->iotlb_pool);
rte_free(vq);
}
rte_free(dev);
}
+int
+vring_translate(struct virtio_net *dev, struct vhost_virtqueue *vq)
+{
+ uint64_t size;
+
+ if (!(dev->features & (1ULL << VIRTIO_F_IOMMU_PLATFORM)))
+ goto out;
+
+ size = sizeof(struct vring_desc) * vq->size;
+ vq->desc = (struct vring_desc *)(uintptr_t)vhost_iova_to_vva(dev, vq,
+ vq->ring_addrs.desc_user_addr,
+ size, VHOST_ACCESS_RW);
+ if (!vq->desc)
+ return -1;
+
+ size = sizeof(struct vring_avail);
+ size += sizeof(uint16_t) * vq->size;
+ vq->avail = (struct vring_avail *)(uintptr_t)vhost_iova_to_vva(dev, vq,
+ vq->ring_addrs.avail_user_addr,
+ size, VHOST_ACCESS_RW);
+ if (!vq->avail)
+ return -1;
+
+ size = sizeof(struct vring_used);
+ size += sizeof(struct vring_used_elem) * vq->size;
+ vq->used = (struct vring_used *)(uintptr_t)vhost_iova_to_vva(dev, vq,
+ vq->ring_addrs.used_user_addr,
+ size, VHOST_ACCESS_RW);
+ if (!vq->used)
+ return -1;
+
+out:
+ vq->access_ok = 1;
+
+ return 0;
+}
+
+void
+vring_invalidate(struct virtio_net *dev, struct vhost_virtqueue *vq)
+{
+ if (dev->features & (1ULL << VIRTIO_F_IOMMU_PLATFORM))
+ vhost_user_iotlb_wr_lock(vq);
+
+ vq->access_ok = 0;
+ vq->desc = NULL;
+ vq->avail = NULL;
+ vq->used = NULL;
+
+ if (dev->features & (1ULL << VIRTIO_F_IOMMU_PLATFORM))
+ vhost_user_iotlb_wr_unlock(vq);
+}
+
static void
-init_vring_queue(struct vhost_virtqueue *vq)
+init_vring_queue(struct virtio_net *dev, uint32_t vring_idx)
{
+ struct vhost_virtqueue *vq;
+
+ if (vring_idx >= VHOST_MAX_VRING) {
+ RTE_LOG(ERR, VHOST_CONFIG,
+ "Failed not init vring, out of bound (%d)\n",
+ vring_idx);
+ return;
+ }
+
+ vq = dev->virtqueue[vring_idx];
+
memset(vq, 0, sizeof(struct vhost_virtqueue));
vq->kickfd = VIRTIO_UNINITIALIZED_EVENTFD;
vq->callfd = VIRTIO_UNINITIALIZED_EVENTFD;
+ vhost_user_iotlb_init(dev, vring_idx);
/* Backends are set to -1 indicating an inactive device. */
vq->backend = -1;
- /*
- * always set the vq to enabled; this is to keep compatibility
- * with the old QEMU, whereas there is no SET_VRING_ENABLE message.
- */
- vq->enabled = 1;
-
TAILQ_INIT(&vq->zmbuf_list);
}
static void
-reset_vring_queue(struct vhost_virtqueue *vq)
+reset_vring_queue(struct virtio_net *dev, uint32_t vring_idx)
{
+ struct vhost_virtqueue *vq;
int callfd;
+ if (vring_idx >= VHOST_MAX_VRING) {
+ RTE_LOG(ERR, VHOST_CONFIG,
+ "Failed not init vring, out of bound (%d)\n",
+ vring_idx);
+ return;
+ }
+
+ vq = dev->virtqueue[vring_idx];
callfd = vq->callfd;
- init_vring_queue(vq);
+ init_vring_queue(dev, vring_idx);
vq->callfd = callfd;
}
@@ -152,7 +258,7 @@ alloc_vring_queue(struct virtio_net *dev, uint32_t vring_idx)
}
dev->virtqueue[vring_idx] = vq;
- init_vring_queue(vq);
+ init_vring_queue(dev, vring_idx);
dev->nr_vring += 1;
@@ -174,7 +280,7 @@ reset_device(struct virtio_net *dev)
dev->flags = 0;
for (i = 0; i < dev->nr_vring; i++)
- reset_vring_queue(dev->virtqueue[i]);
+ reset_vring_queue(dev, i);
}
/*
@@ -207,6 +313,7 @@ vhost_new_device(void)
vhost_devices[i] = dev;
dev->vid = i;
+ dev->slave_req_fd = -1;
return i;
}
diff --git a/lib/librte_vhost/vhost.h b/lib/librte_vhost/vhost.h
index 6fe72aeb..1cc81c17 100644
--- a/lib/librte_vhost/vhost.h
+++ b/lib/librte_vhost/vhost.h
@@ -45,6 +45,7 @@
#include <rte_log.h>
#include <rte_ether.h>
+#include <rte_rwlock.h>
#include "rte_vhost.h"
@@ -81,6 +82,16 @@ struct zcopy_mbuf {
};
TAILQ_HEAD(zcopy_mbuf_list, zcopy_mbuf);
+/*
+ * Structure contains the info for each batched memory copy.
+ */
+struct batch_copy_elem {
+ void *dst;
+ void *src;
+ uint32_t len;
+ uint64_t log_addr;
+};
+
/**
* Structure contains variables relevant to RX/TX virtqueues.
*/
@@ -102,6 +113,7 @@ struct vhost_virtqueue {
/* Currently unused as polling mode is enabled */
int kickfd;
int enabled;
+ int access_ok;
/* Physical address of used ring, for logging */
uint64_t log_guest_addr;
@@ -114,6 +126,17 @@ struct vhost_virtqueue {
struct vring_used_elem *shadow_used_ring;
uint16_t shadow_used_idx;
+ struct vhost_vring_addr ring_addrs;
+
+ struct batch_copy_elem *batch_copy_elems;
+ uint16_t batch_copy_nb_elems;
+
+ rte_rwlock_t iotlb_lock;
+ rte_rwlock_t iotlb_pending_lock;
+ struct rte_mempool *iotlb_pool;
+ TAILQ_HEAD(, vhost_iotlb_entry) iotlb_list;
+ int iotlb_cache_nr;
+ TAILQ_HEAD(, vhost_iotlb_entry) iotlb_pending_list;
} __rte_cache_aligned;
/* Old kernels have no such macros defined */
@@ -132,6 +155,37 @@ struct vhost_virtqueue {
#define VIRTIO_NET_F_MTU 3
#endif
+/* Declare IOMMU related bits for older kernels */
+#ifndef VIRTIO_F_IOMMU_PLATFORM
+
+#define VIRTIO_F_IOMMU_PLATFORM 33
+
+struct vhost_iotlb_msg {
+ __u64 iova;
+ __u64 size;
+ __u64 uaddr;
+#define VHOST_ACCESS_RO 0x1
+#define VHOST_ACCESS_WO 0x2
+#define VHOST_ACCESS_RW 0x3
+ __u8 perm;
+#define VHOST_IOTLB_MISS 1
+#define VHOST_IOTLB_UPDATE 2
+#define VHOST_IOTLB_INVALIDATE 3
+#define VHOST_IOTLB_ACCESS_FAIL 4
+ __u8 type;
+};
+
+#define VHOST_IOTLB_MSG 0x1
+
+struct vhost_msg {
+ int type;
+ union {
+ struct vhost_iotlb_msg iotlb;
+ __u8 padding[64];
+ };
+};
+#endif
+
/*
* Define virtio 1.0 for older kernels
*/
@@ -157,7 +211,8 @@ struct vhost_virtqueue {
(1ULL << VIRTIO_NET_F_GUEST_TSO4) | \
(1ULL << VIRTIO_NET_F_GUEST_TSO6) | \
(1ULL << VIRTIO_RING_F_INDIRECT_DESC) | \
- (1ULL << VIRTIO_NET_F_MTU))
+ (1ULL << VIRTIO_NET_F_MTU) | \
+ (1ULL << VIRTIO_F_IOMMU_PLATFORM))
struct guest_page {
@@ -196,6 +251,8 @@ struct virtio_net {
uint32_t nr_guest_pages;
uint32_t max_guest_pages;
struct guest_page *guest_pages;
+
+ int slave_req_fd;
} __rte_cache_aligned;
@@ -281,7 +338,7 @@ extern uint64_t VHOST_FEATURES;
extern struct virtio_net *vhost_devices[MAX_VHOST_DEVICE];
/* Convert guest physical address to host physical address */
-static __rte_always_inline phys_addr_t
+static __rte_always_inline rte_iova_t
gpa_to_hpa(struct virtio_net *dev, uint64_t gpa, uint64_t size)
{
uint32_t i;
@@ -321,4 +378,19 @@ struct vhost_device_ops const *vhost_driver_callback_get(const char *path);
*/
void vhost_backend_cleanup(struct virtio_net *dev);
+uint64_t __vhost_iova_to_vva(struct virtio_net *dev, struct vhost_virtqueue *vq,
+ uint64_t iova, uint64_t size, uint8_t perm);
+int vring_translate(struct virtio_net *dev, struct vhost_virtqueue *vq);
+void vring_invalidate(struct virtio_net *dev, struct vhost_virtqueue *vq);
+
+static __rte_always_inline uint64_t
+vhost_iova_to_vva(struct virtio_net *dev, struct vhost_virtqueue *vq,
+ uint64_t iova, uint64_t size, uint8_t perm)
+{
+ if (!(dev->features & (1ULL << VIRTIO_F_IOMMU_PLATFORM)))
+ return rte_vhost_gpa_to_vva(dev->mem, iova);
+
+ return __vhost_iova_to_vva(dev, vq, iova, size, perm);
+}
+
#endif /* _VHOST_NET_CDEV_H_ */
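
vhost.h ties the pieces together: vhost_iova_to_vva() falls back to the plain GPA-to-VVA translation when the guest did not negotiate VIRTIO_F_IOMMU_PLATFORM, and otherwise goes through the per-virtqueue IOTLB via __vhost_iova_to_vva(). The data path takes the IOTLB read lock around a whole burst and lazily (re)translates the ring addresses via access_ok/vring_translate(), which is what the virtio_net.c changes further below do. A condensed sketch of that calling pattern, with a hypothetical process_burst() standing in for the per-descriptor work:

    #include <rte_branch_prediction.h>
    #include <rte_common.h>

    #include "iotlb.h"
    #include "vhost.h"

    /* hypothetical per-burst worker using vhost_iova_to_vva() internally */
    static uint32_t
    process_burst(struct virtio_net *dev __rte_unused,
                  struct vhost_virtqueue *vq __rte_unused, uint32_t count)
    {
        return count;
    }

    static uint32_t
    burst_with_iotlb(struct virtio_net *dev, struct vhost_virtqueue *vq,
                     uint32_t count)
    {
        uint32_t done = 0;

        if (dev->features & (1ULL << VIRTIO_F_IOMMU_PLATFORM))
            vhost_user_iotlb_rd_lock(vq);

        /* Ring addresses may have been invalidated by an IOTLB update;
         * translate them again before touching desc/avail/used. */
        if (unlikely(vq->access_ok == 0) && vring_translate(dev, vq) < 0)
            goto out;

        done = process_burst(dev, vq, count);

    out:
        if (dev->features & (1ULL << VIRTIO_F_IOMMU_PLATFORM))
            vhost_user_iotlb_rd_unlock(vq);

        return done;
    }
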
diff --git a/lib/librte_vhost/vhost_user.c b/lib/librte_vhost/vhost_user.c
index ad2e8d38..f4c7ce46 100644
--- a/lib/librte_vhost/vhost_user.c
+++ b/lib/librte_vhost/vhost_user.c
@@ -48,6 +48,7 @@
#include <rte_malloc.h>
#include <rte_log.h>
+#include "iotlb.h"
#include "vhost.h"
#include "vhost_user.h"
@@ -76,6 +77,8 @@ static const char *vhost_message_str[VHOST_USER_MAX] = {
[VHOST_USER_SET_VRING_ENABLE] = "VHOST_USER_SET_VRING_ENABLE",
[VHOST_USER_SEND_RARP] = "VHOST_USER_SEND_RARP",
[VHOST_USER_NET_SET_MTU] = "VHOST_USER_NET_SET_MTU",
+ [VHOST_USER_SET_SLAVE_REQ_FD] = "VHOST_USER_SET_SLAVE_REQ_FD",
+ [VHOST_USER_IOTLB_MSG] = "VHOST_USER_IOTLB_MSG",
};
static uint64_t
@@ -122,6 +125,11 @@ vhost_backend_cleanup(struct virtio_net *dev)
munmap((void *)(uintptr_t)dev->log_addr, dev->log_size);
dev->log_addr = 0;
}
+
+ if (dev->slave_req_fd >= 0) {
+ close(dev->slave_req_fd);
+ dev->slave_req_fd = -1;
+ }
}
/*
@@ -230,6 +238,15 @@ vhost_user_set_vring_num(struct virtio_net *dev,
return -1;
}
+ vq->batch_copy_elems = rte_malloc(NULL,
+ vq->size * sizeof(struct batch_copy_elem),
+ RTE_CACHE_LINE_SIZE);
+ if (!vq->batch_copy_elems) {
+ RTE_LOG(ERR, VHOST_CONFIG,
+ "failed to allocate memory for batching copy.\n");
+ return -1;
+ }
+
return 0;
}
@@ -297,6 +314,9 @@ out:
dev->virtqueue[index] = vq;
vhost_devices[dev->vid] = dev;
+ if (old_vq != vq)
+ vhost_user_iotlb_init(dev, index);
+
return dev;
}
#else
@@ -307,10 +327,7 @@ numa_realloc(struct virtio_net *dev, int index __rte_unused)
}
#endif
-/*
- * Converts QEMU virtual address to Vhost virtual address. This function is
- * used to convert the ring addresses to our address space.
- */
+/* Converts QEMU virtual address to Vhost virtual address. */
static uint64_t
qva_to_vva(struct virtio_net *dev, uint64_t qva)
{
@@ -331,50 +348,69 @@ qva_to_vva(struct virtio_net *dev, uint64_t qva)
return 0;
}
+
/*
- * The virtio device sends us the desc, used and avail ring addresses.
- * This function then converts these to our address space.
+ * Converts ring address to Vhost virtual address.
+ * If IOMMU is enabled, the ring address is a guest IO virtual address,
+ * else it is a QEMU virtual address.
*/
-static int
-vhost_user_set_vring_addr(struct virtio_net *dev, VhostUserMsg *msg)
+static uint64_t
+ring_addr_to_vva(struct virtio_net *dev, struct vhost_virtqueue *vq,
+ uint64_t ra, uint64_t size)
{
- struct vhost_virtqueue *vq;
+ if (dev->features & (1ULL << VIRTIO_F_IOMMU_PLATFORM)) {
+ uint64_t vva;
- if (dev->mem == NULL)
- return -1;
+ vva = vhost_user_iotlb_cache_find(vq, ra,
+ &size, VHOST_ACCESS_RW);
+ if (!vva)
+ vhost_user_iotlb_miss(dev, ra, VHOST_ACCESS_RW);
- /* addr->index refers to the queue index. The txq 1, rxq is 0. */
- vq = dev->virtqueue[msg->payload.addr.index];
+ return vva;
+ }
+
+ return qva_to_vva(dev, ra);
+}
+
+static struct virtio_net *
+translate_ring_addresses(struct virtio_net *dev, int vq_index)
+{
+ struct vhost_virtqueue *vq = dev->virtqueue[vq_index];
+ struct vhost_vring_addr *addr = &vq->ring_addrs;
/* The addresses are converted from QEMU virtual to Vhost virtual. */
- vq->desc = (struct vring_desc *)(uintptr_t)qva_to_vva(dev,
- msg->payload.addr.desc_user_addr);
+ if (vq->desc && vq->avail && vq->used)
+ return dev;
+
+ vq->desc = (struct vring_desc *)(uintptr_t)ring_addr_to_vva(dev,
+ vq, addr->desc_user_addr, sizeof(struct vring_desc));
if (vq->desc == 0) {
- RTE_LOG(ERR, VHOST_CONFIG,
+ RTE_LOG(DEBUG, VHOST_CONFIG,
"(%d) failed to find desc ring address.\n",
dev->vid);
- return -1;
+ return dev;
}
- dev = numa_realloc(dev, msg->payload.addr.index);
- vq = dev->virtqueue[msg->payload.addr.index];
+ dev = numa_realloc(dev, vq_index);
+ vq = dev->virtqueue[vq_index];
+ addr = &vq->ring_addrs;
- vq->avail = (struct vring_avail *)(uintptr_t)qva_to_vva(dev,
- msg->payload.addr.avail_user_addr);
+ vq->avail = (struct vring_avail *)(uintptr_t)ring_addr_to_vva(dev,
+ vq, addr->avail_user_addr, sizeof(struct vring_avail));
if (vq->avail == 0) {
- RTE_LOG(ERR, VHOST_CONFIG,
+ RTE_LOG(DEBUG, VHOST_CONFIG,
"(%d) failed to find avail ring address.\n",
dev->vid);
- return -1;
+ return dev;
}
- vq->used = (struct vring_used *)(uintptr_t)qva_to_vva(dev,
- msg->payload.addr.used_user_addr);
+ vq->used = (struct vring_used *)(uintptr_t)ring_addr_to_vva(dev,
+ vq, addr->used_user_addr, sizeof(struct vring_used));
if (vq->used == 0) {
- RTE_LOG(ERR, VHOST_CONFIG,
+ RTE_LOG(DEBUG, VHOST_CONFIG,
"(%d) failed to find used ring address.\n",
dev->vid);
- return -1;
+ return dev;
}
if (vq->last_used_idx != vq->used->idx) {
@@ -386,7 +422,7 @@ vhost_user_set_vring_addr(struct virtio_net *dev, VhostUserMsg *msg)
vq->last_avail_idx = vq->used->idx;
}
- vq->log_guest_addr = msg->payload.addr.log_guest_addr;
+ vq->log_guest_addr = addr->log_guest_addr;
LOG_DEBUG(VHOST_CONFIG, "(%d) mapped address desc: %p\n",
dev->vid, vq->desc);
@@ -397,6 +433,43 @@ vhost_user_set_vring_addr(struct virtio_net *dev, VhostUserMsg *msg)
LOG_DEBUG(VHOST_CONFIG, "(%d) log_guest_addr: %" PRIx64 "\n",
dev->vid, vq->log_guest_addr);
+ return dev;
+}
+
+/*
+ * The virtio device sends us the desc, used and avail ring addresses.
+ * This function then converts these to our address space.
+ */
+static int
+vhost_user_set_vring_addr(struct virtio_net **pdev, VhostUserMsg *msg)
+{
+ struct vhost_virtqueue *vq;
+ struct vhost_vring_addr *addr = &msg->payload.addr;
+ struct virtio_net *dev = *pdev;
+
+ if (dev->mem == NULL)
+ return -1;
+
+ /* addr->index refers to the queue index. The txq is 1, rxq is 0. */
+ vq = dev->virtqueue[msg->payload.addr.index];
+
+ /*
+ * Ring addresses should not be interpreted as long as the ring is not
+ * started and enabled.
+ */
+ memcpy(&vq->ring_addrs, addr, sizeof(*addr));
+
+ vring_invalidate(dev, vq);
+
+ if (vq->enabled && (dev->features &
+ (1ULL << VHOST_USER_F_PROTOCOL_FEATURES))) {
+ dev = translate_ring_addresses(dev, msg->payload.state.index);
+ if (!dev)
+ return -1;
+
+ *pdev = dev;
+ }
+
return 0;
}
@@ -453,7 +526,7 @@ add_guest_pages(struct virtio_net *dev, struct rte_vhost_mem_region *reg,
uint64_t host_phys_addr;
uint64_t size;
- host_phys_addr = rte_mem_virt2phy((void *)(uintptr_t)host_user_addr);
+ host_phys_addr = rte_mem_virt2iova((void *)(uintptr_t)host_user_addr);
size = page_size - (guest_phys_addr & (page_size - 1));
size = RTE_MIN(size, reg_size);
@@ -464,7 +537,7 @@ add_guest_pages(struct virtio_net *dev, struct rte_vhost_mem_region *reg,
while (reg_size > 0) {
size = RTE_MIN(reg_size, page_size);
- host_phys_addr = rte_mem_virt2phy((void *)(uintptr_t)
+ host_phys_addr = rte_mem_virt2iova((void *)(uintptr_t)
host_user_addr);
add_one_guest_page(dev, guest_phys_addr, host_phys_addr, size);
@@ -620,7 +693,7 @@ err_mmap:
static int
vq_is_ready(struct vhost_virtqueue *vq)
{
- return vq && vq->desc &&
+ return vq && vq->desc && vq->avail && vq->used &&
vq->kickfd != VIRTIO_UNINITIALIZED_EVENTFD &&
vq->callfd != VIRTIO_UNINITIALIZED_EVENTFD;
}
@@ -668,10 +741,11 @@ vhost_user_set_vring_call(struct virtio_net *dev, struct VhostUserMsg *pmsg)
}
static void
-vhost_user_set_vring_kick(struct virtio_net *dev, struct VhostUserMsg *pmsg)
+vhost_user_set_vring_kick(struct virtio_net **pdev, struct VhostUserMsg *pmsg)
{
struct vhost_vring_file file;
struct vhost_virtqueue *vq;
+ struct virtio_net *dev = *pdev;
file.index = pmsg->payload.u64 & VHOST_USER_VRING_IDX_MASK;
if (pmsg->payload.u64 & VHOST_USER_VRING_NOFD_MASK)
@@ -681,7 +755,23 @@ vhost_user_set_vring_kick(struct virtio_net *dev, struct VhostUserMsg *pmsg)
RTE_LOG(INFO, VHOST_CONFIG,
"vring kick idx:%d file:%d\n", file.index, file.fd);
+ /* Interpret ring addresses only when ring is started. */
+ dev = translate_ring_addresses(dev, file.index);
+ if (!dev)
+ return;
+
+ *pdev = dev;
+
vq = dev->virtqueue[file.index];
+
+ /*
+ * When VHOST_USER_F_PROTOCOL_FEATURES is not negotiated,
+ * the ring starts already enabled. Otherwise, it is enabled via
+ * the SET_VRING_ENABLE message.
+ */
+ if (!(dev->features & (1ULL << VHOST_USER_F_PROTOCOL_FEATURES)))
+ vq->enabled = 1;
+
if (vq->kickfd >= 0)
close(vq->kickfd);
vq->kickfd = file.fd;
@@ -741,6 +831,9 @@ vhost_user_get_vring_base(struct virtio_net *dev,
rte_free(vq->shadow_used_ring);
vq->shadow_used_ring = NULL;
+ rte_free(vq->batch_copy_elems);
+ vq->batch_copy_elems = NULL;
+
return 0;
}
@@ -768,6 +861,27 @@ vhost_user_set_vring_enable(struct virtio_net *dev,
}
static void
+vhost_user_get_protocol_features(struct virtio_net *dev,
+ struct VhostUserMsg *msg)
+{
+ uint64_t features, protocol_features = VHOST_USER_PROTOCOL_FEATURES;
+
+ rte_vhost_driver_get_features(dev->ifname, &features);
+
+ /*
+ * For now, the REPLY_ACK protocol feature is only mandatory for
+ * the IOMMU feature. If IOMMU is explicitly disabled by the
+ * application, also disable REPLY_ACK to work around buggy
+ * QEMU versions (v2.7.0 to v2.9.0).
+ */
+ if (!(features & (1ULL << VIRTIO_F_IOMMU_PLATFORM)))
+ protocol_features &= ~(1ULL << VHOST_USER_PROTOCOL_F_REPLY_ACK);
+
+ msg->payload.u64 = protocol_features;
+ msg->size = sizeof(msg->payload.u64);
+}
+
+static void
vhost_user_set_protocol_features(struct virtio_net *dev,
uint64_t protocol_features)
{
@@ -874,6 +988,116 @@ vhost_user_net_set_mtu(struct virtio_net *dev, struct VhostUserMsg *msg)
return 0;
}
+static int
+vhost_user_set_req_fd(struct virtio_net *dev, struct VhostUserMsg *msg)
+{
+ int fd = msg->fds[0];
+
+ if (fd < 0) {
+ RTE_LOG(ERR, VHOST_CONFIG,
+ "Invalid file descriptor for slave channel (%d)\n",
+ fd);
+ return -1;
+ }
+
+ dev->slave_req_fd = fd;
+
+ return 0;
+}
+
+static int
+is_vring_iotlb_update(struct vhost_virtqueue *vq, struct vhost_iotlb_msg *imsg)
+{
+ struct vhost_vring_addr *ra;
+ uint64_t start, end;
+
+ start = imsg->iova;
+ end = start + imsg->size;
+
+ ra = &vq->ring_addrs;
+ if (ra->desc_user_addr >= start && ra->desc_user_addr < end)
+ return 1;
+ if (ra->avail_user_addr >= start && ra->avail_user_addr < end)
+ return 1;
+ if (ra->used_user_addr >= start && ra->used_user_addr < end)
+ return 1;
+
+ return 0;
+}
+
+static int
+is_vring_iotlb_invalidate(struct vhost_virtqueue *vq,
+ struct vhost_iotlb_msg *imsg)
+{
+ uint64_t istart, iend, vstart, vend;
+
+ istart = imsg->iova;
+ iend = istart + imsg->size - 1;
+
+ vstart = (uintptr_t)vq->desc;
+ vend = vstart + sizeof(struct vring_desc) * vq->size - 1;
+ if (vstart <= iend && istart <= vend)
+ return 1;
+
+ vstart = (uintptr_t)vq->avail;
+ vend = vstart + sizeof(struct vring_avail);
+ vend += sizeof(uint16_t) * vq->size - 1;
+ if (vstart <= iend && istart <= vend)
+ return 1;
+
+ vstart = (uintptr_t)vq->used;
+ vend = vstart + sizeof(struct vring_used);
+ vend += sizeof(struct vring_used_elem) * vq->size - 1;
+ if (vstart <= iend && istart <= vend)
+ return 1;
+
+ return 0;
+}
+
+static int
+vhost_user_iotlb_msg(struct virtio_net **pdev, struct VhostUserMsg *msg)
+{
+ struct virtio_net *dev = *pdev;
+ struct vhost_iotlb_msg *imsg = &msg->payload.iotlb;
+ uint16_t i;
+ uint64_t vva;
+
+ switch (imsg->type) {
+ case VHOST_IOTLB_UPDATE:
+ vva = qva_to_vva(dev, imsg->uaddr);
+ if (!vva)
+ return -1;
+
+ for (i = 0; i < dev->nr_vring; i++) {
+ struct vhost_virtqueue *vq = dev->virtqueue[i];
+
+ vhost_user_iotlb_cache_insert(vq, imsg->iova, vva,
+ imsg->size, imsg->perm);
+
+ if (is_vring_iotlb_update(vq, imsg))
+ *pdev = dev = translate_ring_addresses(dev, i);
+ }
+ break;
+ case VHOST_IOTLB_INVALIDATE:
+ for (i = 0; i < dev->nr_vring; i++) {
+ struct vhost_virtqueue *vq = dev->virtqueue[i];
+
+ vhost_user_iotlb_cache_remove(vq, imsg->iova,
+ imsg->size);
+
+ if (is_vring_iotlb_invalidate(vq, imsg))
+ vring_invalidate(dev, vq);
+ }
+ break;
+ default:
+ RTE_LOG(ERR, VHOST_CONFIG, "Invalid IOTLB message type (%d)\n",
+ imsg->type);
+ return -1;
+ }
+
+ return 0;
+}
+
/* return bytes# of read on success or negative val on failure. */
static int
read_vhost_message(int sockfd, struct VhostUserMsg *msg)
@@ -907,8 +1131,16 @@ read_vhost_message(int sockfd, struct VhostUserMsg *msg)
static int
send_vhost_message(int sockfd, struct VhostUserMsg *msg)
{
- int ret;
+ if (!msg)
+ return 0;
+
+ return send_fd_message(sockfd, (char *)msg,
+ VHOST_USER_HDR_SIZE + msg->size, NULL, 0);
+}
+static int
+send_vhost_reply(int sockfd, struct VhostUserMsg *msg)
+{
if (!msg)
return 0;
@@ -917,10 +1149,7 @@ send_vhost_message(int sockfd, struct VhostUserMsg *msg)
msg->flags |= VHOST_USER_VERSION;
msg->flags |= VHOST_USER_REPLY_MASK;
- ret = send_fd_message(sockfd, (char *)msg,
- VHOST_USER_HDR_SIZE + msg->size, NULL, 0);
-
- return ret;
+ return send_vhost_message(sockfd, msg);
}
/*
@@ -931,7 +1160,7 @@ vhost_user_check_and_alloc_queue_pair(struct virtio_net *dev, VhostUserMsg *msg)
{
uint16_t vring_idx;
- switch (msg->request) {
+ switch (msg->request.master) {
case VHOST_USER_SET_VRING_KICK:
case VHOST_USER_SET_VRING_CALL:
case VHOST_USER_SET_VRING_ERR:
@@ -983,7 +1212,7 @@ vhost_user_msg_handler(int vid, int fd)
}
ret = read_vhost_message(fd, &msg);
- if (ret <= 0 || msg.request >= VHOST_USER_MAX) {
+ if (ret <= 0 || msg.request.master >= VHOST_USER_MAX) {
if (ret < 0)
RTE_LOG(ERR, VHOST_CONFIG,
"vhost read message failed\n");
@@ -998,8 +1227,12 @@ vhost_user_msg_handler(int vid, int fd)
}
ret = 0;
- RTE_LOG(INFO, VHOST_CONFIG, "read message %s\n",
- vhost_message_str[msg.request]);
+ if (msg.request.master != VHOST_USER_IOTLB_MSG)
+ RTE_LOG(INFO, VHOST_CONFIG, "read message %s\n",
+ vhost_message_str[msg.request.master]);
+ else
+ RTE_LOG(DEBUG, VHOST_CONFIG, "read message %s\n",
+ vhost_message_str[msg.request.master]);
ret = vhost_user_check_and_alloc_queue_pair(dev, &msg);
if (ret < 0) {
@@ -1008,20 +1241,19 @@ vhost_user_msg_handler(int vid, int fd)
return -1;
}
- switch (msg.request) {
+ switch (msg.request.master) {
case VHOST_USER_GET_FEATURES:
msg.payload.u64 = vhost_user_get_features(dev);
msg.size = sizeof(msg.payload.u64);
- send_vhost_message(fd, &msg);
+ send_vhost_reply(fd, &msg);
break;
case VHOST_USER_SET_FEATURES:
vhost_user_set_features(dev, msg.payload.u64);
break;
case VHOST_USER_GET_PROTOCOL_FEATURES:
- msg.payload.u64 = VHOST_USER_PROTOCOL_FEATURES;
- msg.size = sizeof(msg.payload.u64);
- send_vhost_message(fd, &msg);
+ vhost_user_get_protocol_features(dev, &msg);
+ send_vhost_reply(fd, &msg);
break;
case VHOST_USER_SET_PROTOCOL_FEATURES:
vhost_user_set_protocol_features(dev, msg.payload.u64);
@@ -1043,7 +1275,7 @@ vhost_user_msg_handler(int vid, int fd)
/* it needs a reply */
msg.size = sizeof(msg.payload.u64);
- send_vhost_message(fd, &msg);
+ send_vhost_reply(fd, &msg);
break;
case VHOST_USER_SET_LOG_FD:
close(msg.fds[0]);
@@ -1054,7 +1286,7 @@ vhost_user_msg_handler(int vid, int fd)
vhost_user_set_vring_num(dev, &msg);
break;
case VHOST_USER_SET_VRING_ADDR:
- vhost_user_set_vring_addr(dev, &msg);
+ vhost_user_set_vring_addr(&dev, &msg);
break;
case VHOST_USER_SET_VRING_BASE:
vhost_user_set_vring_base(dev, &msg);
@@ -1063,11 +1295,11 @@ vhost_user_msg_handler(int vid, int fd)
case VHOST_USER_GET_VRING_BASE:
vhost_user_get_vring_base(dev, &msg);
msg.size = sizeof(msg.payload.state);
- send_vhost_message(fd, &msg);
+ send_vhost_reply(fd, &msg);
break;
case VHOST_USER_SET_VRING_KICK:
- vhost_user_set_vring_kick(dev, &msg);
+ vhost_user_set_vring_kick(&dev, &msg);
break;
case VHOST_USER_SET_VRING_CALL:
vhost_user_set_vring_call(dev, &msg);
@@ -1082,7 +1314,7 @@ vhost_user_msg_handler(int vid, int fd)
case VHOST_USER_GET_QUEUE_NUM:
msg.payload.u64 = VHOST_MAX_QUEUE_PAIRS;
msg.size = sizeof(msg.payload.u64);
- send_vhost_message(fd, &msg);
+ send_vhost_reply(fd, &msg);
break;
case VHOST_USER_SET_VRING_ENABLE:
@@ -1096,6 +1328,14 @@ vhost_user_msg_handler(int vid, int fd)
ret = vhost_user_net_set_mtu(dev, &msg);
break;
+ case VHOST_USER_SET_SLAVE_REQ_FD:
+ ret = vhost_user_set_req_fd(dev, &msg);
+ break;
+
+ case VHOST_USER_IOTLB_MSG:
+ ret = vhost_user_iotlb_msg(&dev, &msg);
+ break;
+
default:
ret = -1;
break;
@@ -1105,7 +1345,7 @@ vhost_user_msg_handler(int vid, int fd)
if (msg.flags & VHOST_USER_NEED_REPLY) {
msg.payload.u64 = !!ret;
msg.size = sizeof(msg.payload.u64);
- send_vhost_message(fd, &msg);
+ send_vhost_reply(fd, &msg);
}
if (!(dev->flags & VIRTIO_DEV_RUNNING) && virtio_is_ready(dev)) {
@@ -1124,3 +1364,29 @@ vhost_user_msg_handler(int vid, int fd)
return 0;
}
+
+int
+vhost_user_iotlb_miss(struct virtio_net *dev, uint64_t iova, uint8_t perm)
+{
+ int ret;
+ struct VhostUserMsg msg = {
+ .request.slave = VHOST_USER_SLAVE_IOTLB_MSG,
+ .flags = VHOST_USER_VERSION,
+ .size = sizeof(msg.payload.iotlb),
+ .payload.iotlb = {
+ .iova = iova,
+ .perm = perm,
+ .type = VHOST_IOTLB_MISS,
+ },
+ };
+
+ ret = send_vhost_message(dev->slave_req_fd, &msg);
+ if (ret < 0) {
+ RTE_LOG(ERR, VHOST_CONFIG,
+ "Failed to send IOTLB miss message (%d)\n",
+ ret);
+ return ret;
+ }
+
+ return 0;
+}
diff --git a/lib/librte_vhost/vhost_user.h b/lib/librte_vhost/vhost_user.h
index 35ebd719..76d9fe2f 100644
--- a/lib/librte_vhost/vhost_user.h
+++ b/lib/librte_vhost/vhost_user.h
@@ -48,16 +48,14 @@
#define VHOST_USER_PROTOCOL_F_RARP 2
#define VHOST_USER_PROTOCOL_F_REPLY_ACK 3
#define VHOST_USER_PROTOCOL_F_NET_MTU 4
+#define VHOST_USER_PROTOCOL_F_SLAVE_REQ 5
-/*
- * disable REPLY_ACK feature to workaround the buggy QEMU implementation.
- * Proved buggy QEMU includes v2.7 - v2.9.
- */
#define VHOST_USER_PROTOCOL_FEATURES ((1ULL << VHOST_USER_PROTOCOL_F_MQ) | \
(1ULL << VHOST_USER_PROTOCOL_F_LOG_SHMFD) |\
(1ULL << VHOST_USER_PROTOCOL_F_RARP) | \
- (0ULL << VHOST_USER_PROTOCOL_F_REPLY_ACK) | \
- (1ULL << VHOST_USER_PROTOCOL_F_NET_MTU))
+ (1ULL << VHOST_USER_PROTOCOL_F_REPLY_ACK) | \
+ (1ULL << VHOST_USER_PROTOCOL_F_NET_MTU) | \
+ (1ULL << VHOST_USER_PROTOCOL_F_SLAVE_REQ))
typedef enum VhostUserRequest {
VHOST_USER_NONE = 0,
@@ -81,9 +79,17 @@ typedef enum VhostUserRequest {
VHOST_USER_SET_VRING_ENABLE = 18,
VHOST_USER_SEND_RARP = 19,
VHOST_USER_NET_SET_MTU = 20,
+ VHOST_USER_SET_SLAVE_REQ_FD = 21,
+ VHOST_USER_IOTLB_MSG = 22,
VHOST_USER_MAX
} VhostUserRequest;
+typedef enum VhostUserSlaveRequest {
+ VHOST_USER_SLAVE_NONE = 0,
+ VHOST_USER_SLAVE_IOTLB_MSG = 1,
+ VHOST_USER_SLAVE_MAX
+} VhostUserSlaveRequest;
+
typedef struct VhostUserMemoryRegion {
uint64_t guest_phys_addr;
uint64_t memory_size;
@@ -103,7 +109,10 @@ typedef struct VhostUserLog {
} VhostUserLog;
typedef struct VhostUserMsg {
- VhostUserRequest request;
+ union {
+ VhostUserRequest master;
+ VhostUserSlaveRequest slave;
+ } request;
#define VHOST_USER_VERSION_MASK 0x3
#define VHOST_USER_REPLY_MASK (0x1 << 2)
@@ -118,6 +127,7 @@ typedef struct VhostUserMsg {
struct vhost_vring_addr addr;
VhostUserMemory memory;
VhostUserLog log;
+ struct vhost_iotlb_msg iotlb;
} payload;
int fds[VHOST_MEMORY_MAX_NREGIONS];
} __attribute((packed)) VhostUserMsg;
@@ -130,6 +140,7 @@ typedef struct VhostUserMsg {
/* vhost_user.c */
int vhost_user_msg_handler(int vid, int fd);
+int vhost_user_iotlb_miss(struct virtio_net *dev, uint64_t iova, uint8_t perm);
/* socket.c */
int read_fd_message(int sockfd, char *buf, int buflen, int *fds, int fd_num);
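
The virtio_net.c changes below defer copies smaller than MAX_BATCH_LEN into the per-virtqueue batch_copy_elems array and flush them in one pass once the descriptor walk is done, which keeps the hot loop tighter. A toy illustration of the same defer-then-flush idea, independent of the vhost structures; all names in this sketch are made up:

    #include <stdint.h>
    #include <string.h>

    #define MAX_BATCH_LEN 256   /* same threshold virtio_net.c uses */

    struct copy_elem {          /* analogous to struct batch_copy_elem */
        void *dst;
        const void *src;
        uint32_t len;
    };

    /* Defer a copy if it is small and the batch has room, else do it now. */
    static inline void
    copy_or_batch(struct copy_elem *batch, uint16_t *nb, uint16_t cap,
                  void *dst, const void *src, uint32_t len)
    {
        if (len > MAX_BATCH_LEN || *nb >= cap) {
            memcpy(dst, src, len);
            return;
        }
        batch[*nb].dst = dst;
        batch[*nb].src = src;
        batch[*nb].len = len;
        (*nb)++;
    }

    /* Flush once per burst, mirroring do_data_copy_enqueue()/_dequeue(). */
    static inline void
    copy_flush(struct copy_elem *batch, uint16_t *nb)
    {
        uint16_t i;

        for (i = 0; i < *nb; i++)
            memcpy(batch[i].dst, batch[i].src, batch[i].len);
        *nb = 0;
    }
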
diff --git a/lib/librte_vhost/virtio_net.c b/lib/librte_vhost/virtio_net.c
index a5f0eeba..6fee16e5 100644
--- a/lib/librte_vhost/virtio_net.c
+++ b/lib/librte_vhost/virtio_net.c
@@ -45,10 +45,13 @@
#include <rte_sctp.h>
#include <rte_arp.h>
+#include "iotlb.h"
#include "vhost.h"
#define MAX_PKT_BURST 32
+#define MAX_BATCH_LEN 256
+
static bool
is_valid_virt_queue_idx(uint32_t idx, int is_tx, uint32_t nr_vring)
{
@@ -105,6 +108,31 @@ update_shadow_used_ring(struct vhost_virtqueue *vq,
vq->shadow_used_ring[i].len = len;
}
+static inline void
+do_data_copy_enqueue(struct virtio_net *dev, struct vhost_virtqueue *vq)
+{
+ struct batch_copy_elem *elem = vq->batch_copy_elems;
+ uint16_t count = vq->batch_copy_nb_elems;
+ int i;
+
+ for (i = 0; i < count; i++) {
+ rte_memcpy(elem[i].dst, elem[i].src, elem[i].len);
+ vhost_log_write(dev, elem[i].log_addr, elem[i].len);
+ PRINT_PACKET(dev, (uintptr_t)elem[i].dst, elem[i].len, 0);
+ }
+}
+
+static inline void
+do_data_copy_dequeue(struct vhost_virtqueue *vq)
+{
+ struct batch_copy_elem *elem = vq->batch_copy_elems;
+ uint16_t count = vq->batch_copy_nb_elems;
+ int i;
+
+ for (i = 0; i < count; i++)
+ rte_memcpy(elem[i].dst, elem[i].src, elem[i].len);
+}
+
/* avoid write operation when necessary, to lessen cache issues */
#define ASSIGN_UNLESS_EQUAL(var, val) do { \
if ((var) != (val)) \
@@ -168,8 +196,9 @@ virtio_enqueue_offload(struct rte_mbuf *m_buf, struct virtio_net_hdr *net_hdr)
}
static __rte_always_inline int
-copy_mbuf_to_desc(struct virtio_net *dev, struct vring_desc *descs,
- struct rte_mbuf *m, uint16_t desc_idx, uint32_t size)
+copy_mbuf_to_desc(struct virtio_net *dev, struct vhost_virtqueue *vq,
+ struct vring_desc *descs, struct rte_mbuf *m,
+ uint16_t desc_idx, uint32_t size)
{
uint32_t desc_avail, desc_offset;
uint32_t mbuf_avail, mbuf_offset;
@@ -178,16 +207,22 @@ copy_mbuf_to_desc(struct virtio_net *dev, struct vring_desc *descs,
uint64_t desc_addr;
/* A counter to avoid desc dead loop chain */
uint16_t nr_desc = 1;
+ struct batch_copy_elem *batch_copy = vq->batch_copy_elems;
+ uint16_t copy_nb = vq->batch_copy_nb_elems;
+ int error = 0;
desc = &descs[desc_idx];
- desc_addr = rte_vhost_gpa_to_vva(dev->mem, desc->addr);
+ desc_addr = vhost_iova_to_vva(dev, vq, desc->addr,
+ desc->len, VHOST_ACCESS_RW);
/*
* Checking of 'desc_addr' placed outside of 'unlikely' macro to avoid
* performance issue with some versions of gcc (4.8.4 and 5.3.0) which
* otherwise stores offset on the stack instead of in a register.
*/
- if (unlikely(desc->len < dev->vhost_hlen) || !desc_addr)
- return -1;
+ if (unlikely(desc->len < dev->vhost_hlen) || !desc_addr) {
+ error = -1;
+ goto out;
+ }
rte_prefetch0((void *)(uintptr_t)desc_addr);
@@ -213,27 +248,45 @@ copy_mbuf_to_desc(struct virtio_net *dev, struct vring_desc *descs,
if (desc_avail == 0) {
if ((desc->flags & VRING_DESC_F_NEXT) == 0) {
/* Room in vring buffer is not enough */
- return -1;
+ error = -1;
+ goto out;
+ }
+ if (unlikely(desc->next >= size || ++nr_desc > size)) {
+ error = -1;
+ goto out;
}
- if (unlikely(desc->next >= size || ++nr_desc > size))
- return -1;
desc = &descs[desc->next];
- desc_addr = rte_vhost_gpa_to_vva(dev->mem, desc->addr);
- if (unlikely(!desc_addr))
- return -1;
+ desc_addr = vhost_iova_to_vva(dev, vq, desc->addr,
+ desc->len,
+ VHOST_ACCESS_RW);
+ if (unlikely(!desc_addr)) {
+ error = -1;
+ goto out;
+ }
desc_offset = 0;
desc_avail = desc->len;
}
cpy_len = RTE_MIN(desc_avail, mbuf_avail);
- rte_memcpy((void *)((uintptr_t)(desc_addr + desc_offset)),
- rte_pktmbuf_mtod_offset(m, void *, mbuf_offset),
- cpy_len);
- vhost_log_write(dev, desc->addr + desc_offset, cpy_len);
- PRINT_PACKET(dev, (uintptr_t)(desc_addr + desc_offset),
- cpy_len, 0);
+ if (likely(cpy_len > MAX_BATCH_LEN || copy_nb >= vq->size)) {
+ rte_memcpy((void *)((uintptr_t)(desc_addr +
+ desc_offset)),
+ rte_pktmbuf_mtod_offset(m, void *, mbuf_offset),
+ cpy_len);
+ vhost_log_write(dev, desc->addr + desc_offset, cpy_len);
+ PRINT_PACKET(dev, (uintptr_t)(desc_addr + desc_offset),
+ cpy_len, 0);
+ } else {
+ batch_copy[copy_nb].dst =
+ (void *)((uintptr_t)(desc_addr + desc_offset));
+ batch_copy[copy_nb].src =
+ rte_pktmbuf_mtod_offset(m, void *, mbuf_offset);
+ batch_copy[copy_nb].log_addr = desc->addr + desc_offset;
+ batch_copy[copy_nb].len = cpy_len;
+ copy_nb++;
+ }
mbuf_avail -= cpy_len;
mbuf_offset += cpy_len;
@@ -241,7 +294,10 @@ copy_mbuf_to_desc(struct virtio_net *dev, struct vring_desc *descs,
desc_offset += cpy_len;
}
- return 0;
+out:
+ vq->batch_copy_nb_elems = copy_nb;
+
+ return error;
}
/**
@@ -273,17 +329,29 @@ virtio_dev_rx(struct virtio_net *dev, uint16_t queue_id,
if (unlikely(vq->enabled == 0))
return 0;
+ if (dev->features & (1ULL << VIRTIO_F_IOMMU_PLATFORM))
+ vhost_user_iotlb_rd_lock(vq);
+
+ if (unlikely(vq->access_ok == 0)) {
+ if (unlikely(vring_translate(dev, vq) < 0)) {
+ count = 0;
+ goto out;
+ }
+ }
+
avail_idx = *((volatile uint16_t *)&vq->avail->idx);
start_idx = vq->last_used_idx;
free_entries = avail_idx - start_idx;
count = RTE_MIN(count, free_entries);
count = RTE_MIN(count, (uint32_t)MAX_PKT_BURST);
if (count == 0)
- return 0;
+ goto out;
LOG_DEBUG(VHOST_DATA, "(%d) start_idx %d | end_idx %d\n",
dev->vid, start_idx, start_idx + count);
+ vq->batch_copy_nb_elems = 0;
+
/* Retrieve all of the desc indexes first to avoid caching issues. */
rte_prefetch0(&vq->avail->ring[start_idx & (vq->size - 1)]);
for (i = 0; i < count; i++) {
@@ -304,8 +372,10 @@ virtio_dev_rx(struct virtio_net *dev, uint16_t queue_id,
if (vq->desc[desc_idx].flags & VRING_DESC_F_INDIRECT) {
descs = (struct vring_desc *)(uintptr_t)
- rte_vhost_gpa_to_vva(dev->mem,
- vq->desc[desc_idx].addr);
+ vhost_iova_to_vva(dev,
+ vq, vq->desc[desc_idx].addr,
+ vq->desc[desc_idx].len,
+ VHOST_ACCESS_RO);
if (unlikely(!descs)) {
count = i;
break;
@@ -318,19 +388,18 @@ virtio_dev_rx(struct virtio_net *dev, uint16_t queue_id,
sz = vq->size;
}
- err = copy_mbuf_to_desc(dev, descs, pkts[i], desc_idx, sz);
+ err = copy_mbuf_to_desc(dev, vq, descs, pkts[i], desc_idx, sz);
if (unlikely(err)) {
- used_idx = (start_idx + i) & (vq->size - 1);
- vq->used->ring[used_idx].len = dev->vhost_hlen;
- vhost_log_used_vring(dev, vq,
- offsetof(struct vring_used, ring[used_idx]),
- sizeof(vq->used->ring[used_idx]));
+ count = i;
+ break;
}
if (i + 1 < count)
rte_prefetch0(&vq->desc[desc_indexes[i+1]]);
}
+ do_data_copy_enqueue(dev, vq);
+
rte_smp_wmb();
*(volatile uint16_t *)&vq->used->idx += count;
@@ -346,6 +415,10 @@ virtio_dev_rx(struct virtio_net *dev, uint16_t queue_id,
if (!(vq->avail->flags & VRING_AVAIL_F_NO_INTERRUPT)
&& (vq->callfd >= 0))
eventfd_write(vq->callfd, (eventfd_t)1);
+out:
+ if (dev->features & (1ULL << VIRTIO_F_IOMMU_PLATFORM))
+ vhost_user_iotlb_rd_unlock(vq);
+
return count;
}
@@ -364,7 +437,9 @@ fill_vec_buf(struct virtio_net *dev, struct vhost_virtqueue *vq,
if (vq->desc[idx].flags & VRING_DESC_F_INDIRECT) {
descs = (struct vring_desc *)(uintptr_t)
- rte_vhost_gpa_to_vva(dev->mem, vq->desc[idx].addr);
+ vhost_iova_to_vva(dev, vq, vq->desc[idx].addr,
+ vq->desc[idx].len,
+ VHOST_ACCESS_RO);
if (unlikely(!descs))
return -1;
@@ -439,8 +514,9 @@ reserve_avail_buf_mergeable(struct virtio_net *dev, struct vhost_virtqueue *vq,
}
static __rte_always_inline int
-copy_mbuf_to_desc_mergeable(struct virtio_net *dev, struct rte_mbuf *m,
- struct buf_vector *buf_vec, uint16_t num_buffers)
+copy_mbuf_to_desc_mergeable(struct virtio_net *dev, struct vhost_virtqueue *vq,
+ struct rte_mbuf *m, struct buf_vector *buf_vec,
+ uint16_t num_buffers)
{
uint32_t vec_idx = 0;
uint64_t desc_addr;
@@ -449,13 +525,22 @@ copy_mbuf_to_desc_mergeable(struct virtio_net *dev, struct rte_mbuf *m,
uint32_t cpy_len;
uint64_t hdr_addr, hdr_phys_addr;
struct rte_mbuf *hdr_mbuf;
+ struct batch_copy_elem *batch_copy = vq->batch_copy_elems;
+ uint16_t copy_nb = vq->batch_copy_nb_elems;
+ int error = 0;
- if (unlikely(m == NULL))
- return -1;
+ if (unlikely(m == NULL)) {
+ error = -1;
+ goto out;
+ }
- desc_addr = rte_vhost_gpa_to_vva(dev->mem, buf_vec[vec_idx].buf_addr);
- if (buf_vec[vec_idx].buf_len < dev->vhost_hlen || !desc_addr)
- return -1;
+ desc_addr = vhost_iova_to_vva(dev, vq, buf_vec[vec_idx].buf_addr,
+ buf_vec[vec_idx].buf_len,
+ VHOST_ACCESS_RW);
+ if (buf_vec[vec_idx].buf_len < dev->vhost_hlen || !desc_addr) {
+ error = -1;
+ goto out;
+ }
hdr_mbuf = m;
hdr_addr = desc_addr;
@@ -474,10 +559,15 @@ copy_mbuf_to_desc_mergeable(struct virtio_net *dev, struct rte_mbuf *m,
/* done with current desc buf, get the next one */
if (desc_avail == 0) {
vec_idx++;
- desc_addr = rte_vhost_gpa_to_vva(dev->mem,
- buf_vec[vec_idx].buf_addr);
- if (unlikely(!desc_addr))
- return -1;
+ desc_addr =
+ vhost_iova_to_vva(dev, vq,
+ buf_vec[vec_idx].buf_addr,
+ buf_vec[vec_idx].buf_len,
+ VHOST_ACCESS_RW);
+ if (unlikely(!desc_addr)) {
+ error = -1;
+ goto out;
+ }
/* Prefetch buffer address. */
rte_prefetch0((void *)(uintptr_t)desc_addr);
@@ -509,13 +599,27 @@ copy_mbuf_to_desc_mergeable(struct virtio_net *dev, struct rte_mbuf *m,
}
cpy_len = RTE_MIN(desc_avail, mbuf_avail);
- rte_memcpy((void *)((uintptr_t)(desc_addr + desc_offset)),
- rte_pktmbuf_mtod_offset(m, void *, mbuf_offset),
- cpy_len);
- vhost_log_write(dev, buf_vec[vec_idx].buf_addr + desc_offset,
- cpy_len);
- PRINT_PACKET(dev, (uintptr_t)(desc_addr + desc_offset),
- cpy_len, 0);
+
+ if (likely(cpy_len > MAX_BATCH_LEN || copy_nb >= vq->size)) {
+ rte_memcpy((void *)((uintptr_t)(desc_addr +
+ desc_offset)),
+ rte_pktmbuf_mtod_offset(m, void *, mbuf_offset),
+ cpy_len);
+ vhost_log_write(dev,
+ buf_vec[vec_idx].buf_addr + desc_offset,
+ cpy_len);
+ PRINT_PACKET(dev, (uintptr_t)(desc_addr + desc_offset),
+ cpy_len, 0);
+ } else {
+ batch_copy[copy_nb].dst =
+ (void *)((uintptr_t)(desc_addr + desc_offset));
+ batch_copy[copy_nb].src =
+ rte_pktmbuf_mtod_offset(m, void *, mbuf_offset);
+ batch_copy[copy_nb].log_addr =
+ buf_vec[vec_idx].buf_addr + desc_offset;
+ batch_copy[copy_nb].len = cpy_len;
+ copy_nb++;
+ }
mbuf_avail -= cpy_len;
mbuf_offset += cpy_len;
@@ -523,7 +627,10 @@ copy_mbuf_to_desc_mergeable(struct virtio_net *dev, struct rte_mbuf *m,
desc_offset += cpy_len;
}
- return 0;
+out:
+ vq->batch_copy_nb_elems = copy_nb;
+
+ return error;
}
static __rte_always_inline uint32_t
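The enqueue copy loop above now splits copies by size: anything longer than MAX_BATCH_LEN, or arriving once the per-virtqueue batch array is full (the copy_nb >= vq->size guard suggests the array holds vq->size entries), is copied and logged immediately, while short copies are only recorded and flushed later by do_data_copy_enqueue(). This amortizes the fixed per-copy overhead over a whole burst of small packets. A sketch of the data types this relies on; the field order and the 256-byte threshold are assumptions, only the field names are visible in this hunk:

/* Threshold below which a copy is deferred (value assumed here). */
#define MAX_BATCH_LEN 256

struct batch_copy_elem {
	void	*dst;		/* destination: guest buffer (enqueue) or mbuf (dequeue) */
	void	*src;		/* source: mbuf data (enqueue) or guest buffer (dequeue) */
	uint32_t len;		/* number of bytes to copy */
	uint64_t log_addr;	/* guest address to mark dirty; unused on dequeue */
};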
@@ -547,9 +654,18 @@ virtio_dev_merge_rx(struct virtio_net *dev, uint16_t queue_id,
if (unlikely(vq->enabled == 0))
return 0;
+ if (dev->features & (1ULL << VIRTIO_F_IOMMU_PLATFORM))
+ vhost_user_iotlb_rd_lock(vq);
+
+ if (unlikely(vq->access_ok == 0))
+ if (unlikely(vring_translate(dev, vq) < 0))
+ goto out;
+
count = RTE_MIN((uint32_t)MAX_PKT_BURST, count);
if (count == 0)
- return 0;
+ goto out;
+
+ vq->batch_copy_nb_elems = 0;
rte_prefetch0(&vq->avail->ring[vq->last_avail_idx & (vq->size - 1)]);
@@ -572,7 +688,7 @@ virtio_dev_merge_rx(struct virtio_net *dev, uint16_t queue_id,
dev->vid, vq->last_avail_idx,
vq->last_avail_idx + num_buffers);
- if (copy_mbuf_to_desc_mergeable(dev, pkts[pkt_idx],
+ if (copy_mbuf_to_desc_mergeable(dev, vq, pkts[pkt_idx],
buf_vec, num_buffers) < 0) {
vq->shadow_used_idx -= num_buffers;
break;
@@ -581,6 +697,8 @@ virtio_dev_merge_rx(struct virtio_net *dev, uint16_t queue_id,
vq->last_avail_idx += num_buffers;
}
+ do_data_copy_enqueue(dev, vq);
+
if (likely(vq->shadow_used_idx)) {
flush_shadow_used_ring(dev, vq);
@@ -593,6 +711,10 @@ virtio_dev_merge_rx(struct virtio_net *dev, uint16_t queue_id,
eventfd_write(vq->callfd, (eventfd_t)1);
}
+out:
+ if (dev->features & (1ULL << VIRTIO_F_IOMMU_PLATFORM))
+ vhost_user_iotlb_rd_unlock(vq);
+
return pkt_idx;
}
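Both receive paths now check vq->access_ok before touching the ring: when a vIOMMU is in use, the desc/avail/used ring addresses are themselves IOVAs, so their translations are computed once and cached, and invalidated (access_ok cleared) whenever the IOTLB changes. The first burst after an invalidation re-translates under the IOTLB read lock; if a ring address cannot be resolved yet, the burst bails out through out: and is retried later. A rough sketch of what such a re-translation step amounts to; the real vring_translate() and the ring_addrs field names are outside this diff and assumed here:

/* Sketch only: re-resolve the ring addresses and mark the vq usable again. */
static int
vring_translate_sketch(struct virtio_net *dev, struct vhost_virtqueue *vq)
{
	/* The backend only reads the descriptor table, hence read-only access. */
	vq->desc = (struct vring_desc *)(uintptr_t)vhost_iova_to_vva(dev, vq,
			vq->ring_addrs.desc_user_addr,
			sizeof(struct vring_desc) * vq->size, VHOST_ACCESS_RO);
	if (!vq->desc)
		return -1;

	/* ... likewise for vq->avail (read-only) and vq->used (read-write) ... */

	vq->access_ok = 1;
	return 0;
}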
@@ -766,8 +888,9 @@ put_zmbuf(struct zcopy_mbuf *zmbuf)
}
static __rte_always_inline int
-copy_desc_to_mbuf(struct virtio_net *dev, struct vring_desc *descs,
- uint16_t max_desc, struct rte_mbuf *m, uint16_t desc_idx,
+copy_desc_to_mbuf(struct virtio_net *dev, struct vhost_virtqueue *vq,
+ struct vring_desc *descs, uint16_t max_desc,
+ struct rte_mbuf *m, uint16_t desc_idx,
struct rte_mempool *mbuf_pool)
{
struct vring_desc *desc;
@@ -779,15 +902,25 @@ copy_desc_to_mbuf(struct virtio_net *dev, struct vring_desc *descs,
struct virtio_net_hdr *hdr = NULL;
/* A counter to avoid desc dead loop chain */
uint32_t nr_desc = 1;
+ struct batch_copy_elem *batch_copy = vq->batch_copy_elems;
+ uint16_t copy_nb = vq->batch_copy_nb_elems;
+ int error = 0;
desc = &descs[desc_idx];
if (unlikely((desc->len < dev->vhost_hlen)) ||
- (desc->flags & VRING_DESC_F_INDIRECT))
- return -1;
+ (desc->flags & VRING_DESC_F_INDIRECT)) {
+ error = -1;
+ goto out;
+ }
- desc_addr = rte_vhost_gpa_to_vva(dev->mem, desc->addr);
- if (unlikely(!desc_addr))
- return -1;
+ desc_addr = vhost_iova_to_vva(dev,
+ vq, desc->addr,
+ desc->len,
+ VHOST_ACCESS_RO);
+ if (unlikely(!desc_addr)) {
+ error = -1;
+ goto out;
+ }
if (virtio_net_with_host_offload(dev)) {
hdr = (struct virtio_net_hdr *)((uintptr_t)desc_addr);
@@ -802,12 +935,19 @@ copy_desc_to_mbuf(struct virtio_net *dev, struct vring_desc *descs,
if (likely((desc->len == dev->vhost_hlen) &&
(desc->flags & VRING_DESC_F_NEXT) != 0)) {
desc = &descs[desc->next];
- if (unlikely(desc->flags & VRING_DESC_F_INDIRECT))
- return -1;
+ if (unlikely(desc->flags & VRING_DESC_F_INDIRECT)) {
+ error = -1;
+ goto out;
+ }
- desc_addr = rte_vhost_gpa_to_vva(dev->mem, desc->addr);
- if (unlikely(!desc_addr))
- return -1;
+ desc_addr = vhost_iova_to_vva(dev,
+ vq, desc->addr,
+ desc->len,
+ VHOST_ACCESS_RO);
+ if (unlikely(!desc_addr)) {
+ error = -1;
+ goto out;
+ }
desc_offset = 0;
desc_avail = desc->len;
@@ -838,7 +978,7 @@ copy_desc_to_mbuf(struct virtio_net *dev, struct vring_desc *descs,
cur->data_len = cpy_len;
cur->data_off = 0;
cur->buf_addr = (void *)(uintptr_t)desc_addr;
- cur->buf_physaddr = hpa;
+ cur->buf_iova = hpa;
/*
* In zero copy mode, one mbuf can only reference data
@@ -846,10 +986,24 @@ copy_desc_to_mbuf(struct virtio_net *dev, struct vring_desc *descs,
*/
mbuf_avail = cpy_len;
} else {
- rte_memcpy(rte_pktmbuf_mtod_offset(cur, void *,
- mbuf_offset),
- (void *)((uintptr_t)(desc_addr + desc_offset)),
- cpy_len);
+ if (likely(cpy_len > MAX_BATCH_LEN ||
+ copy_nb >= vq->size ||
+ (hdr && cur == m))) {
+ rte_memcpy(rte_pktmbuf_mtod_offset(cur, void *,
+ mbuf_offset),
+ (void *)((uintptr_t)(desc_addr +
+ desc_offset)),
+ cpy_len);
+ } else {
+ batch_copy[copy_nb].dst =
+ rte_pktmbuf_mtod_offset(cur, void *,
+ mbuf_offset);
+ batch_copy[copy_nb].src =
+ (void *)((uintptr_t)(desc_addr +
+ desc_offset));
+ batch_copy[copy_nb].len = cpy_len;
+ copy_nb++;
+ }
}
mbuf_avail -= cpy_len;
@@ -863,15 +1017,24 @@ copy_desc_to_mbuf(struct virtio_net *dev, struct vring_desc *descs,
break;
if (unlikely(desc->next >= max_desc ||
- ++nr_desc > max_desc))
- return -1;
+ ++nr_desc > max_desc)) {
+ error = -1;
+ goto out;
+ }
desc = &descs[desc->next];
- if (unlikely(desc->flags & VRING_DESC_F_INDIRECT))
- return -1;
+ if (unlikely(desc->flags & VRING_DESC_F_INDIRECT)) {
+ error = -1;
+ goto out;
+ }
- desc_addr = rte_vhost_gpa_to_vva(dev->mem, desc->addr);
- if (unlikely(!desc_addr))
- return -1;
+ desc_addr = vhost_iova_to_vva(dev,
+ vq, desc->addr,
+ desc->len,
+ VHOST_ACCESS_RO);
+ if (unlikely(!desc_addr)) {
+ error = -1;
+ goto out;
+ }
rte_prefetch0((void *)(uintptr_t)desc_addr);
@@ -890,7 +1053,8 @@ copy_desc_to_mbuf(struct virtio_net *dev, struct vring_desc *descs,
if (unlikely(cur == NULL)) {
RTE_LOG(ERR, VHOST_DATA, "Failed to "
"allocate memory for mbuf.\n");
- return -1;
+ error = -1;
+ goto out;
}
if (unlikely(dev->dequeue_zero_copy))
rte_mbuf_refcnt_update(cur, 1);
@@ -912,7 +1076,10 @@ copy_desc_to_mbuf(struct virtio_net *dev, struct vring_desc *descs,
if (hdr)
vhost_dequeue_offload(hdr, m);
- return 0;
+out:
+ vq->batch_copy_nb_elems = copy_nb;
+
+ return error;
}
static __rte_always_inline void
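On the dequeue side the same deferral applies, with two differences visible above: copies into the head mbuf are performed immediately whenever a virtio-net header is present (the hdr && cur == m case), and the batched entries carry no log_addr, since data flows from the guest into host mbufs and needs no dirty-page logging. The deferred copies are flushed by do_data_copy_dequeue() before the used index is updated, in the non-zero-copy branch further down. A minimal sketch of that flush, matching the single-argument call site in this patch:

static __rte_always_inline void
do_data_copy_dequeue(struct vhost_virtqueue *vq)
{
	struct batch_copy_elem *elem = vq->batch_copy_elems;
	uint16_t count = vq->batch_copy_nb_elems;
	int i;

	/* No vhost_log_write() here: the destination is host mbuf memory. */
	for (i = 0; i < count; i++)
		rte_memcpy(elem[i].dst, elem[i].src, elem[i].len);
}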
@@ -1016,6 +1183,15 @@ rte_vhost_dequeue_burst(int vid, uint16_t queue_id,
if (unlikely(vq->enabled == 0))
return 0;
+ vq->batch_copy_nb_elems = 0;
+
+ if (dev->features & (1ULL << VIRTIO_F_IOMMU_PLATFORM))
+ vhost_user_iotlb_rd_lock(vq);
+
+ if (unlikely(vq->access_ok == 0))
+ if (unlikely(vring_translate(dev, vq) < 0))
+ goto out;
+
if (unlikely(dev->dequeue_zero_copy)) {
struct zcopy_mbuf *zmbuf, *next;
int nr_updated = 0;
@@ -1115,8 +1291,10 @@ rte_vhost_dequeue_burst(int vid, uint16_t queue_id,
if (vq->desc[desc_indexes[i]].flags & VRING_DESC_F_INDIRECT) {
desc = (struct vring_desc *)(uintptr_t)
- rte_vhost_gpa_to_vva(dev->mem,
- vq->desc[desc_indexes[i]].addr);
+ vhost_iova_to_vva(dev, vq,
+ vq->desc[desc_indexes[i]].addr,
+ sizeof(*desc),
+ VHOST_ACCESS_RO);
if (unlikely(!desc))
break;
@@ -1136,7 +1314,8 @@ rte_vhost_dequeue_burst(int vid, uint16_t queue_id,
break;
}
- err = copy_desc_to_mbuf(dev, desc, sz, pkts[i], idx, mbuf_pool);
+ err = copy_desc_to_mbuf(dev, vq, desc, sz, pkts[i], idx,
+ mbuf_pool);
if (unlikely(err)) {
rte_pktmbuf_free(pkts[i]);
break;
@@ -1168,11 +1347,15 @@ rte_vhost_dequeue_burst(int vid, uint16_t queue_id,
vq->last_avail_idx += i;
if (likely(dev->dequeue_zero_copy == 0)) {
+ do_data_copy_dequeue(vq);
vq->last_used_idx += i;
update_used_idx(dev, vq, i);
}
out:
+ if (dev->features & (1ULL << VIRTIO_F_IOMMU_PLATFORM))
+ vhost_user_iotlb_rd_unlock(vq);
+
if (unlikely(rarp_mbuf != NULL)) {
/*
* Inject it to the head of "pkts" array, so that switch's mac