From 509648b87434b9032d38b8ca5ad470ba3edcc036 Mon Sep 17 00:00:00 2001 From: Ido Barnea Date: Wed, 9 Dec 2015 05:07:44 +0200 Subject: Adding dpdk 2.2 instead of dpdk 1.8 and making changes to make compilation work. 40G and 10G filters do not work yet. --- src/dpdk22/lib/librte_acl/acl.h | 241 ++ src/dpdk22/lib/librte_acl/acl_run.h | 263 ++ src/dpdk22/lib/librte_acl/acl_run_avx2.h | 284 ++ src/dpdk22/lib/librte_acl/acl_run_neon.h | 289 ++ src/dpdk22/lib/librte_acl/acl_run_sse.h | 357 ++ src/dpdk22/lib/librte_acl/acl_vect.h | 116 + src/dpdk22/lib/librte_acl/rte_acl.h | 388 ++ src/dpdk22/lib/librte_acl/rte_acl_osdep.h | 80 + src/dpdk22/lib/librte_acl/tb_mem.h | 76 + src/dpdk22/lib/librte_cfgfile/rte_cfgfile.c | 356 ++ src/dpdk22/lib/librte_cfgfile/rte_cfgfile.h | 200 + src/dpdk22/lib/librte_compat/rte_compat.h | 105 + .../lib/librte_distributor/rte_distributor.h | 247 ++ .../lib/librte_eal/common/eal_common_cpuflags.c | 86 + src/dpdk22/lib/librte_eal/common/eal_common_dev.c | 152 + .../lib/librte_eal/common/eal_common_devargs.c | 177 + .../lib/librte_eal/common/eal_common_errno.c | 72 + .../lib/librte_eal/common/eal_common_hexdump.c | 120 + .../lib/librte_eal/common/eal_common_launch.c | 118 + .../lib/librte_eal/common/eal_common_lcore.c | 110 + src/dpdk22/lib/librte_eal/common/eal_common_log.c | 337 ++ .../lib/librte_eal/common/eal_common_memory.c | 154 + .../lib/librte_eal/common/eal_common_memzone.c | 445 +++ .../lib/librte_eal/common/eal_common_options.c | 1023 +++++ src/dpdk22/lib/librte_eal/common/eal_common_pci.c | 464 +++ .../lib/librte_eal/common/eal_common_pci_uio.c | 222 ++ .../lib/librte_eal/common/eal_common_string_fns.c | 69 + .../lib/librte_eal/common/eal_common_tailqs.c | 202 + .../lib/librte_eal/common/eal_common_thread.c | 157 + .../lib/librte_eal/common/eal_common_timer.c | 86 + src/dpdk22/lib/librte_eal/common/eal_filesystem.h | 118 + src/dpdk22/lib/librte_eal/common/eal_hugepages.h | 67 + .../lib/librte_eal/common/eal_internal_cfg.h | 94 + 
src/dpdk22/lib/librte_eal/common/eal_options.h | 100 + src/dpdk22/lib/librte_eal/common/eal_private.h | 349 ++ src/dpdk22/lib/librte_eal/common/eal_thread.h | 100 + .../common/include/arch/x86/rte_atomic.h | 222 ++ .../common/include/arch/x86/rte_atomic_32.h | 222 ++ .../common/include/arch/x86/rte_atomic_64.h | 191 + .../common/include/arch/x86/rte_byteorder.h | 125 + .../common/include/arch/x86/rte_byteorder_32.h | 51 + .../common/include/arch/x86/rte_byteorder_64.h | 52 + .../common/include/arch/x86/rte_cpuflags.h | 310 ++ .../common/include/arch/x86/rte_cycles.h | 121 + .../common/include/arch/x86/rte_memcpy.h | 639 ++++ .../common/include/arch/x86/rte_prefetch.h | 62 + .../librte_eal/common/include/arch/x86/rte_rtm.h | 73 + .../common/include/arch/x86/rte_rwlock.h | 82 + .../common/include/arch/x86/rte_spinlock.h | 201 + .../librte_eal/common/include/arch/x86/rte_vect.h | 132 + .../librte_eal/common/include/generic/rte_atomic.h | 945 +++++ .../common/include/generic/rte_byteorder.h | 217 ++ .../common/include/generic/rte_cpuflags.h | 120 + .../librte_eal/common/include/generic/rte_cycles.h | 205 ++ .../librte_eal/common/include/generic/rte_memcpy.h | 144 + .../common/include/generic/rte_prefetch.h | 71 + .../librte_eal/common/include/generic/rte_rwlock.h | 208 ++ .../common/include/generic/rte_spinlock.h | 325 ++ .../lib/librte_eal/common/include/rte_alarm.h | 106 + .../common/include/rte_branch_prediction.h | 70 + .../lib/librte_eal/common/include/rte_common.h | 401 ++ .../lib/librte_eal/common/include/rte_debug.h | 103 + src/dpdk22/lib/librte_eal/common/include/rte_dev.h | 192 + .../lib/librte_eal/common/include/rte_devargs.h | 177 + src/dpdk22/lib/librte_eal/common/include/rte_eal.h | 241 ++ .../librte_eal/common/include/rte_eal_memconfig.h | 100 + .../lib/librte_eal/common/include/rte_errno.h | 95 + .../lib/librte_eal/common/include/rte_hexdump.h | 89 + .../lib/librte_eal/common/include/rte_interrupts.h | 120 + 
.../lib/librte_eal/common/include/rte_keepalive.h | 146 + .../lib/librte_eal/common/include/rte_launch.h | 177 + .../lib/librte_eal/common/include/rte_lcore.h | 276 ++ src/dpdk22/lib/librte_eal/common/include/rte_log.h | 311 ++ .../lib/librte_eal/common/include/rte_malloc.h | 342 ++ .../librte_eal/common/include/rte_malloc_heap.h | 55 + .../lib/librte_eal/common/include/rte_memory.h | 250 ++ .../lib/librte_eal/common/include/rte_memzone.h | 305 ++ src/dpdk22/lib/librte_eal/common/include/rte_pci.h | 504 +++ .../common/include/rte_pci_dev_feature_defs.h | 70 + .../common/include/rte_pci_dev_features.h | 69 + .../librte_eal/common/include/rte_pci_dev_ids.h | 667 ++++ .../lib/librte_eal/common/include/rte_per_lcore.h | 79 + .../lib/librte_eal/common/include/rte_random.h | 91 + .../lib/librte_eal/common/include/rte_string_fns.h | 81 + .../lib/librte_eal/common/include/rte_tailq.h | 162 + .../lib/librte_eal/common/include/rte_time.h | 122 + .../lib/librte_eal/common/include/rte_version.h | 130 + .../lib/librte_eal/common/include/rte_warnings.h | 84 + src/dpdk22/lib/librte_eal/common/malloc_elem.c | 344 ++ src/dpdk22/lib/librte_eal/common/malloc_elem.h | 192 + src/dpdk22/lib/librte_eal/common/malloc_heap.c | 236 ++ src/dpdk22/lib/librte_eal/common/malloc_heap.h | 70 + src/dpdk22/lib/librte_eal/common/rte_keepalive.c | 113 + src/dpdk22/lib/librte_eal/common/rte_malloc.c | 262 ++ src/dpdk22/lib/librte_eal/linuxapp/eal/eal.c | 927 +++++ src/dpdk22/lib/librte_eal/linuxapp/eal/eal_alarm.c | 273 ++ src/dpdk22/lib/librte_eal/linuxapp/eal/eal_debug.c | 119 + .../librte_eal/linuxapp/eal/eal_hugepage_info.c | 365 ++ .../lib/librte_eal/linuxapp/eal/eal_interrupts.c | 1224 ++++++ .../lib/librte_eal/linuxapp/eal/eal_ivshmem.c | 958 +++++ src/dpdk22/lib/librte_eal/linuxapp/eal/eal_lcore.c | 110 + src/dpdk22/lib/librte_eal/linuxapp/eal/eal_log.c | 146 + .../lib/librte_eal/linuxapp/eal/eal_memory.c | 1599 ++++++++ src/dpdk22/lib/librte_eal/linuxapp/eal/eal_pci.c | 656 ++++ 
.../lib/librte_eal/linuxapp/eal/eal_pci_init.h | 111 + .../lib/librte_eal/linuxapp/eal/eal_pci_uio.c | 365 ++ .../lib/librte_eal/linuxapp/eal/eal_pci_vfio.c | 928 +++++ .../librte_eal/linuxapp/eal/eal_pci_vfio_mp_sync.c | 405 ++ .../lib/librte_eal/linuxapp/eal/eal_thread.c | 199 + src/dpdk22/lib/librte_eal/linuxapp/eal/eal_timer.c | 304 ++ src/dpdk22/lib/librte_eal/linuxapp/eal/eal_vfio.h | 59 + .../eal/include/exec-env/rte_dom0_common.h | 108 + .../linuxapp/eal/include/exec-env/rte_interrupts.h | 228 ++ .../linuxapp/eal/include/exec-env/rte_kni_common.h | 174 + .../lib/librte_eal/linuxapp/igb_uio/compat.h | 116 + .../lib/librte_eal/linuxapp/xen_dom0/compat.h | 15 + .../lib/librte_eal/linuxapp/xen_dom0/dom0_mm_dev.h | 107 + src/dpdk22/lib/librte_ether/rte_dev_info.h | 57 + src/dpdk22/lib/librte_ether/rte_eth_ctrl.h | 811 ++++ src/dpdk22/lib/librte_ether/rte_ethdev.c | 3241 ++++++++++++++++ src/dpdk22/lib/librte_ether/rte_ethdev.h | 3894 ++++++++++++++++++++ src/dpdk22/lib/librte_ether/rte_ether.h | 416 +++ src/dpdk22/lib/librte_hash/rte_cmp_arm64.h | 114 + src/dpdk22/lib/librte_hash/rte_cmp_x86.h | 109 + src/dpdk22/lib/librte_hash/rte_crc_arm64.h | 151 + src/dpdk22/lib/librte_hash/rte_cuckoo_hash.c | 1243 +++++++ src/dpdk22/lib/librte_hash/rte_fbk_hash.h | 396 ++ src/dpdk22/lib/librte_hash/rte_hash.h | 436 +++ src/dpdk22/lib/librte_hash/rte_hash_crc.h | 568 +++ src/dpdk22/lib/librte_hash/rte_jhash.h | 410 +++ src/dpdk22/lib/librte_hash/rte_thash.h | 250 ++ src/dpdk22/lib/librte_kvargs/rte_kvargs.c | 212 ++ src/dpdk22/lib/librte_kvargs/rte_kvargs.h | 156 + src/dpdk22/lib/librte_mbuf/rte_mbuf.c | 288 ++ src/dpdk22/lib/librte_mbuf/rte_mbuf.h | 1865 ++++++++++ src/dpdk22/lib/librte_mempool/rte_mempool.c | 921 +++++ src/dpdk22/lib/librte_mempool/rte_mempool.h | 1408 +++++++ src/dpdk22/lib/librte_net/rte_arp.h | 83 + src/dpdk22/lib/librte_net/rte_icmp.h | 101 + src/dpdk22/lib/librte_net/rte_ip.h | 413 +++ src/dpdk22/lib/librte_net/rte_sctp.h | 99 + 
src/dpdk22/lib/librte_net/rte_tcp.h | 104 + src/dpdk22/lib/librte_net/rte_udp.h | 99 + src/dpdk22/lib/librte_pipeline/rte_pipeline.c | 1638 ++++++++ src/dpdk22/lib/librte_pipeline/rte_pipeline.h | 827 +++++ src/dpdk22/lib/librte_port/rte_port.h | 263 ++ src/dpdk22/lib/librte_port/rte_port_ethdev.h | 105 + src/dpdk22/lib/librte_port/rte_port_frag.h | 101 + src/dpdk22/lib/librte_port/rte_port_ras.h | 90 + src/dpdk22/lib/librte_port/rte_port_ring.h | 123 + src/dpdk22/lib/librte_port/rte_port_sched.h | 82 + src/dpdk22/lib/librte_port/rte_port_source_sink.h | 70 + src/dpdk22/lib/librte_ring/rte_ring.c | 373 ++ src/dpdk22/lib/librte_ring/rte_ring.h | 1251 +++++++ src/dpdk22/lib/librte_table/rte_lru.h | 213 ++ src/dpdk22/lib/librte_table/rte_table.h | 301 ++ src/dpdk22/lib/librte_table/rte_table_acl.h | 95 + src/dpdk22/lib/librte_table/rte_table_array.h | 76 + src/dpdk22/lib/librte_table/rte_table_hash.h | 370 ++ src/dpdk22/lib/librte_table/rte_table_lpm.h | 118 + src/dpdk22/lib/librte_table/rte_table_lpm_ipv6.h | 122 + src/dpdk22/lib/librte_table/rte_table_stub.h | 62 + 162 files changed, 52995 insertions(+) create mode 100644 src/dpdk22/lib/librte_acl/acl.h create mode 100644 src/dpdk22/lib/librte_acl/acl_run.h create mode 100644 src/dpdk22/lib/librte_acl/acl_run_avx2.h create mode 100644 src/dpdk22/lib/librte_acl/acl_run_neon.h create mode 100644 src/dpdk22/lib/librte_acl/acl_run_sse.h create mode 100644 src/dpdk22/lib/librte_acl/acl_vect.h create mode 100644 src/dpdk22/lib/librte_acl/rte_acl.h create mode 100644 src/dpdk22/lib/librte_acl/rte_acl_osdep.h create mode 100644 src/dpdk22/lib/librte_acl/tb_mem.h create mode 100644 src/dpdk22/lib/librte_cfgfile/rte_cfgfile.c create mode 100644 src/dpdk22/lib/librte_cfgfile/rte_cfgfile.h create mode 100644 src/dpdk22/lib/librte_compat/rte_compat.h create mode 100644 src/dpdk22/lib/librte_distributor/rte_distributor.h create mode 100644 src/dpdk22/lib/librte_eal/common/eal_common_cpuflags.c create mode 100644 
src/dpdk22/lib/librte_eal/common/eal_common_dev.c create mode 100644 src/dpdk22/lib/librte_eal/common/eal_common_devargs.c create mode 100644 src/dpdk22/lib/librte_eal/common/eal_common_errno.c create mode 100644 src/dpdk22/lib/librte_eal/common/eal_common_hexdump.c create mode 100644 src/dpdk22/lib/librte_eal/common/eal_common_launch.c create mode 100644 src/dpdk22/lib/librte_eal/common/eal_common_lcore.c create mode 100644 src/dpdk22/lib/librte_eal/common/eal_common_log.c create mode 100644 src/dpdk22/lib/librte_eal/common/eal_common_memory.c create mode 100644 src/dpdk22/lib/librte_eal/common/eal_common_memzone.c create mode 100644 src/dpdk22/lib/librte_eal/common/eal_common_options.c create mode 100644 src/dpdk22/lib/librte_eal/common/eal_common_pci.c create mode 100644 src/dpdk22/lib/librte_eal/common/eal_common_pci_uio.c create mode 100644 src/dpdk22/lib/librte_eal/common/eal_common_string_fns.c create mode 100644 src/dpdk22/lib/librte_eal/common/eal_common_tailqs.c create mode 100644 src/dpdk22/lib/librte_eal/common/eal_common_thread.c create mode 100644 src/dpdk22/lib/librte_eal/common/eal_common_timer.c create mode 100644 src/dpdk22/lib/librte_eal/common/eal_filesystem.h create mode 100644 src/dpdk22/lib/librte_eal/common/eal_hugepages.h create mode 100644 src/dpdk22/lib/librte_eal/common/eal_internal_cfg.h create mode 100644 src/dpdk22/lib/librte_eal/common/eal_options.h create mode 100644 src/dpdk22/lib/librte_eal/common/eal_private.h create mode 100644 src/dpdk22/lib/librte_eal/common/eal_thread.h create mode 100644 src/dpdk22/lib/librte_eal/common/include/arch/x86/rte_atomic.h create mode 100644 src/dpdk22/lib/librte_eal/common/include/arch/x86/rte_atomic_32.h create mode 100644 src/dpdk22/lib/librte_eal/common/include/arch/x86/rte_atomic_64.h create mode 100644 src/dpdk22/lib/librte_eal/common/include/arch/x86/rte_byteorder.h create mode 100644 src/dpdk22/lib/librte_eal/common/include/arch/x86/rte_byteorder_32.h create mode 100644 
src/dpdk22/lib/librte_eal/common/include/arch/x86/rte_byteorder_64.h create mode 100644 src/dpdk22/lib/librte_eal/common/include/arch/x86/rte_cpuflags.h create mode 100644 src/dpdk22/lib/librte_eal/common/include/arch/x86/rte_cycles.h create mode 100644 src/dpdk22/lib/librte_eal/common/include/arch/x86/rte_memcpy.h create mode 100644 src/dpdk22/lib/librte_eal/common/include/arch/x86/rte_prefetch.h create mode 100644 src/dpdk22/lib/librte_eal/common/include/arch/x86/rte_rtm.h create mode 100644 src/dpdk22/lib/librte_eal/common/include/arch/x86/rte_rwlock.h create mode 100644 src/dpdk22/lib/librte_eal/common/include/arch/x86/rte_spinlock.h create mode 100644 src/dpdk22/lib/librte_eal/common/include/arch/x86/rte_vect.h create mode 100644 src/dpdk22/lib/librte_eal/common/include/generic/rte_atomic.h create mode 100644 src/dpdk22/lib/librte_eal/common/include/generic/rte_byteorder.h create mode 100644 src/dpdk22/lib/librte_eal/common/include/generic/rte_cpuflags.h create mode 100644 src/dpdk22/lib/librte_eal/common/include/generic/rte_cycles.h create mode 100644 src/dpdk22/lib/librte_eal/common/include/generic/rte_memcpy.h create mode 100644 src/dpdk22/lib/librte_eal/common/include/generic/rte_prefetch.h create mode 100644 src/dpdk22/lib/librte_eal/common/include/generic/rte_rwlock.h create mode 100644 src/dpdk22/lib/librte_eal/common/include/generic/rte_spinlock.h create mode 100644 src/dpdk22/lib/librte_eal/common/include/rte_alarm.h create mode 100644 src/dpdk22/lib/librte_eal/common/include/rte_branch_prediction.h create mode 100644 src/dpdk22/lib/librte_eal/common/include/rte_common.h create mode 100644 src/dpdk22/lib/librte_eal/common/include/rte_debug.h create mode 100644 src/dpdk22/lib/librte_eal/common/include/rte_dev.h create mode 100644 src/dpdk22/lib/librte_eal/common/include/rte_devargs.h create mode 100644 src/dpdk22/lib/librte_eal/common/include/rte_eal.h create mode 100644 src/dpdk22/lib/librte_eal/common/include/rte_eal_memconfig.h create mode 100644 
src/dpdk22/lib/librte_eal/common/include/rte_errno.h create mode 100644 src/dpdk22/lib/librte_eal/common/include/rte_hexdump.h create mode 100644 src/dpdk22/lib/librte_eal/common/include/rte_interrupts.h create mode 100644 src/dpdk22/lib/librte_eal/common/include/rte_keepalive.h create mode 100644 src/dpdk22/lib/librte_eal/common/include/rte_launch.h create mode 100644 src/dpdk22/lib/librte_eal/common/include/rte_lcore.h create mode 100644 src/dpdk22/lib/librte_eal/common/include/rte_log.h create mode 100644 src/dpdk22/lib/librte_eal/common/include/rte_malloc.h create mode 100644 src/dpdk22/lib/librte_eal/common/include/rte_malloc_heap.h create mode 100644 src/dpdk22/lib/librte_eal/common/include/rte_memory.h create mode 100644 src/dpdk22/lib/librte_eal/common/include/rte_memzone.h create mode 100644 src/dpdk22/lib/librte_eal/common/include/rte_pci.h create mode 100644 src/dpdk22/lib/librte_eal/common/include/rte_pci_dev_feature_defs.h create mode 100644 src/dpdk22/lib/librte_eal/common/include/rte_pci_dev_features.h create mode 100644 src/dpdk22/lib/librte_eal/common/include/rte_pci_dev_ids.h create mode 100644 src/dpdk22/lib/librte_eal/common/include/rte_per_lcore.h create mode 100644 src/dpdk22/lib/librte_eal/common/include/rte_random.h create mode 100644 src/dpdk22/lib/librte_eal/common/include/rte_string_fns.h create mode 100644 src/dpdk22/lib/librte_eal/common/include/rte_tailq.h create mode 100644 src/dpdk22/lib/librte_eal/common/include/rte_time.h create mode 100644 src/dpdk22/lib/librte_eal/common/include/rte_version.h create mode 100644 src/dpdk22/lib/librte_eal/common/include/rte_warnings.h create mode 100644 src/dpdk22/lib/librte_eal/common/malloc_elem.c create mode 100644 src/dpdk22/lib/librte_eal/common/malloc_elem.h create mode 100644 src/dpdk22/lib/librte_eal/common/malloc_heap.c create mode 100644 src/dpdk22/lib/librte_eal/common/malloc_heap.h create mode 100644 src/dpdk22/lib/librte_eal/common/rte_keepalive.c create mode 100644 
src/dpdk22/lib/librte_eal/common/rte_malloc.c create mode 100644 src/dpdk22/lib/librte_eal/linuxapp/eal/eal.c create mode 100644 src/dpdk22/lib/librte_eal/linuxapp/eal/eal_alarm.c create mode 100644 src/dpdk22/lib/librte_eal/linuxapp/eal/eal_debug.c create mode 100644 src/dpdk22/lib/librte_eal/linuxapp/eal/eal_hugepage_info.c create mode 100644 src/dpdk22/lib/librte_eal/linuxapp/eal/eal_interrupts.c create mode 100644 src/dpdk22/lib/librte_eal/linuxapp/eal/eal_ivshmem.c create mode 100644 src/dpdk22/lib/librte_eal/linuxapp/eal/eal_lcore.c create mode 100644 src/dpdk22/lib/librte_eal/linuxapp/eal/eal_log.c create mode 100644 src/dpdk22/lib/librte_eal/linuxapp/eal/eal_memory.c create mode 100644 src/dpdk22/lib/librte_eal/linuxapp/eal/eal_pci.c create mode 100644 src/dpdk22/lib/librte_eal/linuxapp/eal/eal_pci_init.h create mode 100644 src/dpdk22/lib/librte_eal/linuxapp/eal/eal_pci_uio.c create mode 100644 src/dpdk22/lib/librte_eal/linuxapp/eal/eal_pci_vfio.c create mode 100644 src/dpdk22/lib/librte_eal/linuxapp/eal/eal_pci_vfio_mp_sync.c create mode 100644 src/dpdk22/lib/librte_eal/linuxapp/eal/eal_thread.c create mode 100644 src/dpdk22/lib/librte_eal/linuxapp/eal/eal_timer.c create mode 100644 src/dpdk22/lib/librte_eal/linuxapp/eal/eal_vfio.h create mode 100644 src/dpdk22/lib/librte_eal/linuxapp/eal/include/exec-env/rte_dom0_common.h create mode 100644 src/dpdk22/lib/librte_eal/linuxapp/eal/include/exec-env/rte_interrupts.h create mode 100644 src/dpdk22/lib/librte_eal/linuxapp/eal/include/exec-env/rte_kni_common.h create mode 100644 src/dpdk22/lib/librte_eal/linuxapp/igb_uio/compat.h create mode 100644 src/dpdk22/lib/librte_eal/linuxapp/xen_dom0/compat.h create mode 100644 src/dpdk22/lib/librte_eal/linuxapp/xen_dom0/dom0_mm_dev.h create mode 100644 src/dpdk22/lib/librte_ether/rte_dev_info.h create mode 100644 src/dpdk22/lib/librte_ether/rte_eth_ctrl.h create mode 100644 src/dpdk22/lib/librte_ether/rte_ethdev.c create mode 100644 
src/dpdk22/lib/librte_ether/rte_ethdev.h create mode 100644 src/dpdk22/lib/librte_ether/rte_ether.h create mode 100644 src/dpdk22/lib/librte_hash/rte_cmp_arm64.h create mode 100644 src/dpdk22/lib/librte_hash/rte_cmp_x86.h create mode 100644 src/dpdk22/lib/librte_hash/rte_crc_arm64.h create mode 100644 src/dpdk22/lib/librte_hash/rte_cuckoo_hash.c create mode 100644 src/dpdk22/lib/librte_hash/rte_fbk_hash.h create mode 100644 src/dpdk22/lib/librte_hash/rte_hash.h create mode 100644 src/dpdk22/lib/librte_hash/rte_hash_crc.h create mode 100644 src/dpdk22/lib/librte_hash/rte_jhash.h create mode 100644 src/dpdk22/lib/librte_hash/rte_thash.h create mode 100644 src/dpdk22/lib/librte_kvargs/rte_kvargs.c create mode 100644 src/dpdk22/lib/librte_kvargs/rte_kvargs.h create mode 100644 src/dpdk22/lib/librte_mbuf/rte_mbuf.c create mode 100644 src/dpdk22/lib/librte_mbuf/rte_mbuf.h create mode 100644 src/dpdk22/lib/librte_mempool/rte_mempool.c create mode 100644 src/dpdk22/lib/librte_mempool/rte_mempool.h create mode 100644 src/dpdk22/lib/librte_net/rte_arp.h create mode 100644 src/dpdk22/lib/librte_net/rte_icmp.h create mode 100644 src/dpdk22/lib/librte_net/rte_ip.h create mode 100644 src/dpdk22/lib/librte_net/rte_sctp.h create mode 100644 src/dpdk22/lib/librte_net/rte_tcp.h create mode 100644 src/dpdk22/lib/librte_net/rte_udp.h create mode 100644 src/dpdk22/lib/librte_pipeline/rte_pipeline.c create mode 100644 src/dpdk22/lib/librte_pipeline/rte_pipeline.h create mode 100644 src/dpdk22/lib/librte_port/rte_port.h create mode 100644 src/dpdk22/lib/librte_port/rte_port_ethdev.h create mode 100644 src/dpdk22/lib/librte_port/rte_port_frag.h create mode 100644 src/dpdk22/lib/librte_port/rte_port_ras.h create mode 100644 src/dpdk22/lib/librte_port/rte_port_ring.h create mode 100644 src/dpdk22/lib/librte_port/rte_port_sched.h create mode 100644 src/dpdk22/lib/librte_port/rte_port_source_sink.h create mode 100644 src/dpdk22/lib/librte_ring/rte_ring.c create mode 100644 
src/dpdk22/lib/librte_ring/rte_ring.h create mode 100644 src/dpdk22/lib/librte_table/rte_lru.h create mode 100644 src/dpdk22/lib/librte_table/rte_table.h create mode 100644 src/dpdk22/lib/librte_table/rte_table_acl.h create mode 100644 src/dpdk22/lib/librte_table/rte_table_array.h create mode 100644 src/dpdk22/lib/librte_table/rte_table_hash.h create mode 100644 src/dpdk22/lib/librte_table/rte_table_lpm.h create mode 100644 src/dpdk22/lib/librte_table/rte_table_lpm_ipv6.h create mode 100644 src/dpdk22/lib/librte_table/rte_table_stub.h (limited to 'src/dpdk22/lib') diff --git a/src/dpdk22/lib/librte_acl/acl.h b/src/dpdk22/lib/librte_acl/acl.h new file mode 100644 index 00000000..09d67841 --- /dev/null +++ b/src/dpdk22/lib/librte_acl/acl.h @@ -0,0 +1,241 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef _ACL_H_ +#define _ACL_H_ + +#ifdef __cplusplus +extern"C" { +#endif /* __cplusplus */ + +#define RTE_ACL_QUAD_MAX 5 +#define RTE_ACL_QUAD_SIZE 4 +#define RTE_ACL_QUAD_SINGLE UINT64_C(0x7f7f7f7f00000000) + +#define RTE_ACL_SINGLE_TRIE_SIZE 2000 + +#define RTE_ACL_DFA_MAX UINT8_MAX +#define RTE_ACL_DFA_SIZE (UINT8_MAX + 1) + +#define RTE_ACL_DFA_GR64_SIZE 64 +#define RTE_ACL_DFA_GR64_NUM (RTE_ACL_DFA_SIZE / RTE_ACL_DFA_GR64_SIZE) +#define RTE_ACL_DFA_GR64_BIT \ + (CHAR_BIT * sizeof(uint32_t) / RTE_ACL_DFA_GR64_NUM) + +typedef int bits_t; + +#define RTE_ACL_BIT_SET_SIZE ((UINT8_MAX + 1) / (sizeof(bits_t) * CHAR_BIT)) + +struct rte_acl_bitset { + bits_t bits[RTE_ACL_BIT_SET_SIZE]; +}; + +#define RTE_ACL_NODE_DFA (0 << RTE_ACL_TYPE_SHIFT) +#define RTE_ACL_NODE_SINGLE (1U << RTE_ACL_TYPE_SHIFT) +#define RTE_ACL_NODE_QRANGE (3U << RTE_ACL_TYPE_SHIFT) +#define RTE_ACL_NODE_MATCH (4U << RTE_ACL_TYPE_SHIFT) +#define RTE_ACL_NODE_TYPE (7U << RTE_ACL_TYPE_SHIFT) +#define RTE_ACL_NODE_UNDEFINED UINT32_MAX + +/* + * ACL RT structure is a set of multibit tries (with stride == 8) + * represented by an array of transitions. The next position is calculated + * based on the current position and the input byte. 
+ * Each transition is 64 bit value with the following format: + * | node_type_specific : 32 | node_type : 3 | node_addr : 29 | + * For all node types except RTE_ACL_NODE_MATCH, node_addr is an index + * to the start of the node in the transitions array. + * A few different node types are used: + * RTE_ACL_NODE_MATCH: + * node_addr value is an index into an array that contains the return value + * and its priority for each category. + * Upper 32 bits of the transition value are not used for that node type. + * RTE_ACL_NODE_QRANGE: + * that node consists of up to 5 transitions. + * Upper 32 bits are interpreted as 4 signed character values which + * are ordered from smallest (INT8_MIN) to largest (INT8_MAX). + * These values define 5 ranges: + * INT8_MIN <= range[0] <= ((int8_t *)&transition)[4] + * ((int8_t *)&transition)[4] < range[1] <= ((int8_t *)&transition)[5] + * ((int8_t *)&transition)[5] < range[2] <= ((int8_t *)&transition)[6] + * ((int8_t *)&transition)[6] < range[3] <= ((int8_t *)&transition)[7] + * ((int8_t *)&transition)[7] < range[4] <= INT8_MAX + * So for input byte value within range[i] i-th transition within that node + * will be used. + * RTE_ACL_NODE_SINGLE: + * always transitions to the same node regardless of the input value. + * RTE_ACL_NODE_DFA: + * that node consists of up to 256 transitions. + * In an attempt to conserve space all transitions are divided into 4 consecutive + * groups, by 64 transitions per group: + * group64[i] contains transitions[i * 64, .. i * 64 + 63]. + * Upper 32 bits are interpreted as 4 unsigned character values one per group, + * which contain index to the start of the given group within the node. + * So to calculate transition index within the node for given input byte value: + * input_byte - ((uint8_t *)&transition)[4 + input_byte / 64]. + */ + +/* + * Structure of a node is a set of ptrs and each ptr has a bit map + * of values associated with this transition. 
+ */ +struct rte_acl_ptr_set { + struct rte_acl_bitset values; /* input values associated with ptr */ + struct rte_acl_node *ptr; /* transition to next node */ +}; + +struct rte_acl_classifier_results { + int results[RTE_ACL_MAX_CATEGORIES]; +}; + +struct rte_acl_match_results { + uint32_t results[RTE_ACL_MAX_CATEGORIES]; + int32_t priority[RTE_ACL_MAX_CATEGORIES]; +}; + +struct rte_acl_node { + uint64_t node_index; /* index for this node */ + uint32_t level; /* level 0-n in the trie */ + uint32_t ref_count; /* ref count for this node */ + struct rte_acl_bitset values; + /* set of all values that map to another node + * (union of bits in each transition. + */ + uint32_t num_ptrs; /* number of ptr_set in use */ + uint32_t max_ptrs; /* number of allocated ptr_set */ + uint32_t min_add; /* number of ptr_set per allocation */ + struct rte_acl_ptr_set *ptrs; /* transitions array for this node */ + int32_t match_flag; + int32_t match_index; /* index to match data */ + uint32_t node_type; + int32_t fanout; + /* number of ranges (transitions w/ consecutive bits) */ + int32_t id; + struct rte_acl_match_results *mrt; /* only valid when match_flag != 0 */ + union { + char transitions[RTE_ACL_QUAD_SIZE]; + /* boundaries for ranged node */ + uint8_t dfa_gr64[RTE_ACL_DFA_GR64_NUM]; + }; + struct rte_acl_node *next; + /* free list link or pointer to duplicate node during merge */ + struct rte_acl_node *prev; + /* points to node from which this node was duplicated */ +}; + +/* + * Types of tries used to generate runtime structure(s) + */ +enum { + RTE_ACL_FULL_TRIE = 0, + RTE_ACL_NOSRC_TRIE = 1, + RTE_ACL_NODST_TRIE = 2, + RTE_ACL_NOPORTS_TRIE = 4, + RTE_ACL_NOVLAN_TRIE = 8, + RTE_ACL_UNUSED_TRIE = 0x80000000 +}; + + +/** MAX number of tries per one ACL context.*/ +#define RTE_ACL_MAX_TRIES 8 + +/** Max number of characters in PM name.*/ +#define RTE_ACL_NAMESIZE 32 + + +struct rte_acl_trie { + uint32_t type; + uint32_t count; + uint32_t root_index; + const uint32_t *data_index; + 
uint32_t num_data_indexes; +}; + +struct rte_acl_bld_trie { + struct rte_acl_node *trie; +}; + +struct rte_acl_ctx { + char name[RTE_ACL_NAMESIZE]; + /** Name of the ACL context. */ + int32_t socket_id; + /** Socket ID to allocate memory from. */ + enum rte_acl_classify_alg alg; + void *rules; + uint32_t max_rules; + uint32_t rule_sz; + uint32_t num_rules; + uint32_t num_categories; + uint32_t num_tries; + uint32_t match_index; + uint64_t no_match; + uint64_t idle; + uint64_t *trans_table; + uint32_t *data_indexes; + struct rte_acl_trie trie[RTE_ACL_MAX_TRIES]; + void *mem; + size_t mem_sz; + struct rte_acl_config config; /* copy of build config. */ +}; + +int rte_acl_gen(struct rte_acl_ctx *ctx, struct rte_acl_trie *trie, + struct rte_acl_bld_trie *node_bld_trie, uint32_t num_tries, + uint32_t num_categories, uint32_t data_index_sz, size_t max_size); + +typedef int (*rte_acl_classify_t) +(const struct rte_acl_ctx *, const uint8_t **, uint32_t *, uint32_t, uint32_t); + +/* + * Different implementations of ACL classify. + */ +int +rte_acl_classify_scalar(const struct rte_acl_ctx *ctx, const uint8_t **data, + uint32_t *results, uint32_t num, uint32_t categories); + +int +rte_acl_classify_sse(const struct rte_acl_ctx *ctx, const uint8_t **data, + uint32_t *results, uint32_t num, uint32_t categories); + +int +rte_acl_classify_avx2(const struct rte_acl_ctx *ctx, const uint8_t **data, + uint32_t *results, uint32_t num, uint32_t categories); + +int +rte_acl_classify_neon(const struct rte_acl_ctx *ctx, const uint8_t **data, + uint32_t *results, uint32_t num, uint32_t categories); + +#ifdef __cplusplus +} +#endif /* __cplusplus */ + +#endif /* _ACL_H_ */ diff --git a/src/dpdk22/lib/librte_acl/acl_run.h b/src/dpdk22/lib/librte_acl/acl_run.h new file mode 100644 index 00000000..b2fc42c6 --- /dev/null +++ b/src/dpdk22/lib/librte_acl/acl_run.h @@ -0,0 +1,263 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. + * All rights reserved. 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#ifndef _ACL_RUN_H_ +#define _ACL_RUN_H_ + +#include +#include "acl.h" + +#define MAX_SEARCHES_AVX16 16 +#define MAX_SEARCHES_SSE8 8 +#define MAX_SEARCHES_SSE4 4 +#define MAX_SEARCHES_SCALAR 2 + +#define GET_NEXT_4BYTES(prm, idx) \ + (*((const int32_t *)((prm)[(idx)].data + *(prm)[idx].data_index++))) + + +#define RTE_ACL_NODE_INDEX ((uint32_t)~RTE_ACL_NODE_TYPE) + +#define SCALAR_QRANGE_MULT 0x01010101 +#define SCALAR_QRANGE_MASK 0x7f7f7f7f +#define SCALAR_QRANGE_MIN 0x80808080 + +/* + * Structure to manage N parallel trie traversals. + * The runtime trie traversal routines can process 8, 4, or 2 tries + * in parallel. Each packet may require multiple trie traversals (up to 4). + * This structure is used to fill the slots (0 to n-1) for parallel processing + * with the trie traversals needed for each packet. + */ +struct acl_flow_data { + uint32_t num_packets; + /* number of packets processed */ + uint32_t started; + /* number of trie traversals in progress */ + uint32_t trie; + /* current trie index (0 to N-1) */ + uint32_t cmplt_size; + uint32_t total_packets; + uint32_t categories; + /* number of result categories per packet. */ + /* maximum number of packets to process */ + const uint64_t *trans; + const uint8_t **data; + uint32_t *results; + struct completion *last_cmplt; + struct completion *cmplt_array; +}; + +/* + * Structure to maintain running results for + * a single packet (up to 4 tries). + */ +struct completion { + uint32_t *results; /* running results. */ + int32_t priority[RTE_ACL_MAX_CATEGORIES]; /* running priorities. */ + uint32_t count; /* num of remaining tries */ + /* true for allocated struct */ +} __attribute__((aligned(XMM_SIZE))); + +/* + * One parms structure for each slot in the search engine. 
+ */ +struct parms { + const uint8_t *data; + /* input data for this packet */ + const uint32_t *data_index; + /* data indirection for this trie */ + struct completion *cmplt; + /* completion data for this packet */ +}; + +/* + * Define a global idle node for unused engine slots + */ +static const uint32_t idle[UINT8_MAX + 1]; + +/* + * Allocate a completion structure to manage the tries for a packet. + */ +static inline struct completion * +alloc_completion(struct completion *p, uint32_t size, uint32_t tries, + uint32_t *results) +{ + uint32_t n; + + for (n = 0; n < size; n++) { + + if (p[n].count == 0) { + + /* mark as allocated and set number of tries. */ + p[n].count = tries; + p[n].results = results; + return &(p[n]); + } + } + + /* should never get here */ + return NULL; +} + +/* + * Resolve priority for a single result trie. + */ +static inline void +resolve_single_priority(uint64_t transition, int n, + const struct rte_acl_ctx *ctx, struct parms *parms, + const struct rte_acl_match_results *p) +{ + if (parms[n].cmplt->count == ctx->num_tries || + parms[n].cmplt->priority[0] <= + p[transition].priority[0]) { + + parms[n].cmplt->priority[0] = p[transition].priority[0]; + parms[n].cmplt->results[0] = p[transition].results[0]; + } +} + +/* + * Routine to fill a slot in the parallel trie traversal array (parms) from + * the list of packets (flows). 
+ */ +static inline uint64_t +acl_start_next_trie(struct acl_flow_data *flows, struct parms *parms, int n, + const struct rte_acl_ctx *ctx) +{ + uint64_t transition; + + /* if there are any more packets to process */ + if (flows->num_packets < flows->total_packets) { + parms[n].data = flows->data[flows->num_packets]; + parms[n].data_index = ctx->trie[flows->trie].data_index; + + /* if this is the first trie for this packet */ + if (flows->trie == 0) { + flows->last_cmplt = alloc_completion(flows->cmplt_array, + flows->cmplt_size, ctx->num_tries, + flows->results + + flows->num_packets * flows->categories); + } + + /* set completion parameters and starting index for this slot */ + parms[n].cmplt = flows->last_cmplt; + transition = + flows->trans[parms[n].data[*parms[n].data_index++] + + ctx->trie[flows->trie].root_index]; + + /* + * if this is the last trie for this packet, + * then setup next packet. + */ + flows->trie++; + if (flows->trie >= ctx->num_tries) { + flows->trie = 0; + flows->num_packets++; + } + + /* keep track of number of active trie traversals */ + flows->started++; + + /* no more tries to process, set slot to an idle position */ + } else { + transition = ctx->idle; + parms[n].data = (const uint8_t *)idle; + parms[n].data_index = idle; + } + return transition; +} + +static inline void +acl_set_flow(struct acl_flow_data *flows, struct completion *cmplt, + uint32_t cmplt_size, const uint8_t **data, uint32_t *results, + uint32_t data_num, uint32_t categories, const uint64_t *trans) +{ + flows->num_packets = 0; + flows->started = 0; + flows->trie = 0; + flows->last_cmplt = NULL; + flows->cmplt_array = cmplt; + flows->total_packets = data_num; + flows->categories = categories; + flows->cmplt_size = cmplt_size; + flows->data = data; + flows->results = results; + flows->trans = trans; +} + +typedef void (*resolve_priority_t) +(uint64_t transition, int n, const struct rte_acl_ctx *ctx, + struct parms *parms, const struct rte_acl_match_results *p, + uint32_t 
categories); + +/* + * Detect matches. If a match node transition is found, then this trie + * traversal is complete and fill the slot with the next trie + * to be processed. + */ +static inline uint64_t +acl_match_check(uint64_t transition, int slot, + const struct rte_acl_ctx *ctx, struct parms *parms, + struct acl_flow_data *flows, resolve_priority_t resolve_priority) +{ + const struct rte_acl_match_results *p; + + p = (const struct rte_acl_match_results *) + (flows->trans + ctx->match_index); + + if (transition & RTE_ACL_NODE_MATCH) { + + /* Remove flags from index and decrement active traversals */ + transition &= RTE_ACL_NODE_INDEX; + flows->started--; + + /* Resolve priorities for this trie and running results */ + if (flows->categories == 1) + resolve_single_priority(transition, slot, ctx, + parms, p); + else + resolve_priority(transition, slot, ctx, parms, + p, flows->categories); + + /* Count down completed tries for this search request */ + parms[slot].cmplt->count--; + + /* Fill the slot with the next trie or idle trie */ + transition = acl_start_next_trie(flows, parms, slot, ctx); + } + + return transition; +} + +#endif /* _ACL_RUN_H_ */ diff --git a/src/dpdk22/lib/librte_acl/acl_run_avx2.h b/src/dpdk22/lib/librte_acl/acl_run_avx2.h new file mode 100644 index 00000000..b01a46a5 --- /dev/null +++ b/src/dpdk22/lib/librte_acl/acl_run_avx2.h @@ -0,0 +1,284 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. 
+ * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#include "acl_run_sse.h" + +static const rte_ymm_t ymm_match_mask = { + .u32 = { + RTE_ACL_NODE_MATCH, + RTE_ACL_NODE_MATCH, + RTE_ACL_NODE_MATCH, + RTE_ACL_NODE_MATCH, + RTE_ACL_NODE_MATCH, + RTE_ACL_NODE_MATCH, + RTE_ACL_NODE_MATCH, + RTE_ACL_NODE_MATCH, + }, +}; + +static const rte_ymm_t ymm_index_mask = { + .u32 = { + RTE_ACL_NODE_INDEX, + RTE_ACL_NODE_INDEX, + RTE_ACL_NODE_INDEX, + RTE_ACL_NODE_INDEX, + RTE_ACL_NODE_INDEX, + RTE_ACL_NODE_INDEX, + RTE_ACL_NODE_INDEX, + RTE_ACL_NODE_INDEX, + }, +}; + +static const rte_ymm_t ymm_shuffle_input = { + .u32 = { + 0x00000000, 0x04040404, 0x08080808, 0x0c0c0c0c, + 0x00000000, 0x04040404, 0x08080808, 0x0c0c0c0c, + }, +}; + +static const rte_ymm_t ymm_ones_16 = { + .u16 = { + 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, + }, +}; + +static const rte_ymm_t ymm_range_base = { + .u32 = { + 0xffffff00, 0xffffff04, 0xffffff08, 0xffffff0c, + 0xffffff00, 0xffffff04, 0xffffff08, 0xffffff0c, + }, +}; + +/* + * Process 8 transitions in parallel. + * tr_lo contains low 32 bits for 8 transitions. + * tr_hi contains high 32 bits for 8 transitions. + * next_input contains up to 4 input bytes for 8 flows. + */ +static inline __attribute__((always_inline)) ymm_t +transition8(ymm_t next_input, const uint64_t *trans, ymm_t *tr_lo, ymm_t *tr_hi) +{ + const int32_t *tr; + ymm_t addr; + + tr = (const int32_t *)(uintptr_t)trans; + + /* Calculate the address (array index) for all 8 transitions. */ + ACL_TR_CALC_ADDR(mm256, 256, addr, ymm_index_mask.y, next_input, + ymm_shuffle_input.y, ymm_ones_16.y, ymm_range_base.y, + *tr_lo, *tr_hi); + + /* load lower 32 bits of 8 transitions at once. */ + *tr_lo = _mm256_i32gather_epi32(tr, addr, sizeof(trans[0])); + + next_input = _mm256_srli_epi32(next_input, CHAR_BIT); + + /* load high 32 bits of 8 transitions at once. */ + *tr_hi = _mm256_i32gather_epi32(tr + 1, addr, sizeof(trans[0])); + + return next_input; +} + +/* + * Process matches for 8 flows. 
+ * tr_lo contains low 32 bits for 8 transitions. + * tr_hi contains high 32 bits for 8 transitions. + */ +static inline void +acl_process_matches_avx2x8(const struct rte_acl_ctx *ctx, + struct parms *parms, struct acl_flow_data *flows, uint32_t slot, + ymm_t matches, ymm_t *tr_lo, ymm_t *tr_hi) +{ + ymm_t t0, t1; + ymm_t lo, hi; + xmm_t l0, l1; + uint32_t i; + uint64_t tr[MAX_SEARCHES_SSE8]; + + l1 = _mm256_extracti128_si256(*tr_lo, 1); + l0 = _mm256_castsi256_si128(*tr_lo); + + for (i = 0; i != RTE_DIM(tr) / 2; i++) { + + /* + * Extract low 32bits of each transition. + * That's enough to process the match. + */ + tr[i] = (uint32_t)_mm_cvtsi128_si32(l0); + tr[i + 4] = (uint32_t)_mm_cvtsi128_si32(l1); + + l0 = _mm_srli_si128(l0, sizeof(uint32_t)); + l1 = _mm_srli_si128(l1, sizeof(uint32_t)); + + tr[i] = acl_match_check(tr[i], slot + i, + ctx, parms, flows, resolve_priority_sse); + tr[i + 4] = acl_match_check(tr[i + 4], slot + i + 4, + ctx, parms, flows, resolve_priority_sse); + } + + /* Collect new transitions into 2 YMM registers. */ + t0 = _mm256_set_epi64x(tr[5], tr[4], tr[1], tr[0]); + t1 = _mm256_set_epi64x(tr[7], tr[6], tr[3], tr[2]); + + /* For each transition: put low 32 into tr_lo and high 32 into tr_hi */ + ACL_TR_HILO(mm256, __m256, t0, t1, lo, hi); + + /* Keep transitions with NOMATCH intact. 
*/ + *tr_lo = _mm256_blendv_epi8(*tr_lo, lo, matches); + *tr_hi = _mm256_blendv_epi8(*tr_hi, hi, matches); +} + +static inline void +acl_match_check_avx2x8(const struct rte_acl_ctx *ctx, struct parms *parms, + struct acl_flow_data *flows, uint32_t slot, + ymm_t *tr_lo, ymm_t *tr_hi, ymm_t match_mask) +{ + uint32_t msk; + ymm_t matches, temp; + + /* test for match node */ + temp = _mm256_and_si256(match_mask, *tr_lo); + matches = _mm256_cmpeq_epi32(temp, match_mask); + msk = _mm256_movemask_epi8(matches); + + while (msk != 0) { + + acl_process_matches_avx2x8(ctx, parms, flows, slot, + matches, tr_lo, tr_hi); + temp = _mm256_and_si256(match_mask, *tr_lo); + matches = _mm256_cmpeq_epi32(temp, match_mask); + msk = _mm256_movemask_epi8(matches); + } +} + +/* + * Execute trie traversal for up to 16 flows in parallel. + */ +static inline int +search_avx2x16(const struct rte_acl_ctx *ctx, const uint8_t **data, + uint32_t *results, uint32_t total_packets, uint32_t categories) +{ + uint32_t n; + struct acl_flow_data flows; + uint64_t index_array[MAX_SEARCHES_AVX16]; + struct completion cmplt[MAX_SEARCHES_AVX16]; + struct parms parms[MAX_SEARCHES_AVX16]; + ymm_t input[2], tr_lo[2], tr_hi[2]; + ymm_t t0, t1; + + acl_set_flow(&flows, cmplt, RTE_DIM(cmplt), data, results, + total_packets, categories, ctx->trans_table); + + for (n = 0; n < RTE_DIM(cmplt); n++) { + cmplt[n].count = 0; + index_array[n] = acl_start_next_trie(&flows, parms, n, ctx); + } + + t0 = _mm256_set_epi64x(index_array[5], index_array[4], + index_array[1], index_array[0]); + t1 = _mm256_set_epi64x(index_array[7], index_array[6], + index_array[3], index_array[2]); + + ACL_TR_HILO(mm256, __m256, t0, t1, tr_lo[0], tr_hi[0]); + + t0 = _mm256_set_epi64x(index_array[13], index_array[12], + index_array[9], index_array[8]); + t1 = _mm256_set_epi64x(index_array[15], index_array[14], + index_array[11], index_array[10]); + + ACL_TR_HILO(mm256, __m256, t0, t1, tr_lo[1], tr_hi[1]); + + /* Check for any matches. 
*/ + acl_match_check_avx2x8(ctx, parms, &flows, 0, &tr_lo[0], &tr_hi[0], + ymm_match_mask.y); + acl_match_check_avx2x8(ctx, parms, &flows, 8, &tr_lo[1], &tr_hi[1], + ymm_match_mask.y); + + while (flows.started > 0) { + + uint32_t in[MAX_SEARCHES_SSE8]; + + /* Gather 4 bytes of input data for first 8 flows. */ + in[0] = GET_NEXT_4BYTES(parms, 0); + in[4] = GET_NEXT_4BYTES(parms, 4); + in[1] = GET_NEXT_4BYTES(parms, 1); + in[5] = GET_NEXT_4BYTES(parms, 5); + in[2] = GET_NEXT_4BYTES(parms, 2); + in[6] = GET_NEXT_4BYTES(parms, 6); + in[3] = GET_NEXT_4BYTES(parms, 3); + in[7] = GET_NEXT_4BYTES(parms, 7); + input[0] = _mm256_set_epi32(in[7], in[6], in[5], in[4], + in[3], in[2], in[1], in[0]); + + /* Gather 4 bytes of input data for last 8 flows. */ + in[0] = GET_NEXT_4BYTES(parms, 8); + in[4] = GET_NEXT_4BYTES(parms, 12); + in[1] = GET_NEXT_4BYTES(parms, 9); + in[5] = GET_NEXT_4BYTES(parms, 13); + in[2] = GET_NEXT_4BYTES(parms, 10); + in[6] = GET_NEXT_4BYTES(parms, 14); + in[3] = GET_NEXT_4BYTES(parms, 11); + in[7] = GET_NEXT_4BYTES(parms, 15); + input[1] = _mm256_set_epi32(in[7], in[6], in[5], in[4], + in[3], in[2], in[1], in[0]); + + input[0] = transition8(input[0], flows.trans, + &tr_lo[0], &tr_hi[0]); + input[1] = transition8(input[1], flows.trans, + &tr_lo[1], &tr_hi[1]); + + input[0] = transition8(input[0], flows.trans, + &tr_lo[0], &tr_hi[0]); + input[1] = transition8(input[1], flows.trans, + &tr_lo[1], &tr_hi[1]); + + input[0] = transition8(input[0], flows.trans, + &tr_lo[0], &tr_hi[0]); + input[1] = transition8(input[1], flows.trans, + &tr_lo[1], &tr_hi[1]); + + input[0] = transition8(input[0], flows.trans, + &tr_lo[0], &tr_hi[0]); + input[1] = transition8(input[1], flows.trans, + &tr_lo[1], &tr_hi[1]); + + /* Check for any matches. 
*/ + acl_match_check_avx2x8(ctx, parms, &flows, 0, + &tr_lo[0], &tr_hi[0], ymm_match_mask.y); + acl_match_check_avx2x8(ctx, parms, &flows, 8, + &tr_lo[1], &tr_hi[1], ymm_match_mask.y); + } + + return 0; +} diff --git a/src/dpdk22/lib/librte_acl/acl_run_neon.h b/src/dpdk22/lib/librte_acl/acl_run_neon.h new file mode 100644 index 00000000..cf7c57fb --- /dev/null +++ b/src/dpdk22/lib/librte_acl/acl_run_neon.h @@ -0,0 +1,289 @@ +/* + * BSD LICENSE + * + * Copyright (C) Cavium networks Ltd. 2015. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Cavium networks nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*/ + +#include "acl_run.h" +#include "acl_vect.h" + +struct _neon_acl_const { + rte_xmm_t xmm_shuffle_input; + rte_xmm_t xmm_index_mask; + rte_xmm_t range_base; +} neon_acl_const __attribute__((aligned(RTE_CACHE_LINE_SIZE))) = { + { + .u32 = {0x00000000, 0x04040404, 0x08080808, 0x0c0c0c0c} + }, + { + .u32 = {RTE_ACL_NODE_INDEX, RTE_ACL_NODE_INDEX, + RTE_ACL_NODE_INDEX, RTE_ACL_NODE_INDEX} + }, + { + .u32 = {0xffffff00, 0xffffff04, 0xffffff08, 0xffffff0c} + }, +}; + +/* + * Resolve priority for multiple results (neon version). + * This consists of comparing the priority of the current traversal with the + * running set of results for the packet. + * For each result, keep a running array of the result (rule number) and + * its priority for each category. 
+ */ +static inline void +resolve_priority_neon(uint64_t transition, int n, const struct rte_acl_ctx *ctx, + struct parms *parms, + const struct rte_acl_match_results *p, + uint32_t categories) +{ + uint32_t x; + int32x4_t results, priority, results1, priority1; + uint32x4_t selector; + int32_t *saved_results, *saved_priority; + + for (x = 0; x < categories; x += RTE_ACL_RESULTS_MULTIPLIER) { + saved_results = (int32_t *)(&parms[n].cmplt->results[x]); + saved_priority = (int32_t *)(&parms[n].cmplt->priority[x]); + + /* get results and priorities for completed trie */ + results = vld1q_s32( + (const int32_t *)&p[transition].results[x]); + priority = vld1q_s32( + (const int32_t *)&p[transition].priority[x]); + + /* if this is not the first completed trie */ + if (parms[n].cmplt->count != ctx->num_tries) { + /* get running best results and their priorities */ + results1 = vld1q_s32(saved_results); + priority1 = vld1q_s32(saved_priority); + + /* select results that are highest priority */ + selector = vcgtq_s32(priority1, priority); + results = vbslq_s32(selector, results1, results); + priority = vbslq_s32(selector, priority1, priority); + } + + /* save running best results and their priorities */ + vst1q_s32(saved_results, results); + vst1q_s32(saved_priority, priority); + } +} + +/* + * Check for any match in 4 transitions + */ +static inline __attribute__((always_inline)) uint32_t +check_any_match_x4(uint64_t val[]) +{ + return ((val[0] | val[1] | val[2] | val[3]) & RTE_ACL_NODE_MATCH); +} + +static inline __attribute__((always_inline)) void +acl_match_check_x4(int slot, const struct rte_acl_ctx *ctx, struct parms *parms, + struct acl_flow_data *flows, uint64_t transitions[]) +{ + while (check_any_match_x4(transitions)) { + transitions[0] = acl_match_check(transitions[0], slot, ctx, + parms, flows, resolve_priority_neon); + transitions[1] = acl_match_check(transitions[1], slot + 1, ctx, + parms, flows, resolve_priority_neon); + transitions[2] = 
acl_match_check(transitions[2], slot + 2, ctx, + parms, flows, resolve_priority_neon); + transitions[3] = acl_match_check(transitions[3], slot + 3, ctx, + parms, flows, resolve_priority_neon); + } +} + +/* + * Process 4 transitions (in 2 NEON Q registers) in parallel + */ +static inline __attribute__((always_inline)) int32x4_t +transition4(int32x4_t next_input, const uint64_t *trans, uint64_t transitions[]) +{ + int32x4x2_t tr_hi_lo; + int32x4_t t, in, r; + uint32x4_t index_msk, node_type, addr; + uint32x4_t dfa_msk, mask, quad_ofs, dfa_ofs; + + /* Move low 32 into tr_hi_lo.val[0] and high 32 into tr_hi_lo.val[1] */ + tr_hi_lo = vld2q_s32((const int32_t *)transitions); + + /* Calculate the address (array index) for all 4 transitions. */ + + index_msk = vld1q_u32((const uint32_t *)&neon_acl_const.xmm_index_mask); + + /* Calc node type and node addr */ + node_type = vbicq_s32(tr_hi_lo.val[0], index_msk); + addr = vandq_s32(tr_hi_lo.val[0], index_msk); + + /* t = 0 */ + t = veorq_s32(node_type, node_type); + + /* mask for DFA type(0) nodes */ + dfa_msk = vceqq_u32(node_type, t); + + mask = vld1q_s32((const int32_t *)&neon_acl_const.xmm_shuffle_input); + in = vqtbl1q_u8((uint8x16_t)next_input, (uint8x16_t)mask); + + /* DFA calculations. */ + r = vshrq_n_u32(in, 30); /* div by 64 */ + mask = vld1q_s32((const int32_t *)&neon_acl_const.range_base); + r = vaddq_u8(r, mask); + t = vshrq_n_u32(in, 24); + r = vqtbl1q_u8((uint8x16_t)tr_hi_lo.val[1], (uint8x16_t)r); + dfa_ofs = vsubq_s32(t, r); + + /* QUAD/SINGLE calculations. */ + t = vcgtq_s8(in, tr_hi_lo.val[1]); + t = vabsq_s8(t); + t = vpaddlq_u8(t); + quad_ofs = vpaddlq_u16(t); + + /* blend DFA and QUAD/SINGLE. 
*/ + t = vbslq_u8(dfa_msk, dfa_ofs, quad_ofs); + + /* calculate address for next transitions */ + addr = vaddq_u32(addr, t); + + /* Fill next transitions */ + transitions[0] = trans[vgetq_lane_u32(addr, 0)]; + transitions[1] = trans[vgetq_lane_u32(addr, 1)]; + transitions[2] = trans[vgetq_lane_u32(addr, 2)]; + transitions[3] = trans[vgetq_lane_u32(addr, 3)]; + + return vshrq_n_u32(next_input, CHAR_BIT); +} + +/* + * Execute trie traversal with 8 traversals in parallel + */ +static inline int +search_neon_8(const struct rte_acl_ctx *ctx, const uint8_t **data, + uint32_t *results, uint32_t total_packets, uint32_t categories) +{ + int n; + struct acl_flow_data flows; + uint64_t index_array[8]; + struct completion cmplt[8]; + struct parms parms[8]; + int32x4_t input0, input1; + + acl_set_flow(&flows, cmplt, RTE_DIM(cmplt), data, results, + total_packets, categories, ctx->trans_table); + + for (n = 0; n < 8; n++) { + cmplt[n].count = 0; + index_array[n] = acl_start_next_trie(&flows, parms, n, ctx); + } + + /* Check for any matches. */ + acl_match_check_x4(0, ctx, parms, &flows, &index_array[0]); + acl_match_check_x4(4, ctx, parms, &flows, &index_array[4]); + + while (flows.started > 0) { + /* Gather 4 bytes of input data for each stream. */ + input0 = vsetq_lane_s32(GET_NEXT_4BYTES(parms, 0), input0, 0); + input1 = vsetq_lane_s32(GET_NEXT_4BYTES(parms, 4), input1, 0); + + input0 = vsetq_lane_s32(GET_NEXT_4BYTES(parms, 1), input0, 1); + input1 = vsetq_lane_s32(GET_NEXT_4BYTES(parms, 5), input1, 1); + + input0 = vsetq_lane_s32(GET_NEXT_4BYTES(parms, 2), input0, 2); + input1 = vsetq_lane_s32(GET_NEXT_4BYTES(parms, 6), input1, 2); + + input0 = vsetq_lane_s32(GET_NEXT_4BYTES(parms, 3), input0, 3); + input1 = vsetq_lane_s32(GET_NEXT_4BYTES(parms, 7), input1, 3); + + /* Process the 4 bytes of input on each stream. 
*/ + + input0 = transition4(input0, flows.trans, &index_array[0]); + input1 = transition4(input1, flows.trans, &index_array[4]); + + input0 = transition4(input0, flows.trans, &index_array[0]); + input1 = transition4(input1, flows.trans, &index_array[4]); + + input0 = transition4(input0, flows.trans, &index_array[0]); + input1 = transition4(input1, flows.trans, &index_array[4]); + + input0 = transition4(input0, flows.trans, &index_array[0]); + input1 = transition4(input1, flows.trans, &index_array[4]); + + /* Check for any matches. */ + acl_match_check_x4(0, ctx, parms, &flows, &index_array[0]); + acl_match_check_x4(4, ctx, parms, &flows, &index_array[4]); + } + + return 0; +} + +/* + * Execute trie traversal with 4 traversals in parallel + */ +static inline int +search_neon_4(const struct rte_acl_ctx *ctx, const uint8_t **data, + uint32_t *results, int total_packets, uint32_t categories) +{ + int n; + struct acl_flow_data flows; + uint64_t index_array[4]; + struct completion cmplt[4]; + struct parms parms[4]; + int32x4_t input; + + acl_set_flow(&flows, cmplt, RTE_DIM(cmplt), data, results, + total_packets, categories, ctx->trans_table); + + for (n = 0; n < 4; n++) { + cmplt[n].count = 0; + index_array[n] = acl_start_next_trie(&flows, parms, n, ctx); + } + + /* Check for any matches. */ + acl_match_check_x4(0, ctx, parms, &flows, index_array); + + while (flows.started > 0) { + /* Gather 4 bytes of input data for each stream. */ + input = vsetq_lane_s32(GET_NEXT_4BYTES(parms, 0), input, 0); + input = vsetq_lane_s32(GET_NEXT_4BYTES(parms, 1), input, 1); + input = vsetq_lane_s32(GET_NEXT_4BYTES(parms, 2), input, 2); + input = vsetq_lane_s32(GET_NEXT_4BYTES(parms, 3), input, 3); + + /* Process the 4 bytes of input on each stream. 
*/ + input = transition4(input, flows.trans, index_array); + input = transition4(input, flows.trans, index_array); + input = transition4(input, flows.trans, index_array); + input = transition4(input, flows.trans, index_array); + + /* Check for any matches. */ + acl_match_check_x4(0, ctx, parms, &flows, index_array); + } + + return 0; +} diff --git a/src/dpdk22/lib/librte_acl/acl_run_sse.h b/src/dpdk22/lib/librte_acl/acl_run_sse.h new file mode 100644 index 00000000..ad40a674 --- /dev/null +++ b/src/dpdk22/lib/librte_acl/acl_run_sse.h @@ -0,0 +1,357 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include "acl_run.h" +#include "acl_vect.h" + +enum { + SHUFFLE32_SLOT1 = 0xe5, + SHUFFLE32_SLOT2 = 0xe6, + SHUFFLE32_SLOT3 = 0xe7, + SHUFFLE32_SWAP64 = 0x4e, +}; + +static const rte_xmm_t xmm_shuffle_input = { + .u32 = {0x00000000, 0x04040404, 0x08080808, 0x0c0c0c0c}, +}; + +static const rte_xmm_t xmm_ones_16 = { + .u16 = {1, 1, 1, 1, 1, 1, 1, 1}, +}; + +static const rte_xmm_t xmm_match_mask = { + .u32 = { + RTE_ACL_NODE_MATCH, + RTE_ACL_NODE_MATCH, + RTE_ACL_NODE_MATCH, + RTE_ACL_NODE_MATCH, + }, +}; + +static const rte_xmm_t xmm_index_mask = { + .u32 = { + RTE_ACL_NODE_INDEX, + RTE_ACL_NODE_INDEX, + RTE_ACL_NODE_INDEX, + RTE_ACL_NODE_INDEX, + }, +}; + +static const rte_xmm_t xmm_range_base = { + .u32 = { + 0xffffff00, 0xffffff04, 0xffffff08, 0xffffff0c, + }, +}; + +/* + * Resolve priority for multiple results (sse version). + * This consists of comparing the priority of the current traversal with the + * running set of results for the packet. + * For each result, keep a running array of the result (rule number) and + * its priority for each category. 
+ */ +static inline void +resolve_priority_sse(uint64_t transition, int n, const struct rte_acl_ctx *ctx, + struct parms *parms, const struct rte_acl_match_results *p, + uint32_t categories) +{ + uint32_t x; + xmm_t results, priority, results1, priority1, selector; + xmm_t *saved_results, *saved_priority; + + for (x = 0; x < categories; x += RTE_ACL_RESULTS_MULTIPLIER) { + + saved_results = (xmm_t *)(&parms[n].cmplt->results[x]); + saved_priority = + (xmm_t *)(&parms[n].cmplt->priority[x]); + + /* get results and priorities for completed trie */ + results = _mm_loadu_si128( + (const xmm_t *)&p[transition].results[x]); + priority = _mm_loadu_si128( + (const xmm_t *)&p[transition].priority[x]); + + /* if this is not the first completed trie */ + if (parms[n].cmplt->count != ctx->num_tries) { + + /* get running best results and their priorities */ + results1 = _mm_loadu_si128(saved_results); + priority1 = _mm_loadu_si128(saved_priority); + + /* select results that are highest priority */ + selector = _mm_cmpgt_epi32(priority1, priority); + results = _mm_blendv_epi8(results, results1, selector); + priority = _mm_blendv_epi8(priority, priority1, + selector); + } + + /* save running best results and their priorities */ + _mm_storeu_si128(saved_results, results); + _mm_storeu_si128(saved_priority, priority); + } +} + +/* + * Extract transitions from an XMM register and check for any matches + */ +static void +acl_process_matches(xmm_t *indices, int slot, const struct rte_acl_ctx *ctx, + struct parms *parms, struct acl_flow_data *flows) +{ + uint64_t transition1, transition2; + + /* extract transition from low 64 bits. */ + transition1 = _mm_cvtsi128_si64(*indices); + + /* extract transition from high 64 bits. 
*/ + *indices = _mm_shuffle_epi32(*indices, SHUFFLE32_SWAP64); + transition2 = _mm_cvtsi128_si64(*indices); + + transition1 = acl_match_check(transition1, slot, ctx, + parms, flows, resolve_priority_sse); + transition2 = acl_match_check(transition2, slot + 1, ctx, + parms, flows, resolve_priority_sse); + + /* update indices with new transitions. */ + *indices = _mm_set_epi64x(transition2, transition1); +} + +/* + * Check for any match in 4 transitions (contained in 2 SSE registers). + * The low 32 bits of the four transitions are gathered into one register + * and ANDed with match_mask; while any bit survives, acl_process_matches() + * is run on indices1 (starting at slot) and on indices2 (starting at + * slot + 2) and the test is repeated on the updated transitions. + */ +static inline __attribute__((always_inline)) void +acl_match_check_x4(int slot, const struct rte_acl_ctx *ctx, struct parms *parms, + struct acl_flow_data *flows, xmm_t *indices1, xmm_t *indices2, + xmm_t match_mask) +{ + xmm_t temp; + + /* put low 32 bits of each transition into one register */ + temp = (xmm_t)_mm_shuffle_ps((__m128)*indices1, (__m128)*indices2, + 0x88); + /* test for match node */ + temp = _mm_and_si128(match_mask, temp); + + while (!_mm_testz_si128(temp, temp)) { + acl_process_matches(indices1, slot, ctx, parms, flows); + acl_process_matches(indices2, slot + 2, ctx, parms, flows); + + temp = (xmm_t)_mm_shuffle_ps((__m128)*indices1, + (__m128)*indices2, + 0x88); + temp = _mm_and_si128(match_mask, temp); + } +} + +/* + * Process 4 transitions (in 2 XMM registers) in parallel. + * Replaces *indices1 and *indices2 with the 64-bit entries read from + * trans[] at the four computed addresses and returns next_input with each + * 32-bit lane shifted right by CHAR_BIT. + */ +static inline __attribute__((always_inline)) xmm_t +transition4(xmm_t next_input, const uint64_t *trans, + xmm_t *indices1, xmm_t *indices2) +{ + xmm_t addr, tr_lo, tr_hi; + uint64_t trans0, trans2; + + /* Shuffle low 32 into tr_lo and high 32 into tr_hi */ + ACL_TR_HILO(mm, __m128, *indices1, *indices2, tr_lo, tr_hi); + + /* Calculate the address (array index) for all 4 transitions. */ + ACL_TR_CALC_ADDR(mm, 128, addr, xmm_index_mask.x, next_input, + xmm_shuffle_input.x, xmm_ones_16.x, xmm_range_base.x, + tr_lo, tr_hi); + + /* Gather 64 bit transitions and pack back into 2 registers.
*/ + + trans0 = trans[_mm_cvtsi128_si32(addr)]; + + /* get slot 2 */ + + /* {x0, x1, x2, x3} -> {x2, x1, x2, x3} */ + addr = _mm_shuffle_epi32(addr, SHUFFLE32_SLOT2); + trans2 = trans[_mm_cvtsi128_si32(addr)]; + + /* get slot 1 */ + + /* {x2, x1, x2, x3} -> {x1, x1, x2, x3} */ + addr = _mm_shuffle_epi32(addr, SHUFFLE32_SLOT1); + *indices1 = _mm_set_epi64x(trans[_mm_cvtsi128_si32(addr)], trans0); + + /* get slot 3 */ + + /* {x1, x1, x2, x3} -> {x3, x1, x2, x3} */ + addr = _mm_shuffle_epi32(addr, SHUFFLE32_SLOT3); + *indices2 = _mm_set_epi64x(trans[_mm_cvtsi128_si32(addr)], trans2); + + return _mm_srli_epi32(next_input, CHAR_BIT); +} + +/* + * Execute trie traversal with 8 traversals in parallel. + * Streams 0-3 are kept in indices1/indices2 and streams 4-7 in + * indices3/indices4. Every iteration of the main loop reads 4 bytes per + * stream via GET_NEXT_4BYTES() and applies transition4() four times to + * each register pair, then re-checks for matches; the loop ends when + * flows.started is no longer positive. Always returns 0. + */ +static inline int +search_sse_8(const struct rte_acl_ctx *ctx, const uint8_t **data, + uint32_t *results, uint32_t total_packets, uint32_t categories) +{ + int n; + struct acl_flow_data flows; + uint64_t index_array[MAX_SEARCHES_SSE8]; + struct completion cmplt[MAX_SEARCHES_SSE8]; + struct parms parms[MAX_SEARCHES_SSE8]; + xmm_t input0, input1; + xmm_t indices1, indices2, indices3, indices4; + + acl_set_flow(&flows, cmplt, RTE_DIM(cmplt), data, results, + total_packets, categories, ctx->trans_table); + + for (n = 0; n < MAX_SEARCHES_SSE8; n++) { + cmplt[n].count = 0; + index_array[n] = acl_start_next_trie(&flows, parms, n, ctx); + } + + /* + * indices1 contains index_array[0,1] + * indices2 contains index_array[2,3] + * indices3 contains index_array[4,5] + * indices4 contains index_array[6,7] + */ + + indices1 = _mm_loadu_si128((xmm_t *) &index_array[0]); + indices2 = _mm_loadu_si128((xmm_t *) &index_array[2]); + + indices3 = _mm_loadu_si128((xmm_t *) &index_array[4]); + indices4 = _mm_loadu_si128((xmm_t *) &index_array[6]); + + /* Check for any matches.
*/ + acl_match_check_x4(0, ctx, parms, &flows, + &indices1, &indices2, xmm_match_mask.x); + acl_match_check_x4(4, ctx, parms, &flows, + &indices3, &indices4, xmm_match_mask.x); + + while (flows.started > 0) { + + /* Gather 4 bytes of input data for each stream. */ + input0 = _mm_cvtsi32_si128(GET_NEXT_4BYTES(parms, 0)); + input1 = _mm_cvtsi32_si128(GET_NEXT_4BYTES(parms, 4)); + + input0 = _mm_insert_epi32(input0, GET_NEXT_4BYTES(parms, 1), 1); + input1 = _mm_insert_epi32(input1, GET_NEXT_4BYTES(parms, 5), 1); + + input0 = _mm_insert_epi32(input0, GET_NEXT_4BYTES(parms, 2), 2); + input1 = _mm_insert_epi32(input1, GET_NEXT_4BYTES(parms, 6), 2); + + input0 = _mm_insert_epi32(input0, GET_NEXT_4BYTES(parms, 3), 3); + input1 = _mm_insert_epi32(input1, GET_NEXT_4BYTES(parms, 7), 3); + + /* Process the 4 bytes of input on each stream: every transition4() call returns its input shifted right by CHAR_BIT in each 32-bit lane, and that value is fed to the next call. */ + + input0 = transition4(input0, flows.trans, + &indices1, &indices2); + input1 = transition4(input1, flows.trans, + &indices3, &indices4); + + input0 = transition4(input0, flows.trans, + &indices1, &indices2); + input1 = transition4(input1, flows.trans, + &indices3, &indices4); + + input0 = transition4(input0, flows.trans, + &indices1, &indices2); + input1 = transition4(input1, flows.trans, + &indices3, &indices4); + + input0 = transition4(input0, flows.trans, + &indices1, &indices2); + input1 = transition4(input1, flows.trans, + &indices3, &indices4); + + /* Check for any matches.
*/ + acl_match_check_x4(0, ctx, parms, &flows, + &indices1, &indices2, xmm_match_mask.x); + acl_match_check_x4(4, ctx, parms, &flows, + &indices3, &indices4, xmm_match_mask.x); + } + + return 0; +} + +/* + * Execute trie traversal with 4 traversals in parallel. + * The four streams are kept in indices1/indices2. Every iteration of the + * main loop reads 4 bytes per stream via GET_NEXT_4BYTES() and applies + * transition4() four times, then re-checks for matches; the loop ends when + * flows.started is no longer positive. Always returns 0. + */ +static inline int +search_sse_4(const struct rte_acl_ctx *ctx, const uint8_t **data, + uint32_t *results, int total_packets, uint32_t categories) +{ + int n; + struct acl_flow_data flows; + uint64_t index_array[MAX_SEARCHES_SSE4]; + struct completion cmplt[MAX_SEARCHES_SSE4]; + struct parms parms[MAX_SEARCHES_SSE4]; + xmm_t input, indices1, indices2; + + acl_set_flow(&flows, cmplt, RTE_DIM(cmplt), data, results, + total_packets, categories, ctx->trans_table); + + for (n = 0; n < MAX_SEARCHES_SSE4; n++) { + cmplt[n].count = 0; + index_array[n] = acl_start_next_trie(&flows, parms, n, ctx); + } + + indices1 = _mm_loadu_si128((xmm_t *) &index_array[0]); + indices2 = _mm_loadu_si128((xmm_t *) &index_array[2]); + + /* Check for any matches. */ + acl_match_check_x4(0, ctx, parms, &flows, + &indices1, &indices2, xmm_match_mask.x); + + while (flows.started > 0) { + + /* Gather 4 bytes of input data for each stream. */ + input = _mm_cvtsi32_si128(GET_NEXT_4BYTES(parms, 0)); + input = _mm_insert_epi32(input, GET_NEXT_4BYTES(parms, 1), 1); + input = _mm_insert_epi32(input, GET_NEXT_4BYTES(parms, 2), 2); + input = _mm_insert_epi32(input, GET_NEXT_4BYTES(parms, 3), 3); + + /* Process the 4 bytes of input on each stream. */ + input = transition4(input, flows.trans, &indices1, &indices2); + input = transition4(input, flows.trans, &indices1, &indices2); + input = transition4(input, flows.trans, &indices1, &indices2); + input = transition4(input, flows.trans, &indices1, &indices2); + + /* Check for any matches.
*/ + acl_match_check_x4(0, ctx, parms, &flows, + &indices1, &indices2, xmm_match_mask.x); + } + + return 0; +} diff --git a/src/dpdk22/lib/librte_acl/acl_vect.h b/src/dpdk22/lib/librte_acl/acl_vect.h new file mode 100644 index 00000000..6cc19997 --- /dev/null +++ b/src/dpdk22/lib/librte_acl/acl_vect.h @@ -0,0 +1,116 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */ + +#ifndef _RTE_ACL_VECT_H_ +#define _RTE_ACL_VECT_H_ + +/** + * @file + * + * RTE ACL SSE/AVX related header. + */ + +#ifdef __cplusplus +extern "C" { +#endif + + +/* + * Takes 2 SIMD registers containing N transitions each (tr0, tr1). + * Shuffles it into different representation: + * lo - contains low 32 bits of given N transitions. + * hi - contains high 32 bits of given N transitions. + * P is the intrinsic name prefix pasted into _<P>_shuffle_ps (e.g. mm), + * TC is the vector type tr0 and tr1 are cast to for that shuffle. + */ +#define ACL_TR_HILO(P, TC, tr0, tr1, lo, hi) do { \ + lo = (typeof(lo))_##P##_shuffle_ps((TC)(tr0), (TC)(tr1), 0x88); \ + hi = (typeof(hi))_##P##_shuffle_ps((TC)(tr0), (TC)(tr1), 0xdd); \ +} while (0) + + +/* + * Calculate the address of the next transition for + * all types of nodes. Note that only DFA nodes and range + * nodes actually transition to another node. Match + * nodes are not supposed to be encountered here. + * For quad range nodes: + * Calculate number of range boundaries that are less than the + * input value. Range boundaries for each node are in signed 8 bit, + * ordered from -128 to 127. + * This is effectively a popcnt of boundary bytes that are less than the + * input byte. + * Single nodes are processed in the same ways as quad range nodes. + * P is the intrinsic name prefix (e.g. mm) and S the register width that + * is pasted into the _<P>_..._si<S> intrinsic names (e.g. 128). + * The result is written to addr. +*/ +#define ACL_TR_CALC_ADDR(P, S, \ + addr, index_mask, next_input, shuffle_input, \ + ones_16, range_base, tr_lo, tr_hi) do { \ + \ + typeof(addr) in, node_type, r, t; \ + typeof(addr) dfa_msk, dfa_ofs, quad_ofs; \ + \ + t = _##P##_xor_si##S(index_mask, index_mask); \ + in = _##P##_shuffle_epi8(next_input, shuffle_input); \ + \ + /* Calc node type and node addr */ \ + node_type = _##P##_andnot_si##S(index_mask, tr_lo); \ + addr = _##P##_and_si##S(index_mask, tr_lo); \ + \ + /* mask for DFA type(0) nodes */ \ + dfa_msk = _##P##_cmpeq_epi32(node_type, t); \ + \ + /* DFA calculations. */ \ + r = _##P##_srli_epi32(in, 30); \ + r = _##P##_add_epi8(r, range_base); \ + t = _##P##_srli_epi32(in, 24); \ + r = _##P##_shuffle_epi8(tr_hi, r); \ + \ + dfa_ofs = _##P##_sub_epi32(t, r); \ + \ + /* QUAD/SINGLE calculations: cmpgt marks each tr_hi byte the input byte is greater than, sign_epi8(t, t) turns each mark into 1, and the marks are then added up (ones_16 is presumably all-ones 16-bit words - confirm at the definition).
*/ \ + t = _##P##_cmpgt_epi8(in, tr_hi); \ + t = _##P##_sign_epi8(t, t); \ + t = _##P##_maddubs_epi16(t, t); \ + quad_ofs = _##P##_madd_epi16(t, ones_16); \ + \ + /* blend DFA and QUAD/SINGLE. */ \ + t = _##P##_blendv_epi8(quad_ofs, dfa_ofs, dfa_msk); \ + \ + /* calculate address for next transitions. */ \ + addr = _##P##_add_epi32(addr, t); \ +} while (0) + + +#ifdef __cplusplus +} +#endif + +#endif /* _RTE_ACL_VECT_H_ */ diff --git a/src/dpdk22/lib/librte_acl/rte_acl.h b/src/dpdk22/lib/librte_acl/rte_acl.h new file mode 100644 index 00000000..0979a098 --- /dev/null +++ b/src/dpdk22/lib/librte_acl/rte_acl.h @@ -0,0 +1,388 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED.
IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef _RTE_ACL_H_ +#define _RTE_ACL_H_ + +/** + * @file + * + * RTE Classifier. + */ + +#include + +#ifdef __cplusplus +extern "C" { +#endif + +#define RTE_ACL_MAX_CATEGORIES 16 + +#define RTE_ACL_RESULTS_MULTIPLIER (XMM_SIZE / sizeof(uint32_t)) + +#define RTE_ACL_MAX_LEVELS 64 +#define RTE_ACL_MAX_FIELDS 64 + +union rte_acl_field_types { + uint8_t u8; + uint16_t u16; + uint32_t u32; + uint64_t u64; +}; + +enum { + RTE_ACL_FIELD_TYPE_MASK = 0, + RTE_ACL_FIELD_TYPE_RANGE, + RTE_ACL_FIELD_TYPE_BITMASK +}; + +/** + * ACL Field definition. + * Each field in the ACL rule has an associated definition. + * It defines the type of field, its size, its offset in the input buffer, + * the field index, and the input index. + * For performance reasons, the inner loop of the search function is unrolled + * to process four input bytes at a time. This requires the input to be grouped + * into sets of 4 consecutive bytes. The loop processes the first input byte as + * part of the setup and then subsequent bytes must be in groups of 4 + * consecutive bytes. + */ +struct rte_acl_field_def { + uint8_t type; /**< type - RTE_ACL_FIELD_TYPE_*. */ + uint8_t size; /**< size of field 1,2,4, or 8. */ + uint8_t field_index; /**< index of field inside the rule. */ + uint8_t input_index; /**< 0-N input index. */ + uint32_t offset; /**< offset to start of field. */ +}; + +/** + * ACL build configuration.
+ * Defines the fields of an ACL trie and number of categories to build with. + */ +struct rte_acl_config { + uint32_t num_categories; /**< Number of categories to build with. */ + uint32_t num_fields; /**< Number of field definitions. */ + struct rte_acl_field_def defs[RTE_ACL_MAX_FIELDS]; + /**< array of field definitions. */ + size_t max_size; + /**< max memory limit for internal run-time structures. */ +}; + +/** + * Defines the value of a field for a rule. + */ +struct rte_acl_field { + union rte_acl_field_types value; + /**< a 1,2,4, or 8 byte value of the field. */ + union rte_acl_field_types mask_range; + /**< + * depending on field type: + * mask -> 1.2.3.4/32 value=0x1020304, mask_range=32, + * range -> 0 : 65535 value=0, mask_range=65535, + * bitmask -> 0x06/0xff value=6, mask_range=0xff. + */ +}; + +enum { + RTE_ACL_TYPE_SHIFT = 29, + RTE_ACL_MAX_INDEX = RTE_LEN2MASK(RTE_ACL_TYPE_SHIFT, uint32_t), + RTE_ACL_MAX_PRIORITY = RTE_ACL_MAX_INDEX, + RTE_ACL_MIN_PRIORITY = 0, +}; + +#define RTE_ACL_INVALID_USERDATA 0 + +#define RTE_ACL_MASKLEN_TO_BITMASK(v, s) \ +((v) == 0 ? (v) : (typeof(v))((uint64_t)-1 << ((s) * CHAR_BIT - (v)))) + +/** + * Miscellaneous data for ACL rule. + */ +struct rte_acl_rule_data { + uint32_t category_mask; /**< Mask of categories for that rule. */ + int32_t priority; /**< Priority for that rule. */ + uint32_t userdata; /**< Associated with the rule user data. */ +}; + +/** + * Defines single ACL rule. + * data - miscellaneous data for the rule. + * field[] - value and mask or range for each field. + */ +#define RTE_ACL_RULE_DEF(name, fld_num) struct name {\ + struct rte_acl_rule_data data; \ + struct rte_acl_field field[fld_num]; \ +} + +RTE_ACL_RULE_DEF(rte_acl_rule, 0); + +#define RTE_ACL_RULE_SZ(fld_num) \ + (sizeof(struct rte_acl_rule) + sizeof(struct rte_acl_field) * (fld_num)) + + +/** Max number of characters in name.*/ +#define RTE_ACL_NAMESIZE 32 + +/** + * Parameters used when creating the ACL context.
+ */ +struct rte_acl_param { + const char *name; /**< Name of the ACL context. */ + int socket_id; /**< Socket ID to allocate memory for. */ + uint32_t rule_size; /**< Size of each rule. */ + uint32_t max_rule_num; /**< Maximum number of rules. */ +}; + + +/** + * Create a new ACL context. + * + * @param param + * Parameters used to create and initialise the ACL context. + * @return + * Pointer to ACL context structure that is used in future ACL + * operations, or NULL on error, with error code set in rte_errno. + * Possible rte_errno errors include: + * - EINVAL - invalid parameter passed to function + */ +struct rte_acl_ctx * +rte_acl_create(const struct rte_acl_param *param); + +/** + * Find an existing ACL context object and return a pointer to it. + * + * @param name + * Name of the ACL context as passed to rte_acl_create() + * @return + * Pointer to ACL context or NULL if object not found + * with rte_errno set appropriately. Possible rte_errno values include: + * - ENOENT - value not available for return + */ +struct rte_acl_ctx * +rte_acl_find_existing(const char *name); + +/** + * De-allocate all memory used by ACL context. + * + * @param ctx + * ACL context to free + */ +void +rte_acl_free(struct rte_acl_ctx *ctx); + +/** + * Add rules to an existing ACL context. + * This function is not multi-thread safe. + * + * @param ctx + * ACL context to add patterns to. + * @param rules + * Array of rules to add to the ACL context. + * Note that all fields in rte_acl_rule structures are expected + * to be in host byte order. + * Each rule is expected to be in the same format and not exceed size + * specified at ACL context creation time. + * @param num + * Number of elements in the input array of rules. + * @return + * - -ENOMEM if there is no space in the ACL context for these rules. + * - -EINVAL if the parameters are invalid. + * - Zero if operation completed successfully.
+ */ +int +rte_acl_add_rules(struct rte_acl_ctx *ctx, const struct rte_acl_rule *rules, + uint32_t num); + +/** + * Delete all rules from the ACL context. + * This function is not multi-thread safe. + * Note that internal run-time structures are not affected. + * + * @param ctx + * ACL context to delete rules from. + */ +void +rte_acl_reset_rules(struct rte_acl_ctx *ctx); + +/** + * Analyze set of rules and build required internal run-time structures. + * This function is not multi-thread safe. + * + * @param ctx + * ACL context to build. + * @param cfg + * Pointer to struct rte_acl_config - defines build parameters. + * @return + * - -ENOMEM if couldn't allocate enough memory. + * - -EINVAL if the parameters are invalid. + * - Negative error code if operation failed. + * - Zero if operation completed successfully. + */ +int +rte_acl_build(struct rte_acl_ctx *ctx, const struct rte_acl_config *cfg); + +/** + * Delete all rules from the ACL context and + * destroy all internal run-time structures. + * This function is not multi-thread safe. + * + * @param ctx + * ACL context to reset. + */ +void +rte_acl_reset(struct rte_acl_ctx *ctx); + +/** + * Available implementations of ACL classify. + */ +enum rte_acl_classify_alg { + RTE_ACL_CLASSIFY_DEFAULT = 0, + RTE_ACL_CLASSIFY_SCALAR = 1, /**< generic implementation. */ + RTE_ACL_CLASSIFY_SSE = 2, /**< requires SSE4.1 support. */ + RTE_ACL_CLASSIFY_AVX2 = 3, /**< requires AVX2 support. */ + RTE_ACL_CLASSIFY_NEON = 4, /**< requires NEON support. */ + RTE_ACL_CLASSIFY_NUM /* should always be the last one. */ +}; + +/** + * Perform search for a matching ACL rule for each input data buffer. + * Each input data buffer can have up to *categories* matches. + * That implies that results array should be big enough to hold + * (categories * num) elements. + * Also categories parameter should be either one or multiple of + * RTE_ACL_RESULTS_MULTIPLIER and can't be bigger than RTE_ACL_MAX_CATEGORIES.
+ * If more than one rule is applicable for given input buffer and + * given category, then rule with highest priority will be returned as a match. + * Note, that it is a caller's responsibility to ensure that input parameters + * are valid and point to correct memory locations. + * + * @param ctx + * ACL context to search with. + * @param data + * Array of pointers to input data buffers to perform search. + * Note that all fields in input data buffers are supposed to be in network + * byte order (MSB). + * @param results + * Array of search results, *categories* results per each input data buffer. + * @param num + * Number of elements in the input data buffers array. + * @param categories + * Number of maximum possible matches for each input buffer, one possible + * match per category. + * @return + * zero on successful completion. + * -EINVAL for incorrect arguments. + */ +extern int +rte_acl_classify(const struct rte_acl_ctx *ctx, + const uint8_t **data, + uint32_t *results, uint32_t num, + uint32_t categories); + +/** + * Perform search using specified algorithm for a matching ACL rule for + * each input data buffer. + * Each input data buffer can have up to *categories* matches. + * That implies that results array should be big enough to hold + * (categories * num) elements. + * Also categories parameter should be either one or multiple of + * RTE_ACL_RESULTS_MULTIPLIER and can't be bigger than RTE_ACL_MAX_CATEGORIES. + * If more than one rule is applicable for given input buffer and + * given category, then rule with highest priority will be returned as a match. + * Note, that it is a caller's responsibility to ensure that input parameters + * are valid and point to correct memory locations. + * + * @param ctx + * ACL context to search with. + * @param data + * Array of pointers to input data buffers to perform search. + * Note that all fields in input data buffers are supposed to be in network + * byte order (MSB).
+ * @param results + * Array of search results, *categories* results per each input data buffer. + * @param num + * Number of elements in the input data buffers array. + * @param categories + * Number of maximum possible matches for each input buffer, one possible + * match per category. + * @param alg + * Algorithm to be used for the search. + * It is the caller responsibility to ensure that the value refers to the + * existing algorithm, and that it could be run on the given CPU. + * @return + * zero on successful completion. + * -EINVAL for incorrect arguments. + */ +extern int +rte_acl_classify_alg(const struct rte_acl_ctx *ctx, + const uint8_t **data, + uint32_t *results, uint32_t num, + uint32_t categories, + enum rte_acl_classify_alg alg); + +/** + * Override the default classifier function for a given ACL context. + * @param ctx + * ACL context to change classify function for. + * @param alg + * New default classify algorithm for given ACL context. + * It is the caller responsibility to ensure that the value refers to the + * existing algorithm, and that it could be run on the given CPU. + * @return + * - -EINVAL if the parameters are invalid. + * - Zero if operation completed successfully. + */ +extern int +rte_acl_set_ctx_classify(struct rte_acl_ctx *ctx, + enum rte_acl_classify_alg alg); + +/** + * Dump an ACL context structure to the console. + * + * @param ctx + * ACL context to dump. + */ +void +rte_acl_dump(const struct rte_acl_ctx *ctx); + +/** + * Dump all ACL context structures to the console. + */ +void +rte_acl_list_dump(void); + +#ifdef __cplusplus +} +#endif + +#endif /* _RTE_ACL_H_ */ diff --git a/src/dpdk22/lib/librte_acl/rte_acl_osdep.h b/src/dpdk22/lib/librte_acl/rte_acl_osdep.h new file mode 100644 index 00000000..41f7e3d4 --- /dev/null +++ b/src/dpdk22/lib/librte_acl/rte_acl_osdep.h @@ -0,0 +1,80 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. + * All rights reserved.
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef _RTE_ACL_OSDEP_H_ +#define _RTE_ACL_OSDEP_H_ + +/** + * @file + * + * RTE ACL DPDK/OS dependent file. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +/* + * Common defines. 
+ */ + +#define DIM(x) RTE_DIM(x) + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#endif /* _RTE_ACL_OSDEP_H_ */ diff --git a/src/dpdk22/lib/librte_acl/tb_mem.h b/src/dpdk22/lib/librte_acl/tb_mem.h new file mode 100644 index 00000000..ca7af966 --- /dev/null +++ b/src/dpdk22/lib/librte_acl/tb_mem.h @@ -0,0 +1,76 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef _TB_MEM_H_ +#define _TB_MEM_H_ + +/** + * @file + * + * RTE ACL temporary (build phase) memory management. + * Contains structures and functions to manage temporary (used by build only) + * memory. Memory allocated in large blocks to speed 'free' when trie is + * destructed (finish of build phase). + */ + +#ifdef __cplusplus +extern "C" { +#endif + +#include +#include + +struct tb_mem_block { + struct tb_mem_block *next; + struct tb_mem_pool *pool; + size_t size; + uint8_t *mem; +}; + +struct tb_mem_pool { + struct tb_mem_block *block; + size_t alignment; + size_t min_alloc; + size_t alloc; + /* jump target in case of memory allocation failure. + * NOTE(review): this suggests tb_alloc() leaves through a siglongjmp() + * to this buffer on failure instead of returning NULL, so a caller + * would have to sigsetjmp() on it first - confirm in tb_mem.c. */ + sigjmp_buf fail; +}; + +void *tb_alloc(struct tb_mem_pool *pool, size_t size); +void tb_free_pool(struct tb_mem_pool *pool); + +#ifdef __cplusplus +} +#endif + +#endif /* _TB_MEM_H_ */ diff --git a/src/dpdk22/lib/librte_cfgfile/rte_cfgfile.c b/src/dpdk22/lib/librte_cfgfile/rte_cfgfile.c new file mode 100644 index 00000000..a677dade --- /dev/null +++ b/src/dpdk22/lib/librte_cfgfile/rte_cfgfile.c @@ -0,0 +1,356 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. + * All rights reserved.
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#include +#include +#include +#include +#include + +#include "rte_cfgfile.h" + +struct rte_cfgfile_section { + char name[CFG_NAME_LEN]; + int num_entries; + struct rte_cfgfile_entry *entries[0]; +}; + +struct rte_cfgfile { + int flags; + int num_sections; + struct rte_cfgfile_section *sections[0]; +}; + +/** when we resize a file structure, how many extra entries + * for new sections do we add in */ +#define CFG_ALLOC_SECTION_BATCH 8 +/** when we resize a section structure, how many extra entries + * for new entries do we add in */ +#define CFG_ALLOC_ENTRY_BATCH 16 + +static unsigned +_strip(char *str, unsigned len) +{ + int newlen = len; + if (len == 0) + return 0; + + if (isspace(str[len-1])) { + /* strip trailing whitespace */ + while (newlen > 0 && isspace(str[newlen - 1])) + str[--newlen] = '\0'; + } + + if (isspace(str[0])) { + /* strip leading whitespace */ + int i, start = 1; + while (isspace(str[start]) && start < newlen) + start++ + ; /* do nothing */ + newlen -= start; + for (i = 0; i < newlen; i++) + str[i] = str[i+start]; + str[i] = '\0'; + } + return newlen; +} + +struct rte_cfgfile * +rte_cfgfile_load(const char *filename, int flags) +{ + int allocated_sections = CFG_ALLOC_SECTION_BATCH; + int allocated_entries = 0; + int curr_section = -1; + int curr_entry = -1; + char buffer[256] = {0}; + int lineno = 0; + struct rte_cfgfile *cfg = NULL; + + FILE *f = fopen(filename, "r"); + if (f == NULL) + return NULL; + + cfg = malloc(sizeof(*cfg) + sizeof(cfg->sections[0]) * + allocated_sections); + if (cfg == NULL) + goto error2; + + memset(cfg->sections, 0, sizeof(cfg->sections[0]) * allocated_sections); + + while (fgets(buffer, sizeof(buffer), f) != NULL) { + char *pos = NULL; + size_t len = strnlen(buffer, sizeof(buffer)); + lineno++; + if ((len >= sizeof(buffer) - 1) && (buffer[len-1] != '\n')) { + printf("Error line %d - no \\n found on string. 
" + "Check if line too long\n", lineno); + goto error1; + } + pos = memchr(buffer, ';', sizeof(buffer)); + if (pos != NULL) { + *pos = '\0'; + len = pos - buffer; + } + + len = _strip(buffer, len); + if (buffer[0] != '[' && memchr(buffer, '=', len) == NULL) + continue; + + if (buffer[0] == '[') { + /* section heading line */ + char *end = memchr(buffer, ']', len); + if (end == NULL) { + printf("Error line %d - no terminating '['" + "character found\n", lineno); + goto error1; + } + *end = '\0'; + _strip(&buffer[1], end - &buffer[1]); + + /* close off old section and add start new one */ + if (curr_section >= 0) + cfg->sections[curr_section]->num_entries = + curr_entry + 1; + curr_section++; + + /* resize overall struct if we don't have room for more + sections */ + if (curr_section == allocated_sections) { + allocated_sections += CFG_ALLOC_SECTION_BATCH; + struct rte_cfgfile *n_cfg = realloc(cfg, + sizeof(*cfg) + sizeof(cfg->sections[0]) + * allocated_sections); + if (n_cfg == NULL) { + printf("Error - no more memory\n"); + goto error1; + } + cfg = n_cfg; + } + + /* allocate space for new section */ + allocated_entries = CFG_ALLOC_ENTRY_BATCH; + curr_entry = -1; + cfg->sections[curr_section] = malloc( + sizeof(*cfg->sections[0]) + + sizeof(cfg->sections[0]->entries[0]) * + allocated_entries); + if (cfg->sections[curr_section] == NULL) { + printf("Error - no more memory\n"); + goto error1; + } + + snprintf(cfg->sections[curr_section]->name, + sizeof(cfg->sections[0]->name), + "%s", &buffer[1]); + } else { + /* value line */ + if (curr_section < 0) { + printf("Error line %d - value outside of" + "section\n", lineno); + goto error1; + } + + struct rte_cfgfile_section *sect = + cfg->sections[curr_section]; + char *split[2]; + if (rte_strsplit(buffer, sizeof(buffer), split, 2, '=') + != 2) { + printf("Error at line %d - cannot split " + "string\n", lineno); + goto error1; + } + + curr_entry++; + if (curr_entry == allocated_entries) { + allocated_entries += 
CFG_ALLOC_ENTRY_BATCH; + struct rte_cfgfile_section *n_sect = realloc( + sect, sizeof(*sect) + + sizeof(sect->entries[0]) * + allocated_entries); + if (n_sect == NULL) { + printf("Error - no more memory\n"); + goto error1; + } + sect = cfg->sections[curr_section] = n_sect; + } + + sect->entries[curr_entry] = malloc( + sizeof(*sect->entries[0])); + if (sect->entries[curr_entry] == NULL) { + printf("Error - no more memory\n"); + goto error1; + } + + struct rte_cfgfile_entry *entry = sect->entries[ + curr_entry]; + snprintf(entry->name, sizeof(entry->name), "%s", + split[0]); + snprintf(entry->value, sizeof(entry->value), "%s", + split[1]); + _strip(entry->name, strnlen(entry->name, + sizeof(entry->name))); + _strip(entry->value, strnlen(entry->value, + sizeof(entry->value))); + } + } + fclose(f); + cfg->flags = flags; + cfg->num_sections = curr_section + 1; + /* curr_section will still be -1 if we have an empty file */ + if (curr_section >= 0) + cfg->sections[curr_section]->num_entries = curr_entry + 1; + return cfg; + +error1: + rte_cfgfile_close(cfg); +error2: + fclose(f); + return NULL; +} + + +int rte_cfgfile_close(struct rte_cfgfile *cfg) +{ + int i, j; + + if (cfg == NULL) + return -1; + + for (i = 0; i < cfg->num_sections; i++) { + if (cfg->sections[i] != NULL) { + if (cfg->sections[i]->num_entries) { + for (j = 0; j < cfg->sections[i]->num_entries; + j++) { + if (cfg->sections[i]->entries[j] != + NULL) + free(cfg->sections[i]-> + entries[j]); + } + } + free(cfg->sections[i]); + } + } + free(cfg); + + return 0; +} + +int +rte_cfgfile_num_sections(struct rte_cfgfile *cfg, const char *sectionname, +size_t length) +{ + int i; + int num_sections = 0; + for (i = 0; i < cfg->num_sections; i++) { + if (strncmp(cfg->sections[i]->name, sectionname, length) == 0) + num_sections++; + } + return num_sections; +} + +int +rte_cfgfile_sections(struct rte_cfgfile *cfg, char *sections[], + int max_sections) +{ + int i; + + for (i = 0; i < cfg->num_sections && i < max_sections; 
i++) + snprintf(sections[i], CFG_NAME_LEN, "%s", + cfg->sections[i]->name); + + return i; +} + +static const struct rte_cfgfile_section * +_get_section(struct rte_cfgfile *cfg, const char *sectionname) +{ + int i; + for (i = 0; i < cfg->num_sections; i++) { + if (strncmp(cfg->sections[i]->name, sectionname, + sizeof(cfg->sections[0]->name)) == 0) + return cfg->sections[i]; + } + return NULL; +} + +int +rte_cfgfile_has_section(struct rte_cfgfile *cfg, const char *sectionname) +{ + return (_get_section(cfg, sectionname) != NULL); +} + +int +rte_cfgfile_section_num_entries(struct rte_cfgfile *cfg, + const char *sectionname) +{ + const struct rte_cfgfile_section *s = _get_section(cfg, sectionname); + if (s == NULL) + return -1; + return s->num_entries; +} + + +int +rte_cfgfile_section_entries(struct rte_cfgfile *cfg, const char *sectionname, + struct rte_cfgfile_entry *entries, int max_entries) +{ + int i; + const struct rte_cfgfile_section *sect = _get_section(cfg, sectionname); + if (sect == NULL) + return -1; + for (i = 0; i < max_entries && i < sect->num_entries; i++) + entries[i] = *sect->entries[i]; + return i; +} + +const char * +rte_cfgfile_get_entry(struct rte_cfgfile *cfg, const char *sectionname, + const char *entryname) +{ + int i; + const struct rte_cfgfile_section *sect = _get_section(cfg, sectionname); + if (sect == NULL) + return NULL; + for (i = 0; i < sect->num_entries; i++) + if (strncmp(sect->entries[i]->name, entryname, CFG_NAME_LEN) + == 0) + return sect->entries[i]->value; + return NULL; +} + +int +rte_cfgfile_has_entry(struct rte_cfgfile *cfg, const char *sectionname, + const char *entryname) +{ + return (rte_cfgfile_get_entry(cfg, sectionname, entryname) != NULL); +} diff --git a/src/dpdk22/lib/librte_cfgfile/rte_cfgfile.h b/src/dpdk22/lib/librte_cfgfile/rte_cfgfile.h new file mode 100644 index 00000000..d4437826 --- /dev/null +++ b/src/dpdk22/lib/librte_cfgfile/rte_cfgfile.h @@ -0,0 +1,200 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2014 
Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef __INCLUDE_RTE_CFGFILE_H__ +#define __INCLUDE_RTE_CFGFILE_H__ + +#ifdef __cplusplus +extern "C" { +#endif + +/** +* @file +* RTE Configuration File +* +* This library allows reading application defined parameters from standard +* format configuration file. 
+* +***/ + +#ifndef CFG_NAME_LEN +#define CFG_NAME_LEN 64 +#endif + +#ifndef CFG_VALUE_LEN +#define CFG_VALUE_LEN 256 +#endif + +/** Configuration file */ +struct rte_cfgfile; + +/** Configuration file entry */ +struct rte_cfgfile_entry { + char name[CFG_NAME_LEN]; /**< Name */ + char value[CFG_VALUE_LEN]; /**< Value */ +}; + +/** +* Open config file +* +* @param filename +* Config file name +* @param flags +* Config file flags, Reserved for future use. Must be set to 0. +* @return +* Handle to configuration file +*/ +struct rte_cfgfile *rte_cfgfile_load(const char *filename, int flags); + +/** +* Get number of sections in config file +* +* @param cfg +* Config file +* @param sec_name +* Section name +* @param length +* Maximum section name length +* @return +* 0 on success, error code otherwise +*/ +int rte_cfgfile_num_sections(struct rte_cfgfile *cfg, const char *sec_name, + size_t length); + +/** +* Get name of all config file sections. +* +* Fills in the array sections with the name of all the sections in the file +* (up to the number of max_sections sections). +* +* @param cfg +* Config file +* @param sections +* Array containing section names after successful invocation. Each elemen +* of this array should be preallocated by the user with at least +* CFG_NAME_LEN characters. 
+* @param max_sections +* Maximum number of section names to be stored in sections array +* @return +* 0 on success, error code otherwise +*/ +int rte_cfgfile_sections(struct rte_cfgfile *cfg, char *sections[], + int max_sections); + +/** +* Check if given section exists in config file +* +* @param cfg +* Config file +* @param sectionname +* Section name +* @return +* TRUE (value different than 0) if section exists, FALSE (value 0) otherwise +*/ +int rte_cfgfile_has_section(struct rte_cfgfile *cfg, const char *sectionname); + +/** +* Get number of entries in given config file section +* +* @param cfg +* Config file +* @param sectionname +* Section name +* @return +* Number of entries in section +*/ +int rte_cfgfile_section_num_entries(struct rte_cfgfile *cfg, + const char *sectionname); + +/** Get section entries as key-value pairs +* +* @param cfg +* Config file +* @param sectionname +* Section name +* @param entries +* Pre-allocated array of at least max_entries entries where the section +* entries are stored as key-value pair after successful invocation +* @param max_entries +* Maximum number of section entries to be stored in entries array +* @return +* 0 on success, error code otherwise +*/ +int rte_cfgfile_section_entries(struct rte_cfgfile *cfg, + const char *sectionname, + struct rte_cfgfile_entry *entries, + int max_entries); + +/** Get value of the named entry in named config file section +* +* @param cfg +* Config file +* @param sectionname +* Section name +* @param entryname +* Entry name +* @return +* Entry value +*/ +const char *rte_cfgfile_get_entry(struct rte_cfgfile *cfg, + const char *sectionname, + const char *entryname); + +/** Check if given entry exists in named config file section +* +* @param cfg +* Config file +* @param sectionname +* Section name +* @param entryname +* Entry name +* @return +* TRUE (value different than 0) if entry exists, FALSE (value 0) otherwise +*/ +int rte_cfgfile_has_entry(struct rte_cfgfile *cfg, const char 
*sectionname, + const char *entryname); + +/** Close config file +* +* @param cfg +* Config file +* @return +* 0 on success, error code otherwise +*/ +int rte_cfgfile_close(struct rte_cfgfile *cfg); + +#ifdef __cplusplus +} +#endif + +#endif diff --git a/src/dpdk22/lib/librte_compat/rte_compat.h b/src/dpdk22/lib/librte_compat/rte_compat.h new file mode 100644 index 00000000..1c3c8d52 --- /dev/null +++ b/src/dpdk22/lib/librte_compat/rte_compat.h @@ -0,0 +1,105 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2015 Neil Horman . + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#ifndef _RTE_COMPAT_H_ +#define _RTE_COMPAT_H_ +#include + +#ifdef RTE_BUILD_SHARED_LIB + +/* + * Provides backwards compatibility when updating exported functions. + * When a symol is exported from a library to provide an API, it also provides a + * calling convention (ABI) that is embodied in its name, return type, + * arguments, etc. On occasion that function may need to change to accommodate + * new functionality, behavior, etc. When that occurs, it is desireable to + * allow for backwards compatibility for a time with older binaries that are + * dynamically linked to the dpdk. To support that, the __vsym and + * VERSION_SYMBOL macros are created. They, in conjunction with the + * _version.map file for a given library allow for multiple versions of + * a symbol to exist in a shared library so that older binaries need not be + * immediately recompiled. + * + * Refer to the guidelines document in the docs subdirectory for details on the + * use of these macros + */ + +/* + * Macro Parameters: + * b - function base name + * e - function version extension, to be concatenated with base name + * n - function symbol version string to be applied + * f - function prototype + * p - full function symbol name + */ + +/* + * VERSION_SYMBOL + * Creates a symbol version table entry binding symbol @DPDK_ to the internal + * function name _ + */ +#define VERSION_SYMBOL(b, e, n) __asm__(".symver " RTE_STR(b) RTE_STR(e) ", " RTE_STR(b) "@DPDK_" RTE_STR(n)) + +/* + * BIND_DEFAULT_SYMBOL + * Creates a symbol version entry instructing the linker to bind references to + * symbol to the internal symbol _ + */ +#define BIND_DEFAULT_SYMBOL(b, e, n) __asm__(".symver " RTE_STR(b) RTE_STR(e) ", " RTE_STR(b) "@@DPDK_" RTE_STR(n)) +#define __vsym __attribute__((used)) + +/* + * MAP_STATIC_SYMBOL + * If a function has been bifurcated into multiple versions, none of which + * are defined as the exported symbol name in the map file, this macro can be + * used to alias a specific version 
of the symbol to its exported name. For + * example, if you have 2 versions of a function foo_v1 and foo_v2, where the + * former is mapped to foo@DPDK_1 and the latter is mapped to foo@DPDK_2 when + * building a shared library, this macro can be used to map either foo_v1 or + * foo_v2 to the symbol foo when building a static library, e.g.: + * MAP_STATIC_SYMBOL(void foo(), foo_v2); + */ +#define MAP_STATIC_SYMBOL(f, p) + +#else +/* + * No symbol versioning in use + */ +#define VERSION_SYMBOL(b, e, n) +#define __vsym +#define BIND_DEFAULT_SYMBOL(b, e, n) +#define MAP_STATIC_SYMBOL(f, p) f __attribute__((alias(RTE_STR(p)))) +/* + * RTE_BUILD_SHARED_LIB=n + */ +#endif + + +#endif /* _RTE_COMPAT_H_ */ diff --git a/src/dpdk22/lib/librte_distributor/rte_distributor.h b/src/dpdk22/lib/librte_distributor/rte_distributor.h new file mode 100644 index 00000000..7d36bc8a --- /dev/null +++ b/src/dpdk22/lib/librte_distributor/rte_distributor.h @@ -0,0 +1,247 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef _RTE_DISTRIBUTE_H_ +#define _RTE_DISTRIBUTE_H_ + +/** + * @file + * RTE distributor + * + * The distributor is a component which is designed to pass packets + * one-at-a-time to workers, with dynamic load balancing. + */ + +#ifdef __cplusplus +extern "C" { +#endif + +#define RTE_DISTRIBUTOR_NAMESIZE 32 /**< Length of name for instance */ + +struct rte_distributor; +struct rte_mbuf; + +/** + * Function to create a new distributor instance + * + * Reserves the memory needed for the distributor operation and + * initializes the distributor to work with the configured number of workers. + * + * @param name + * The name to be given to the distributor instance. 
+ * @param socket_id + * The NUMA node on which the memory is to be allocated + * @param num_workers + * The maximum number of workers that will request packets from this + * distributor + * @return + * The newly created distributor instance + */ +struct rte_distributor * +rte_distributor_create(const char *name, unsigned socket_id, + unsigned num_workers); + +/* *** APIS to be called on the distributor lcore *** */ +/* + * The following APIs are the public APIs which are designed for use on a + * single lcore which acts as the distributor lcore for a given distributor + * instance. These functions cannot be called on multiple cores simultaneously + * without using locking to protect access to the internals of the distributor. + * + * NOTE: a given lcore cannot act as both a distributor lcore and a worker lcore + * for the same distributor instance, otherwise deadlock will result. + */ + +/** + * Process a set of packets by distributing them among workers that request + * packets. The distributor will ensure that no two packets that have the + * same flow id, or tag, in the mbuf will be procesed at the same time. + * + * The user is advocated to set tag for each mbuf before calling this function. + * If user doesn't set the tag, the tag value can be various values depending on + * driver implementation and configuration. + * + * This is not multi-thread safe and should only be called on a single lcore. + * + * @param d + * The distributor instance to be used + * @param mbufs + * The mbufs to be distributed + * @param num_mbufs + * The number of mbufs in the mbufs array + * @return + * The number of mbufs processed. 
+ */ +int +rte_distributor_process(struct rte_distributor *d, + struct rte_mbuf **mbufs, unsigned num_mbufs); + +/** + * Get a set of mbufs that have been returned to the distributor by workers + * + * This should only be called on the same lcore as rte_distributor_process() + * + * @param d + * The distributor instance to be used + * @param mbufs + * The mbufs pointer array to be filled in + * @param max_mbufs + * The size of the mbufs array + * @return + * The number of mbufs returned in the mbufs array. + */ +int +rte_distributor_returned_pkts(struct rte_distributor *d, + struct rte_mbuf **mbufs, unsigned max_mbufs); + +/** + * Flush the distributor component, so that there are no in-flight or + * backlogged packets awaiting processing + * + * This should only be called on the same lcore as rte_distributor_process() + * + * @param d + * The distributor instance to be used + * @return + * The number of queued/in-flight packets that were completed by this call. + */ +int +rte_distributor_flush(struct rte_distributor *d); + +/** + * Clears the array of returned packets used as the source for the + * rte_distributor_returned_pkts() API call. + * + * This should only be called on the same lcore as rte_distributor_process() + * + * @param d + * The distributor instance to be used + */ +void +rte_distributor_clear_returns(struct rte_distributor *d); + +/* *** APIS to be called on the worker lcores *** */ +/* + * The following APIs are the public APIs which are designed for use on + * multiple lcores which act as workers for a distributor. Each lcore should use + * a unique worker id when requesting packets. + * + * NOTE: a given lcore cannot act as both a distributor lcore and a worker lcore + * for the same distributor instance, otherwise deadlock will result. + */ + +/** + * API called by a worker to get a new packet to process. 
Any previous packet + * given to the worker is assumed to have completed processing, and may be + * optionally returned to the distributor via the oldpkt parameter. + * + * @param d + * The distributor instance to be used + * @param worker_id + * The worker instance number to use - must be less that num_workers passed + * at distributor creation time. + * @param oldpkt + * The previous packet, if any, being processed by the worker + * + * @return + * A new packet to be processed by the worker thread. + */ +struct rte_mbuf * +rte_distributor_get_pkt(struct rte_distributor *d, + unsigned worker_id, struct rte_mbuf *oldpkt); + +/** + * API called by a worker to return a completed packet without requesting a + * new packet, for example, because a worker thread is shutting down + * + * @param d + * The distributor instance to be used + * @param worker_id + * The worker instance number to use - must be less that num_workers passed + * at distributor creation time. + * @param mbuf + * The previous packet being processed by the worker + */ +int +rte_distributor_return_pkt(struct rte_distributor *d, unsigned worker_id, + struct rte_mbuf *mbuf); + +/** + * API called by a worker to request a new packet to process. + * Any previous packet given to the worker is assumed to have completed + * processing, and may be optionally returned to the distributor via + * the oldpkt parameter. + * Unlike rte_distributor_get_pkt(), this function does not wait for a new + * packet to be provided by the distributor. + * + * NOTE: after calling this function, rte_distributor_poll_pkt() should + * be used to poll for the packet requested. The rte_distributor_get_pkt() + * API should *not* be used to try and retrieve the new packet. + * + * @param d + * The distributor instance to be used + * @param worker_id + * The worker instance number to use - must be less that num_workers passed + * at distributor creation time. 
+ * @param oldpkt + * The previous packet, if any, being processed by the worker + */ +void +rte_distributor_request_pkt(struct rte_distributor *d, + unsigned worker_id, struct rte_mbuf *oldpkt); + +/** + * API called by a worker to check for a new packet that was previously + * requested by a call to rte_distributor_request_pkt(). It does not wait + * for the new packet to be available, but returns NULL if the request has + * not yet been fulfilled by the distributor. + * + * @param d + * The distributor instance to be used + * @param worker_id + * The worker instance number to use - must be less that num_workers passed + * at distributor creation time. + * + * @return + * A new packet to be processed by the worker thread, or NULL if no + * packet is yet available. + */ +struct rte_mbuf * +rte_distributor_poll_pkt(struct rte_distributor *d, + unsigned worker_id); + +#ifdef __cplusplus +} +#endif + +#endif diff --git a/src/dpdk22/lib/librte_eal/common/eal_common_cpuflags.c b/src/dpdk22/lib/librte_eal/common/eal_common_cpuflags.c new file mode 100644 index 00000000..8ba7b30e --- /dev/null +++ b/src/dpdk22/lib/librte_eal/common/eal_common_cpuflags.c @@ -0,0 +1,86 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. 
+ * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ +#include +#include + +/* + * This should prevent use of advanced instruction sets in this file. Otherwise + * the check function itself could cause a crash. + */ +#ifdef __INTEL_COMPILER +#pragma optimize ("", off) +#else +#define GCC_VERSION (__GNUC__ * 10000 + __GNUC_MINOR__ * 100 + __GNUC_PATCHLEVEL__) +#if GCC_VERSION > 404000 +#pragma GCC optimize ("O0") +#endif +#endif + +/** + * Checks if the machine is adequate for running the binary. If it is not, the + * program exits with status 1. + * The function attribute forces this function to be called before main(). But + * with ICC, the check is generated by the compiler. 
+ */ +#ifndef __INTEL_COMPILER +void __attribute__ ((__constructor__)) +#else +void +#endif +rte_cpu_check_supported(void) +{ + /* This is generated at compile-time by the build system */ + static const enum rte_cpu_flag_t compile_time_flags[] = { + RTE_COMPILE_TIME_CPUFLAGS + }; + unsigned count = RTE_DIM(compile_time_flags), i; + int ret; + + for (i = 0; i < count; i++) { + ret = rte_cpu_get_flag_enabled(compile_time_flags[i]); + + if (ret < 0) { + fprintf(stderr, + "ERROR: CPU feature flag lookup failed with error %d\n", + ret); + exit(1); + } + if (!ret) { + fprintf(stderr, + "ERROR: This system does not support \"%s\".\n" + "Please check that RTE_MACHINE is set correctly.\n", + cpu_feature_table[compile_time_flags[i]].name); + exit(1); + } + } +} diff --git a/src/dpdk22/lib/librte_eal/common/eal_common_dev.c b/src/dpdk22/lib/librte_eal/common/eal_common_dev.c new file mode 100644 index 00000000..a8a4146c --- /dev/null +++ b/src/dpdk22/lib/librte_eal/common/eal_common_dev.c @@ -0,0 +1,152 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. + * Copyright(c) 2014 6WIND S.A. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include +#include +#include +#include + +#include +#include +#include +#include +#include + +#include "eal_private.h" + +/** Global list of device drivers. */ +static struct rte_driver_list dev_driver_list = + TAILQ_HEAD_INITIALIZER(dev_driver_list); + +/* register a driver */ +void +rte_eal_driver_register(struct rte_driver *driver) +{ + TAILQ_INSERT_TAIL(&dev_driver_list, driver, next); +} + +/* unregister a driver */ +void +rte_eal_driver_unregister(struct rte_driver *driver) +{ + TAILQ_REMOVE(&dev_driver_list, driver, next); +} + +int +rte_eal_vdev_init(const char *name, const char *args) +{ + struct rte_driver *driver; + + if (name == NULL) + return -EINVAL; + + TAILQ_FOREACH(driver, &dev_driver_list, next) { + if (driver->type != PMD_VDEV) + continue; + + /* + * search a driver prefix in virtual device name. + * For example, if the driver is pcap PMD, driver->name + * will be "eth_pcap", but "name" will be "eth_pcapN". + * So use strncmp to compare. 
+ */ + if (!strncmp(driver->name, name, strlen(driver->name))) + return driver->init(name, args); + } + + RTE_LOG(ERR, EAL, "no driver found for %s\n", name); + return -EINVAL; +} + +int +rte_eal_dev_init(void) +{ + struct rte_devargs *devargs; + struct rte_driver *driver; + + /* + * Note that the dev_driver_list is populated here + * from calls made to rte_eal_driver_register from constructor functions + * embedded into PMD modules via the PMD_REGISTER_DRIVER macro + */ + + /* call the init function for each virtual device */ + TAILQ_FOREACH(devargs, &devargs_list, next) { + + if (devargs->type != RTE_DEVTYPE_VIRTUAL) + continue; + + if (rte_eal_vdev_init(devargs->virt.drv_name, + devargs->args)) { + RTE_LOG(ERR, EAL, "failed to initialize %s device\n", + devargs->virt.drv_name); + return -1; + } + } + + /* Once the vdevs are initalized, start calling all the pdev drivers */ + TAILQ_FOREACH(driver, &dev_driver_list, next) { + if (driver->type != PMD_PDEV) + continue; + /* PDEV drivers don't get passed any parameters */ + driver->init(NULL, NULL); + } + return 0; +} + +int +rte_eal_vdev_uninit(const char *name) +{ + struct rte_driver *driver; + + if (name == NULL) + return -EINVAL; + + TAILQ_FOREACH(driver, &dev_driver_list, next) { + if (driver->type != PMD_VDEV) + continue; + + /* + * search a driver prefix in virtual device name. + * For example, if the driver is pcap PMD, driver->name + * will be "eth_pcap", but "name" will be "eth_pcapN". + * So use strncmp to compare. + */ + if (!strncmp(driver->name, name, strlen(driver->name))) + return driver->uninit(name); + } + + RTE_LOG(ERR, EAL, "no driver found for %s\n", name); + return -EINVAL; +} diff --git a/src/dpdk22/lib/librte_eal/common/eal_common_devargs.c b/src/dpdk22/lib/librte_eal/common/eal_common_devargs.c new file mode 100644 index 00000000..5d075d04 --- /dev/null +++ b/src/dpdk22/lib/librte_eal/common/eal_common_devargs.c @@ -0,0 +1,177 @@ +/*- + * BSD LICENSE + * + * Copyright 2014 6WIND S.A. 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of 6WIND S.A nor the names of its contributors + * may be used to endorse or promote products derived from this + * software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +/* This file manages the list of devices and their arguments, as given + * by the user at startup + * + * Code here should not call rte_log since the EAL environment + * may not be initialized. 
/*
 * Split a device argument string of the form "name[,args]".
 *
 * On success *drvname receives a newly allocated copy of the text before
 * the first ',' and *drvargs a newly allocated copy of everything after
 * it (an empty string when there is no ','). The caller owns both and
 * releases them with free().
 *
 * On failure nothing is left allocated. If the second allocation fails,
 * *drvname is reset to NULL after being freed, so a caller that frees its
 * buffer on its own error path does not free it a second time.
 *
 * Returns 0 on success, -1 if any argument is NULL or an allocation
 * fails.
 */
int
rte_eal_parse_devargs_str(const char *devargs_str,
			char **drvname, char **drvargs)
{
	char *sep;

	if (devargs_str == NULL || drvname == NULL || drvargs == NULL)
		return -1;

	*drvname = strdup(devargs_str);
	/* test the allocation result, not the (already validated) out-pointer */
	if (*drvname == NULL)
		return -1;

	/* set the first ',' to '\0' to split name and arguments */
	sep = strchr(*drvname, ',');
	if (sep != NULL) {
		sep[0] = '\0';
		*drvargs = strdup(sep + 1);
	} else {
		*drvargs = strdup("");
	}

	if (*drvargs == NULL) {
		free(*drvname);
		/* do not hand a dangling pointer back to the caller */
		*drvname = NULL;
		return -1;
	}
	return 0;
}
+unsigned int
+rte_eal_devargs_type_count(enum rte_devtype devtype)
+{
+	struct rte_devargs *entry;
+	unsigned int matches = 0;
+
+	/* walk the whole user device list and tally the matching entries */
+	TAILQ_FOREACH(entry, &devargs_list, next) {
+		if (entry->type == devtype)
+			matches++;
+	}
+	return matches;
+}
+
+/*
+ * Print every entry of the user device list to f, one line per device.
+ * PCI entries show their address and arguments, virtual entries their
+ * driver name and arguments, anything else is reported as UNKNOWN.
+ */
+void
+rte_eal_devargs_dump(FILE *f)
+{
+	struct rte_devargs *entry;
+
+	fprintf(f, "User device white list:\n");
+	TAILQ_FOREACH(entry, &devargs_list, next) {
+		switch (entry->type) {
+		case RTE_DEVTYPE_WHITELISTED_PCI:
+			fprintf(f, " PCI whitelist " PCI_PRI_FMT " %s\n",
+				entry->pci.addr.domain,
+				entry->pci.addr.bus,
+				entry->pci.addr.devid,
+				entry->pci.addr.function,
+				entry->args);
+			break;
+		case RTE_DEVTYPE_BLACKLISTED_PCI:
+			fprintf(f, " PCI blacklist " PCI_PRI_FMT " %s\n",
+				entry->pci.addr.domain,
+				entry->pci.addr.bus,
+				entry->pci.addr.devid,
+				entry->pci.addr.function,
+				entry->args);
+			break;
+		case RTE_DEVTYPE_VIRTUAL:
+			fprintf(f, " VIRTUAL %s %s\n",
+				entry->virt.drv_name,
+				entry->args);
+			break;
+		default:
+			fprintf(f, " UNKNOWN %s\n", entry->args);
+			break;
+		}
+	}
+}
diff --git a/src/dpdk22/lib/librte_eal/common/eal_common_errno.c b/src/dpdk22/lib/librte_eal/common/eal_common_errno.c
new file mode 100644
index 00000000..de48d8e4
--- /dev/null
+++ b/src/dpdk22/lib/librte_eal/common/eal_common_errno.c
@@ -0,0 +1,72 @@
+/*-
+ *   BSD LICENSE
+ *
+ *   Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+ *   All rights reserved.
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */
+
+#include
+#include
+#include
+#include
+#include
+
+#include
+#include
+#include
+
+RTE_DEFINE_PER_LCORE(int, _rte_errno);
+
+/*
+ * Translate an error number into a message.
+ *
+ * The two EAL-specific codes map to string literals. Every other value is
+ * formatted into a per-lcore buffer of RETVAL_SZ bytes, so the returned
+ * text is overwritten by the next call made from the same lcore.
+ *
+ * NOTE(review): the result of strerror_r() is not checked. With the GNU
+ * variant the message may be returned rather than stored in the buffer --
+ * confirm which variant this file is built against.
+ */
+const char *
+rte_strerror(int errnum)
+{
+#define RETVAL_SZ 256
+	static RTE_DEFINE_PER_LCORE(char[RETVAL_SZ], retval);
+	char *msg = RTE_PER_LCORE(retval);
+
+	/* since some implementations of strerror_r throw an error
+	 * themselves if errnum is too big, we handle that case here */
+	if (errnum > RTE_MAX_ERRNO) {
+#ifdef RTE_EXEC_ENV_BSDAPP
+		snprintf(msg, RETVAL_SZ, "Unknown error: %d", errnum);
+#else
+		snprintf(msg, RETVAL_SZ, "Unknown error %d", errnum);
+#endif
+		return msg;
+	}
+
+	if (errnum == E_RTE_SECONDARY)
+		return "Invalid call in secondary process";
+	if (errnum == E_RTE_NO_CONFIG)
+		return "Missing rte_config structure";
+
+	strerror_r(errnum, msg, RETVAL_SZ);
+	return msg;
+}
diff --git a/src/dpdk22/lib/librte_eal/common/eal_common_hexdump.c b/src/dpdk22/lib/librte_eal/common/eal_common_hexdump.c
new file mode 100644
index 00000000..d5cbd703
--- /dev/null
+++ b/src/dpdk22/lib/librte_eal/common/eal_common_hexdump.c
@@ -0,0 +1,120 @@
+/*-
+ *   BSD LICENSE
+ *
+ *   Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+ *   All rights reserved.
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in
+ *       the documentation and/or other materials provided with the
+ *       distribution.
+ *     * Neither the name of Intel Corporation nor the names of its
+ *       contributors may be used to endorse or promote products derived
+ *       from this software without specific prior written permission.
+ *
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+#include
+#include
+#include
+#include
+#include
+#include
+
+#define LINE_LEN 128
+
+/**************************************************************************//**
+*
+* rte_hexdump - Dump out memory in a special hex dump format.
+*
+* DESCRIPTION
+* Writes a header line (title, address, length) followed by one text line
+* per 16 input bytes: the offset, the bytes in hex, a separator and the same
+* bytes as characters, with anything outside ' '..'~' shown as '.'.
+* A NULL title is replaced by a default label.
+*
+* RETURNS: N/A
+*
+* SEE ALSO:
+*/
+
+void
+rte_hexdump(FILE *f, const char * title, const void * buf, unsigned int len)
+{
+	const unsigned char *data = buf;
+	char line[LINE_LEN];	/* space needed 8+16*3+3+16 == 75 */
+	unsigned int col, used;
+	unsigned int pos = 0;
+
+	fprintf(f, "%s at [%p], len=%u\n", (title)? title : " Dump data", data, len);
+	while (pos < len) {
+		/* build the whole line in the buffer, then emit it in one go */
+		used = snprintf(line, LINE_LEN, "%08X:", pos);
+
+		/* hex column: up to 16 bytes of this row */
+		for (col = 0; (col < 16) && ((pos + col) < len); col++)
+			used += snprintf(line + used, LINE_LEN - used,
+					 " %02X", (data[pos + col] & 0xff));
+
+		/* pad a short row; the final pass writes the separator itself */
+		for (; col <= 16; col++)
+			used += snprintf(line + used, LINE_LEN - used, " | ");
+
+		/* character column: consumes the row and advances pos */
+		for (col = 0; (col < 16) && (pos < len); col++, pos++) {
+			unsigned char c = data[pos];
+
+			if ((c < ' ') || (c > '~'))
+				c = '.';
+			used += snprintf(line + used, LINE_LEN - used, "%c", c);
+		}
+		fprintf(f, "%s\n", line);
+	}
+	fflush(f);
+}
+
+/**************************************************************************//**
+*
+* rte_memdump - Dump out memory in hex bytes with colons.
+*
+* DESCRIPTION
+* Writes the optional title followed by the buffer as lower-case hex bytes
+* separated by colons (xx:xx:xx:...), then a newline. The text is staged in
+* a LINE_LEN buffer that is written out whenever it is nearly full.
+*
+* RETURNS: N/A
+*
+* SEE ALSO:
+*/
+
+void
+rte_memdump(FILE *f, const char * title, const void * buf, unsigned int len)
+{
+	const unsigned char *data = buf;
+	char line[LINE_LEN];
+	unsigned int idx;
+	unsigned int used = 0;
+
+	if (title != NULL)
+		fprintf(f, "%s: ", title);
+
+	line[0] = '\0';
+	for (idx = 0; idx < len; idx++) {
+		const char *sep = ((idx + 1) < len) ? ":" : "";
+
+		/* Make sure we do not overrun the line buffer length. */
+		if (used >= (LINE_LEN - 4)) {
+			fprintf(f, "%s", line);
+			used = 0;
+			line[0] = '\0';
+		}
+		used += snprintf(line + used, LINE_LEN - used, "%02x%s",
+				 (data[idx] & 0xff), sep);
+	}
+	if (used > 0)
+		fprintf(f, "%s", line);
+	fprintf(f, "\n");
+
+	fflush(f);
+}
diff --git a/src/dpdk22/lib/librte_eal/common/eal_common_launch.c b/src/dpdk22/lib/librte_eal/common/eal_common_launch.c
new file mode 100644
index 00000000..229c3a03
--- /dev/null
+++ b/src/dpdk22/lib/librte_eal/common/eal_common_launch.c
@@ -0,0 +1,118 @@
+/*-
+ *   BSD LICENSE
+ *
+ *   Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+ *   All rights reserved.
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include + +/* + * Wait until a lcore finished its job. 
+ */
+int
+rte_eal_wait_lcore(unsigned slave_id)
+{
+	/* nothing was launched (or it was already collected): no result */
+	if (lcore_config[slave_id].state == WAIT)
+		return 0;
+
+	/* spin until the lcore reports WAIT or FINISHED */
+	for (;;) {
+		if (lcore_config[slave_id].state == WAIT ||
+		    lcore_config[slave_id].state == FINISHED)
+			break;
+	}
+
+	rte_rmb();
+
+	/* we are in finished state, go to wait state */
+	lcore_config[slave_id].state = WAIT;
+	return lcore_config[slave_id].ret;
+}
+
+/*
+ * Launch f(arg) on every slave lcore, provided all of them are in WAIT
+ * state; otherwise nothing is launched and -EBUSY is returned. When
+ * call_master is CALL_MASTER, f(arg) is then also run on the calling
+ * (master) lcore and its result stored in the master's lcore_config entry.
+ */
+int
+rte_eal_mp_remote_launch(int (*f)(void *), void *arg,
+			 enum rte_rmt_call_master_t call_master)
+{
+	int master_id = rte_get_master_lcore();
+	int id;
+
+	/* first pass: refuse to start if any slave is still busy */
+	RTE_LCORE_FOREACH_SLAVE(id) {
+		if (lcore_config[id].state != WAIT)
+			return -EBUSY;
+	}
+
+	/* second pass: send messages to cores */
+	RTE_LCORE_FOREACH_SLAVE(id) {
+		rte_eal_remote_launch(f, arg, id);
+	}
+
+	if (call_master != CALL_MASTER)
+		return 0;
+
+	lcore_config[master_id].ret = f(arg);
+	lcore_config[master_id].state = FINISHED;
+	return 0;
+}
+
+/*
+ * Return the state of the lcore identified by lcore_id.
+ */
+enum rte_lcore_state_t
+rte_eal_get_lcore_state(unsigned lcore_id)
+{
+	enum rte_lcore_state_t current = lcore_config[lcore_id].state;
+
+	return current;
+}
+
+/*
+ * Do a rte_eal_wait_lcore() for every slave lcore. The return values
+ * are ignored.
+ */
+void
+rte_eal_mp_wait_lcore(void)
+{
+	unsigned id;
+
+	RTE_LCORE_FOREACH_SLAVE(id)
+		rte_eal_wait_lcore(id);
+}
diff --git a/src/dpdk22/lib/librte_eal/common/eal_common_lcore.c b/src/dpdk22/lib/librte_eal/common/eal_common_lcore.c
new file mode 100644
index 00000000..a4263ba5
--- /dev/null
+++ b/src/dpdk22/lib/librte_eal/common/eal_common_lcore.c
@@ -0,0 +1,110 @@
+/*-
+ *   BSD LICENSE
+ *
+ *   Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+ *   All rights reserved.
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include +#include +#include +#include + +#include +#include +#include +#include +#include + +#include "eal_private.h" +#include "eal_thread.h" + +/* + * Parse /sys/devices/system/cpu to get the number of physical and logical + * processors on the machine. The function will fill the cpu_info + * structure. 
+ */
+int
+rte_eal_cpu_init(void)
+{
+	/* pointer to global configuration */
+	struct rte_config *cfg = rte_eal_get_configuration();
+	unsigned nb_detected = 0;
+	unsigned id;
+
+	/*
+	 * Walk every possible lcore id, find out which ones exist on this
+	 * machine and enable those by default.
+	 */
+	for (id = 0; id < RTE_MAX_LCORE; id++) {
+		/* init cpuset for per lcore config */
+		CPU_ZERO(&lcore_config[id].cpuset);
+
+		/* in 1:1 mapping, record related cpu detected state */
+		lcore_config[id].detected = eal_cpu_detected(id);
+		if (lcore_config[id].detected == 0) {
+			/* absent cpu: switched off and given no index */
+			cfg->lcore_role[id] = ROLE_OFF;
+			lcore_config[id].core_index = -1;
+			continue;
+		}
+
+		/* detected cores are numbered in detection order */
+		lcore_config[id].core_index = nb_detected;
+
+		/* By default, lcore 1:1 map to cpu id */
+		CPU_SET(id, &lcore_config[id].cpuset);
+
+		/* By default, each detected core is enabled */
+		cfg->lcore_role[id] = ROLE_RTE;
+		lcore_config[id].core_id = eal_cpu_core_id(id);
+		lcore_config[id].socket_id = eal_cpu_socket_id(id);
+		if (lcore_config[id].socket_id >= RTE_MAX_NUMA_NODES) {
+#ifdef RTE_EAL_ALLOW_INV_SOCKET_ID
+			lcore_config[id].socket_id = 0;
+#else
+			rte_panic("Socket ID (%u) is greater than "
+				"RTE_MAX_NUMA_NODES (%d)\n",
+				lcore_config[id].socket_id,
+				RTE_MAX_NUMA_NODES);
+#endif
+		}
+
+		RTE_LOG(DEBUG, EAL, "Detected lcore %u as "
+				"core %u on socket %u\n",
+				id, lcore_config[id].core_id,
+				lcore_config[id].socket_id);
+		nb_detected++;
+	}
+
+	/* Set the count of enabled logical cores of the EAL configuration */
+	cfg->lcore_count = nb_detected;
+	RTE_LOG(DEBUG, EAL,
+		"Support maximum %u logical core(s) by configuration.\n",
+		RTE_MAX_LCORE);
+	RTE_LOG(DEBUG, EAL, "Detected %u lcore(s)\n", cfg->lcore_count);
+
+	return 0;
+}
diff --git a/src/dpdk22/lib/librte_eal/common/eal_common_log.c b/src/dpdk22/lib/librte_eal/common/eal_common_log.c
new file mode 100644
index 00000000..1ae8de70
--- /dev/null +++ b/src/dpdk22/lib/librte_eal/common/eal_common_log.c @@ -0,0 +1,337 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */
+
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+#include "eal_private.h"
+
+#define LOG_ELT_SIZE 2048
+
+#define LOG_HISTORY_MP_NAME "log_history"
+
+STAILQ_HEAD(log_history_list, log_history);
+
+/**
+ * The structure of a message log in the log history.
+ */
+struct log_history {
+	STAILQ_ENTRY(log_history) next;
+	unsigned size;
+	char buf[0];
+};
+
+static struct rte_mempool *log_history_mp = NULL;
+static unsigned log_history_size = 0;
+static struct log_history_list log_history;
+
+/* global log structure */
+struct rte_logs rte_logs = {
+	.type = ~0,
+	.level = RTE_LOG_DEBUG,
+	.file = NULL,
+};
+
+static rte_spinlock_t log_dump_lock = RTE_SPINLOCK_INITIALIZER;
+static rte_spinlock_t log_list_lock = RTE_SPINLOCK_INITIALIZER;
+static FILE *default_log_stream;
+static int history_enabled = 1;
+
+/**
+ * This global structure stores some information about the message
+ * that is currently being processed by one lcore
+ */
+struct log_cur_msg {
+	uint32_t loglevel; /**< log level - see rte_log.h */
+	uint32_t logtype;  /**< log type  - see rte_log.h */
+} __rte_cache_aligned;
+static struct log_cur_msg log_cur_msg[RTE_MAX_LCORE]; /**< per core log */
+
+
+/* default logs */
+
+/*
+ * Append a copy of one message (size bytes, no terminator required) to
+ * the in-memory history.
+ *
+ * Returns 0 when the message was stored or when history is disabled, and
+ * -ENOBUFS when the message does not fit in one history element or no
+ * element could be obtained. Once the list holds more than RTE_LOG_HISTORY
+ * entries the oldest one is recycled for the new message.
+ */
+int
+rte_log_add_in_history(const char *buf, size_t size)
+{
+	struct log_history *hist_buf = NULL;
+	static const unsigned hist_buf_size = LOG_ELT_SIZE - sizeof(*hist_buf);
+	void *obj;
+
+	if (history_enabled == 0)
+		return 0;
+
+	/*
+	 * Reject oversized messages before touching the list, so that a
+	 * message which can never be stored does not evict the oldest entry.
+	 */
+	if (size >= hist_buf_size)
+		return -ENOBUFS;
+
+	rte_spinlock_lock(&log_list_lock);
+
+	/* get a buffer for adding in history */
+	if (log_history_size > RTE_LOG_HISTORY) {
+		hist_buf = STAILQ_FIRST(&log_history);
+		if (hist_buf) {
+			STAILQ_REMOVE_HEAD(&log_history, next);
+			log_history_size--;
+		}
+	}
+	else {
+		if (rte_mempool_mc_get(log_history_mp, &obj) < 0)
+			obj = NULL;
+		hist_buf = obj;
+	}
+
+	/* no buffer */
+	if (hist_buf == NULL) {
+		rte_spinlock_unlock(&log_list_lock);
+		return -ENOBUFS;
+	}
+
+	/* add in history */
+	memcpy(hist_buf->buf, buf, size);
+	hist_buf->buf[size] = hist_buf->buf[hist_buf_size-1] = '\0';
+	hist_buf->size = size;
+	STAILQ_INSERT_TAIL(&log_history, hist_buf, next);
+	log_history_size++;
+	rte_spinlock_unlock(&log_list_lock);
+
+	return 0;
+}
+
+/* Enable (non-zero) or disable (zero) recording of messages in history */
+void
+rte_log_set_history(int enable)
+{
+	history_enabled = enable;
+}
+
+/*
+ * Change the stream that will be used by logging system.
+ * Passing NULL switches back to the default stream. Always returns 0.
+ */
+int
+rte_openlog_stream(FILE *f)
+{
+	if (f == NULL)
+		rte_logs.file = default_log_stream;
+	else
+		rte_logs.file = f;
+	return 0;
+}
+
+/* Set global log level */
+void
+rte_set_log_level(uint32_t level)
+{
+	rte_logs.level = (uint32_t)level;
+}
+
+/* Get global log level */
+uint32_t
+rte_get_log_level(void)
+{
+	return rte_logs.level;
+}
+
+/* Set (enable != 0) or clear (enable == 0) bits of the global log type mask */
+void
+rte_set_log_type(uint32_t type, int enable)
+{
+	if (enable)
+		rte_logs.type |= type;
+	else
+		rte_logs.type &= (~type);
+}
+
+/* Get global log type */
+uint32_t
+rte_get_log_type(void)
+{
+	return rte_logs.type;
+}
+
+/*
+ * Get the loglevel of the message being processed on the calling lcore.
+ * Callers whose lcore id is out of range get the global level instead.
+ */
+int rte_log_cur_msg_loglevel(void)
+{
+	unsigned lcore_id;
+	lcore_id = rte_lcore_id();
+	if (lcore_id >= RTE_MAX_LCORE)
+		return rte_get_log_level();
+	return log_cur_msg[lcore_id].loglevel;
+}
+
+/*
+ * Get the logtype of the message being processed on the calling lcore.
+ * Callers whose lcore id is out of range get the global type mask instead.
+ */
+int rte_log_cur_msg_logtype(void)
+{
+	unsigned lcore_id;
+	lcore_id = rte_lcore_id();
+	if (lcore_id >= RTE_MAX_LCORE)
+		return rte_get_log_type();
+	return log_cur_msg[lcore_id].logtype;
+}
+
+/*
+ * Dump log history to file.
+ *
+ * The history list is detached under the list lock so that logging can
+ * continue while the dump runs, then every detached element is written to
+ * out and returned to the mempool. Writing stops at the first failed
+ * fwrite(), but the remaining elements are still returned to the pool.
+ */
+void
+rte_log_dump_history(FILE *out)
+{
+	struct log_history_list tmp_log_history;
+	struct log_history *hist_buf;
+	int write_failed = 0;
+
+	/* only one dump at a time */
+	rte_spinlock_lock(&log_dump_lock);
+
+	/* save list, and re-init to allow logging during dump */
+	rte_spinlock_lock(&log_list_lock);
+	tmp_log_history = log_history;
+	STAILQ_INIT(&log_history);
+	log_history_size = 0;
+	rte_spinlock_unlock(&log_list_lock);
+
+	/* drain the detached list completely, whatever its length */
+	while ((hist_buf = STAILQ_FIRST(&tmp_log_history)) != NULL) {
+
+		/* remove one message from history list */
+		STAILQ_REMOVE_HEAD(&tmp_log_history, next);
+
+		/*
+		 * write on the output stream; an empty entry is skipped
+		 * because fwrite() of zero bytes also returns 0
+		 */
+		if (!write_failed && hist_buf->size != 0 &&
+		    fwrite(hist_buf->buf, hist_buf->size, 1, out) == 0)
+			write_failed = 1;
+
+		/* put back message structure in pool */
+		rte_mempool_mp_put(log_history_mp, hist_buf);
+	}
+	fflush(out);
+
+	rte_spinlock_unlock(&log_dump_lock);
+}
+
+/*
+ * Generates a log message. The message will be sent in the stream
+ * defined by the previous call to rte_openlog_stream(). Messages above
+ * the global level, or of a type that is not enabled, are dropped and 0
+ * is returned; otherwise the vfprintf() result is returned.
+ */
+int
+rte_vlog(uint32_t level, uint32_t logtype, const char *format, va_list ap)
+{
+	int ret;
+	FILE *f = rte_logs.file;
+	unsigned lcore_id;
+
+	if ((level > rte_logs.level) || !(logtype & rte_logs.type))
+		return 0;
+
+	/*
+	 * rte_logs.file starts out NULL: fall back to the default stream,
+	 * then to stderr, instead of handing NULL to vfprintf().
+	 */
+	if (f == NULL) {
+		f = default_log_stream;
+		if (f == NULL)
+			f = stderr;
+	}
+
+	/* save loglevel and logtype in a global per-lcore variable */
+	lcore_id = rte_lcore_id();
+	if (lcore_id < RTE_MAX_LCORE) {
+		log_cur_msg[lcore_id].loglevel = level;
+		log_cur_msg[lcore_id].logtype = logtype;
+	}
+
+	ret = vfprintf(f, format, ap);
+	fflush(f);
+	return ret;
+}
+
+/*
+ * Generates a log message. The message will be sent in the stream
+ * defined by the previous call to rte_openlog_stream().
+ * No need to check level here, done by rte_vlog().
+ */
+int
+rte_log(uint32_t level, uint32_t logtype, const char *format, ...)
+{
+	va_list ap;
+	int ret;
+
+	va_start(ap, format);
+	ret = rte_vlog(level, logtype, format, ap);
+	va_end(ap);
+	return ret;
+}
+
+/*
+ * called by environment-specific log init function to initialize log
+ * history. Creates the history mempool (or looks up an existing one of
+ * the same name), then installs default_log as the default and current
+ * stream. Returns 0 on success, -1 when no mempool could be obtained.
+ */
+int
+rte_eal_common_log_init(FILE *default_log)
+{
+	STAILQ_INIT(&log_history);
+
+	/* reserve RTE_LOG_HISTORY*2 elements, so we can dump and
+	 * keep logging during this time */
+	log_history_mp = rte_mempool_create(LOG_HISTORY_MP_NAME, RTE_LOG_HISTORY*2,
+				LOG_ELT_SIZE, 0, 0,
+				NULL, NULL,
+				NULL, NULL,
+				SOCKET_ID_ANY, 0);
+
+	if ((log_history_mp == NULL) &&
+	    ((log_history_mp = rte_mempool_lookup(LOG_HISTORY_MP_NAME)) == NULL)){
+		RTE_LOG(ERR, EAL, "%s(): cannot create log_history mempool\n",
+			__func__);
+		return -1;
+	}
+
+	default_log_stream = default_log;
+	rte_openlog_stream(default_log);
+	return 0;
+}
diff --git a/src/dpdk22/lib/librte_eal/common/eal_common_memory.c b/src/dpdk22/lib/librte_eal/common/eal_common_memory.c
new file mode 100644
index 00000000..b6475737
--- /dev/null
+++ b/src/dpdk22/lib/librte_eal/common/eal_common_memory.c
@@ -0,0 +1,154 @@
+/*-
+ *   BSD LICENSE
+ *
+ *   Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+ *   All rights reserved.
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in
+ *       the documentation and/or other materials provided with the
+ *       distribution.
+ *     * Neither the name of Intel Corporation nor the names of its
+ *       contributors may be used to endorse or promote products derived
+ *       from this software without specific prior written permission.
+ * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include + +#include "eal_private.h" +#include "eal_internal_cfg.h" + +/* + * Return a pointer to a read-only table of struct rte_physmem_desc + * elements, containing the layout of all addressable physical + * memory. The last element of the table contains a NULL address. 
+ */
+const struct rte_memseg *
+rte_eal_get_physmem_layout(void)
+{
+	const struct rte_mem_config *mcfg =
+		rte_eal_get_configuration()->mem_config;
+
+	return mcfg->memseg;
+}
+
+
+/*
+ * Get the total size of memory: the sum of the lengths of the memory
+ * segments that precede the first entry with a NULL address.
+ */
+uint64_t
+rte_eal_get_physmem_size(void)
+{
+	/* get pointer to global configuration */
+	const struct rte_mem_config *mcfg =
+		rte_eal_get_configuration()->mem_config;
+	const struct rte_memseg *seg = mcfg->memseg;
+	uint64_t bytes = 0;
+	unsigned idx;
+
+	for (idx = 0; idx < RTE_MAX_MEMSEG && seg[idx].addr != NULL; idx++)
+		bytes += seg[idx].len;
+
+	return bytes;
+}
+
+/*
+ * Dump the physical memory layout: one line per memory segment, stopping
+ * at the first entry with a NULL address.
+ */
+void
+rte_dump_physmem_layout(FILE *f)
+{
+	/* get pointer to global configuration */
+	const struct rte_mem_config *mcfg =
+		rte_eal_get_configuration()->mem_config;
+	const struct rte_memseg *seg;
+	unsigned idx;
+
+	for (idx = 0; idx < RTE_MAX_MEMSEG; idx++) {
+		seg = &mcfg->memseg[idx];
+		if (seg->addr == NULL)
+			break;
+
+		fprintf(f, "Segment %u: phys:0x%"PRIx64", len:%zu, "
+		       "virt:%p, socket_id:%"PRId32", "
+		       "hugepage_sz:%"PRIu64", nchannel:%"PRIx32", "
+		       "nrank:%"PRIx32"\n", idx,
+		       seg->phys_addr,
+		       seg->len,
+		       seg->addr,
+		       seg->socket_id,
+		       seg->hugepage_sz,
+		       seg->nchannel,
+		       seg->nrank);
+	}
+}
+
+/* return the number of memory channels */
+unsigned rte_memory_get_nchannel(void)
+{
+	const struct rte_mem_config *mcfg =
+		rte_eal_get_configuration()->mem_config;
+
+	return mcfg->nchannel;
+}
+
+/* return the number of memory rank */
+unsigned rte_memory_get_nrank(void)
+{
+	const struct rte_mem_config *mcfg =
+		rte_eal_get_configuration()->mem_config;
+
+	return mcfg->nrank;
+}
+
+/*
+ * Copy the forced channel and rank counts from the internal configuration
+ * into the shared memory configuration. Secondary processes leave the
+ * shared configuration untouched. Always returns 0.
+ */
+static int
+rte_eal_memdevice_init(void)
+{
+	struct rte_config *cfg;
+
+	if (rte_eal_process_type() != RTE_PROC_SECONDARY) {
+		cfg = rte_eal_get_configuration();
+		cfg->mem_config->nchannel = internal_config.force_nchannel;
+		cfg->mem_config->nrank = internal_config.force_nrank;
+	}
+
+	return 0;
+}
+
+/*
+ * Init memory subsystem: a primary process sets the hugepages up, any
+ * other process attaches to them; then, unless shared configuration is
+ * disabled, the memory device settings are recorded.
+ * Returns 0 on success, -1 on failure.
+ */
+int
+rte_eal_memory_init(void)
+{
+	int status;
+
+	RTE_LOG(INFO, EAL, "Setting up physically contiguous memory...\n");
+
+	if (rte_eal_process_type() == RTE_PROC_PRIMARY)
+		status = rte_eal_hugepage_init();
+	else
+		status = rte_eal_hugepage_attach();
+	if (status < 0)
+		return -1;
+
+	if (internal_config.no_shconf != 0)
+		return 0;
+
+	return (rte_eal_memdevice_init() < 0) ? -1 : 0;
+}
diff --git a/src/dpdk22/lib/librte_eal/common/eal_common_memzone.c b/src/dpdk22/lib/librte_eal/common/eal_common_memzone.c
new file mode 100644
index 00000000..febc56b0
--- /dev/null
+++ b/src/dpdk22/lib/librte_eal/common/eal_common_memzone.c
@@ -0,0 +1,445 @@
+/*-
+ *   BSD LICENSE
+ *
+ *   Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+ *   All rights reserved.
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in
+ *       the documentation and/or other materials provided with the
+ *       distribution.
+ *     * Neither the name of Intel Corporation nor the names of its
+ *       contributors may be used to endorse or promote products derived
+ *       from this software without specific prior written permission.
+ *
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED.
IN NO EVENT SHALL THE COPYRIGHT
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+#include "malloc_heap.h"
+#include "malloc_elem.h"
+#include "eal_private.h"
+
+/*
+ * Find a reserved memzone by name. Only slots with a non-NULL address are
+ * considered; names are compared over at most RTE_MEMZONE_NAMESIZE bytes.
+ * Takes no lock itself (callers in this file hold mcfg->mlock).
+ * Returns the matching zone, or NULL when there is none.
+ */
+static inline const struct rte_memzone *
+memzone_lookup_thread_unsafe(const char *name)
+{
+	const struct rte_mem_config *mcfg;
+	const struct rte_memzone *mz;
+	unsigned i = 0;
+
+	/* get pointer to global configuration */
+	mcfg = rte_eal_get_configuration()->mem_config;
+
+	/*
+	 * the algorithm is not optimal (linear), but there are few
+	 * zones and this function should be called at init only
+	 */
+	for (i = 0; i < RTE_MAX_MEMZONE; i++) {
+		mz = &mcfg->memzone[i];
+		if (mz->addr != NULL && !strncmp(name, mz->name, RTE_MEMZONE_NAMESIZE))
+			return &mcfg->memzone[i];
+	}
+
+	return NULL;
+}
+
+/*
+ * Return the first memzone slot whose address is NULL (i.e. unused), or
+ * NULL when every slot is taken.
+ */
+static inline struct rte_memzone *
+get_next_free_memzone(void)
+{
+	struct rte_mem_config *mcfg;
+	unsigned i = 0;
+
+	/* get pointer to global configuration */
+	mcfg = rte_eal_get_configuration()->mem_config;
+
+	for (i = 0; i < RTE_MAX_MEMZONE; i++) {
+		if (mcfg->memzone[i].addr == NULL)
+			return &mcfg->memzone[i];
+	}
+
+	return NULL;
+}
+
+/* This function will return the greatest free block if a heap has been
+ * specified. If no heap has been specified, it will return the heap and
+ * length of the greatest free block available in all heaps.
+ *
+ * The returned length is what remains once the element overhead and the
+ * alignment have been deducted; 0 means no heap has a free block large
+ * enough to hold even that. *s is updated to the heap the block is in. */
+static size_t
+find_heap_max_free_elem(int *s, unsigned align)
+{
+	struct rte_mem_config *mcfg;
+	struct rte_malloc_socket_stats stats;
+	int i, socket = *s;
+	size_t len = 0;
+
+	/* get pointer to global configuration */
+	mcfg = rte_eal_get_configuration()->mem_config;
+
+	for (i = 0; i < RTE_MAX_NUMA_NODES; i++) {
+		if ((socket != SOCKET_ID_ANY) && (socket != i))
+			continue;
+
+		malloc_heap_get_stats(&mcfg->malloc_heaps[i], &stats);
+		if (stats.greatest_free_size > len) {
+			len = stats.greatest_free_size;
+			*s = i;
+		}
+	}
+
+	/* the subtraction below is unsigned: never let it wrap around */
+	if (len < (size_t)MALLOC_ELEM_OVERHEAD + align)
+		return 0;
+
+	return (len - MALLOC_ELEM_OVERHEAD - align);
+}
+
+/*
+ * Reserve a named memory zone; callers must hold mcfg->mlock for writing.
+ *
+ * len is rounded up to a cache line; len == 0 asks for bound bytes when a
+ * boundary is given, otherwise for the largest free block. align is raised
+ * to at least one cache line and must be 0 or a power of two. bound, when
+ * non-zero, must be a power of two no smaller than the requested length.
+ * socket_id is SOCKET_ID_ANY or a valid heap index.
+ *
+ * Returns the filled descriptor, or NULL with rte_errno set to ENOSPC,
+ * EEXIST, EINVAL or ENOMEM.
+ */
+static const struct rte_memzone *
+memzone_reserve_aligned_thread_unsafe(const char *name, size_t len,
+		int socket_id, unsigned flags, unsigned align, unsigned bound)
+{
+	struct rte_mem_config *mcfg;
+	size_t requested_len;
+	int socket, i;
+
+	/* get pointer to global configuration */
+	mcfg = rte_eal_get_configuration()->mem_config;
+
+	/* no more room in config */
+	if (mcfg->memzone_cnt >= RTE_MAX_MEMZONE) {
+		RTE_LOG(ERR, EAL, "%s(): No more room in config\n", __func__);
+		rte_errno = ENOSPC;
+		return NULL;
+	}
+
+	/* zone already exist */
+	if ((memzone_lookup_thread_unsafe(name)) != NULL) {
+		RTE_LOG(DEBUG, EAL, "%s(): memzone <%s> already exists\n",
+			__func__, name);
+		rte_errno = EEXIST;
+		return NULL;
+	}
+
+	/* if alignment is not a power of two */
+	if (align && !rte_is_power_of_2(align)) {
+		RTE_LOG(ERR, EAL, "%s(): Invalid alignment: %u\n", __func__,
+				align);
+		rte_errno = EINVAL;
+		return NULL;
+	}
+
+	/* alignment less than cache size is not allowed */
+	if (align < RTE_CACHE_LINE_SIZE)
+		align = RTE_CACHE_LINE_SIZE;
+
+	/* align length on cache boundary. Check for overflow before doing so */
+	if (len > SIZE_MAX - RTE_CACHE_LINE_MASK) {
+		rte_errno = EINVAL; /* requested size too big */
+		return NULL;
+	}
+
+	len += RTE_CACHE_LINE_MASK;
+	len &= ~((size_t) RTE_CACHE_LINE_MASK);
+
+	/* save minimal requested length */
+	requested_len = RTE_MAX((size_t)RTE_CACHE_LINE_SIZE, len);
+
+	/* check that boundary condition is valid */
+	if (bound != 0 && (requested_len > bound || !rte_is_power_of_2(bound))) {
+		rte_errno = EINVAL;
+		return NULL;
+	}
+
+	/* socket_id indexes malloc_heaps[] below: reject anything out of range */
+	if ((socket_id != SOCKET_ID_ANY) &&
+	    (socket_id < 0 || socket_id >= RTE_MAX_NUMA_NODES)) {
+		rte_errno = EINVAL;
+		return NULL;
+	}
+
+	if (!rte_eal_has_hugepages())
+		socket_id = SOCKET_ID_ANY;
+
+	if (len == 0) {
+		if (bound != 0)
+			requested_len = bound;
+		else {
+			requested_len = find_heap_max_free_elem(&socket_id, align);
+			/* no heap has any usable free block left */
+			if (requested_len == 0) {
+				rte_errno = ENOMEM;
+				return NULL;
+			}
+		}
+	}
+
+	if (socket_id == SOCKET_ID_ANY)
+		socket = malloc_get_numa_socket();
+	else
+		socket = socket_id;
+
+	/* allocate memory on heap */
+	void *mz_addr = malloc_heap_alloc(&mcfg->malloc_heaps[socket], NULL,
+			requested_len, flags, align, bound);
+
+	if ((mz_addr == NULL) && (socket_id == SOCKET_ID_ANY)) {
+		/* try other heaps */
+		for (i = 0; i < RTE_MAX_NUMA_NODES; i++) {
+			if (socket == i)
+				continue;
+
+			mz_addr = malloc_heap_alloc(&mcfg->malloc_heaps[i],
+					NULL, requested_len, flags, align, bound);
+			if (mz_addr != NULL)
+				break;
+		}
+	}
+
+	if (mz_addr == NULL) {
+		rte_errno = ENOMEM;
+		return NULL;
+	}
+
+	const struct malloc_elem *elem = malloc_elem_from_data(mz_addr);
+
+	/* fill the zone in config */
+	struct rte_memzone *mz = get_next_free_memzone();
+
+	if (mz == NULL) {
+		RTE_LOG(ERR, EAL, "%s(): Cannot find free memzone but there is room "
+				"in config!\n", __func__);
+		/* give the block back to its heap instead of leaking it */
+		rte_free(mz_addr);
+		rte_errno = ENOSPC;
+		return NULL;
+	}
+
+	mcfg->memzone_cnt++;
+	snprintf(mz->name, sizeof(mz->name), "%s", name);
+	mz->phys_addr = rte_malloc_virt2phy(mz_addr);
+	mz->addr = mz_addr;
+	mz->len = (requested_len == 0 ? elem->size : requested_len);
+	mz->hugepage_sz = elem->ms->hugepage_sz;
+	mz->socket_id = elem->ms->socket_id;
+	mz->flags = 0;
+	/* index of the backing segment within the global memseg table */
+	mz->memseg_id = elem->ms - rte_eal_get_configuration()->mem_config->memseg;
+
+	return mz;
+}
+
+/*
+ * Locked wrapper: performs the reservation while holding mcfg->mlock for
+ * writing, so that lookup, allocation and slot assignment happen as one
+ * step with respect to other holders of that lock.
+ */
+static const struct rte_memzone *
+rte_memzone_reserve_thread_safe(const char *name, size_t len,
+				int socket_id, unsigned flags, unsigned align,
+				unsigned bound)
+{
+	struct rte_mem_config *mcfg;
+	const struct rte_memzone *mz = NULL;
+
+	/* get pointer to global configuration */
+	mcfg = rte_eal_get_configuration()->mem_config;
+
+	rte_rwlock_write_lock(&mcfg->mlock);
+
+	mz = memzone_reserve_aligned_thread_unsafe(
+		name, len, socket_id, flags, align, bound);
+
+	rte_rwlock_write_unlock(&mcfg->mlock);
+
+	return mz;
+}
+
+/*
+ * Return a pointer to a correctly filled memzone descriptor (with a
+ * specified alignment and boundary). If the allocation cannot be done,
+ * return NULL.
+ */
+const struct rte_memzone *
+rte_memzone_reserve_bounded(const char *name, size_t len, int socket_id,
+			    unsigned flags, unsigned align, unsigned bound)
+{
+	return rte_memzone_reserve_thread_safe(name, len, socket_id, flags,
+					       align, bound);
+}
+
+/*
+ * Return a pointer to a correctly filled memzone descriptor (with a
+ * specified alignment). If the allocation cannot be done, return NULL.
+ */
+const struct rte_memzone *
+rte_memzone_reserve_aligned(const char *name, size_t len, int socket_id,
+			    unsigned flags, unsigned align)
+{
+	return rte_memzone_reserve_thread_safe(name, len, socket_id, flags,
+					       align, 0);
+}
+
+/*
+ * Return a pointer to a correctly filled memzone descriptor. If the
+ * allocation cannot be done, return NULL.
+ */ +const struct rte_memzone * +rte_memzone_reserve(const char *name, size_t len, int socket_id, + unsigned flags) +{ + return rte_memzone_reserve_thread_safe(name, len, socket_id, + flags, RTE_CACHE_LINE_SIZE, 0); +} + +int +rte_memzone_free(const struct rte_memzone *mz) +{ + struct rte_mem_config *mcfg; + int ret = 0; + void *addr; + unsigned idx; + + if (mz == NULL) + return -EINVAL; + + mcfg = rte_eal_get_configuration()->mem_config; + + rte_rwlock_write_lock(&mcfg->mlock); + + idx = ((uintptr_t)mz - (uintptr_t)mcfg->memzone); + idx = idx / sizeof(struct rte_memzone); + + addr = mcfg->memzone[idx].addr; +#ifdef RTE_LIBRTE_IVSHMEM + /* + * If ioremap_addr is set, it's an IVSHMEM memzone and we cannot + * free it. + */ + if (mcfg->memzone[idx].ioremap_addr != 0) + ret = -EINVAL; +#endif + if (addr == NULL) + ret = -EINVAL; + else if (mcfg->memzone_cnt == 0) { + rte_panic("%s(): memzone address not NULL but memzone_cnt is 0!\n", + __func__); + } else { + memset(&mcfg->memzone[idx], 0, sizeof(mcfg->memzone[idx])); + mcfg->memzone_cnt--; + } + + rte_rwlock_write_unlock(&mcfg->mlock); + + rte_free(addr); + + return ret; +} + +/* + * Lookup for the memzone identified by the given name + */ +const struct rte_memzone * +rte_memzone_lookup(const char *name) +{ + struct rte_mem_config *mcfg; + const struct rte_memzone *memzone = NULL; + + mcfg = rte_eal_get_configuration()->mem_config; + + rte_rwlock_read_lock(&mcfg->mlock); + + memzone = memzone_lookup_thread_unsafe(name); + + rte_rwlock_read_unlock(&mcfg->mlock); + + return memzone; +} + +/* Dump all reserved memory zones on console */ +void +rte_memzone_dump(FILE *f) +{ + struct rte_mem_config *mcfg; + unsigned i = 0; + + /* get pointer to global configuration */ + mcfg = rte_eal_get_configuration()->mem_config; + + rte_rwlock_read_lock(&mcfg->mlock); + /* dump all zones */ + for (i=0; imemzone[i].addr == NULL) + break; + fprintf(f, "Zone %u: name:<%s>, phys:0x%"PRIx64", len:0x%zx" + ", virt:%p, socket_id:%"PRId32", 
flags:%"PRIx32"\n", i, + mcfg->memzone[i].name, + mcfg->memzone[i].phys_addr, + mcfg->memzone[i].len, + mcfg->memzone[i].addr, + mcfg->memzone[i].socket_id, + mcfg->memzone[i].flags); + } + rte_rwlock_read_unlock(&mcfg->mlock); +} + +/* + * Init the memzone subsystem + */ +int +rte_eal_memzone_init(void) +{ + struct rte_mem_config *mcfg; + const struct rte_memseg *memseg; + + /* get pointer to global configuration */ + mcfg = rte_eal_get_configuration()->mem_config; + + /* secondary processes don't need to initialise anything */ + if (rte_eal_process_type() == RTE_PROC_SECONDARY) + return 0; + + memseg = rte_eal_get_physmem_layout(); + if (memseg == NULL) { + RTE_LOG(ERR, EAL, "%s(): Cannot get physical layout\n", __func__); + return -1; + } + + rte_rwlock_write_lock(&mcfg->mlock); + + /* delete all zones */ + mcfg->memzone_cnt = 0; + memset(mcfg->memzone, 0, sizeof(mcfg->memzone)); + + rte_rwlock_write_unlock(&mcfg->mlock); + + return rte_eal_malloc_heap_init(); +} + +/* Walk all reserved memory zones */ +void rte_memzone_walk(void (*func)(const struct rte_memzone *, void *), + void *arg) +{ + struct rte_mem_config *mcfg; + unsigned i; + + mcfg = rte_eal_get_configuration()->mem_config; + + rte_rwlock_read_lock(&mcfg->mlock); + for (i=0; imemzone[i].addr != NULL) + (*func)(&mcfg->memzone[i], arg); + } + rte_rwlock_read_unlock(&mcfg->mlock); +} diff --git a/src/dpdk22/lib/librte_eal/common/eal_common_options.c b/src/dpdk22/lib/librte_eal/common/eal_common_options.c new file mode 100644 index 00000000..29942ea6 --- /dev/null +++ b/src/dpdk22/lib/librte_eal/common/eal_common_options.c @@ -0,0 +1,1023 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. + * Copyright(c) 2014 6WIND S.A. 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include + +#include "eal_internal_cfg.h" +#include "eal_options.h" +#include "eal_filesystem.h" + +#define BITS_PER_HEX 4 + +const char +eal_short_options[] = + "b:" /* pci-blacklist */ + "c:" /* coremask */ + "d:" /* driver */ + "h" /* help */ + "l:" /* corelist */ + "m:" /* memory size */ + "n:" /* memory channels */ + "r:" /* memory ranks */ + "v" /* version */ + "w:" /* pci-whitelist */ + ; + +const struct option +eal_long_options[] = { + {OPT_BASE_VIRTADDR, 1, NULL, OPT_BASE_VIRTADDR_NUM }, + {OPT_CREATE_UIO_DEV, 0, NULL, OPT_CREATE_UIO_DEV_NUM }, + {OPT_FILE_PREFIX, 1, NULL, OPT_FILE_PREFIX_NUM }, + {OPT_HELP, 0, NULL, OPT_HELP_NUM }, + {OPT_HUGE_DIR, 1, NULL, OPT_HUGE_DIR_NUM }, + {OPT_HUGE_UNLINK, 0, NULL, OPT_HUGE_UNLINK_NUM }, + {OPT_LCORES, 1, NULL, OPT_LCORES_NUM }, + {OPT_LOG_LEVEL, 1, NULL, OPT_LOG_LEVEL_NUM }, + {OPT_MASTER_LCORE, 1, NULL, OPT_MASTER_LCORE_NUM }, + {OPT_NO_HPET, 0, NULL, OPT_NO_HPET_NUM }, + {OPT_NO_HUGE, 0, NULL, OPT_NO_HUGE_NUM }, + {OPT_NO_PCI, 0, NULL, OPT_NO_PCI_NUM }, + {OPT_NO_SHCONF, 0, NULL, OPT_NO_SHCONF_NUM }, + {OPT_PCI_BLACKLIST, 1, NULL, OPT_PCI_BLACKLIST_NUM }, + {OPT_PCI_WHITELIST, 1, NULL, OPT_PCI_WHITELIST_NUM }, + {OPT_PROC_TYPE, 1, NULL, OPT_PROC_TYPE_NUM }, + {OPT_SOCKET_MEM, 1, NULL, OPT_SOCKET_MEM_NUM }, + {OPT_SYSLOG, 1, NULL, OPT_SYSLOG_NUM }, + {OPT_VDEV, 1, NULL, OPT_VDEV_NUM }, + {OPT_VFIO_INTR, 1, NULL, OPT_VFIO_INTR_NUM }, + {OPT_VMWARE_TSC_MAP, 0, NULL, OPT_VMWARE_TSC_MAP_NUM }, + {OPT_XEN_DOM0, 0, NULL, OPT_XEN_DOM0_NUM }, + {0, 0, NULL, 0 } +}; + +TAILQ_HEAD(shared_driver_list, shared_driver); + +/* Definition for shared object drivers. 
*/ +struct shared_driver { + TAILQ_ENTRY(shared_driver) next; + + char name[PATH_MAX]; + void* lib_handle; +}; + +/* List of external loadable drivers */ +static struct shared_driver_list solib_list = +TAILQ_HEAD_INITIALIZER(solib_list); + +/* Default path of external loadable drivers */ +static const char *default_solib_dir = RTE_EAL_PMD_PATH; + +static int master_lcore_parsed; +static int mem_parsed; + +void +eal_reset_internal_config(struct internal_config *internal_cfg) +{ + int i; + + internal_cfg->memory = 0; + internal_cfg->force_nrank = 0; + internal_cfg->force_nchannel = 0; + internal_cfg->hugefile_prefix = HUGEFILE_PREFIX_DEFAULT; + internal_cfg->hugepage_dir = NULL; + internal_cfg->force_sockets = 0; + /* zero out the NUMA config */ + for (i = 0; i < RTE_MAX_NUMA_NODES; i++) + internal_cfg->socket_mem[i] = 0; + /* zero out hugedir descriptors */ + for (i = 0; i < MAX_HUGEPAGE_SIZES; i++) + internal_cfg->hugepage_info[i].lock_descriptor = -1; + internal_cfg->base_virtaddr = 0; + + internal_cfg->syslog_facility = LOG_DAEMON; + /* default value from build option */ + internal_cfg->log_level = RTE_LOG_LEVEL; + + internal_cfg->xen_dom0_support = 0; + + /* if set to NONE, interrupt mode is determined automatically */ + internal_cfg->vfio_intr_mode = RTE_INTR_MODE_NONE; + +#ifdef RTE_LIBEAL_USE_HPET + internal_cfg->no_hpet = 0; +#else + internal_cfg->no_hpet = 1; +#endif + internal_cfg->vmware_tsc_map = 0; + internal_cfg->create_uio_dev = 0; +} + +static int +eal_plugin_add(const char *path) +{ + struct shared_driver *solib; + + solib = malloc(sizeof(*solib)); + if (solib == NULL) { + RTE_LOG(ERR, EAL, "malloc(solib) failed\n"); + return -1; + } + memset(solib, 0, sizeof(*solib)); + strncpy(solib->name, path, PATH_MAX-1); + solib->name[PATH_MAX-1] = 0; + TAILQ_INSERT_TAIL(&solib_list, solib, next); + + return 0; +} + +static int +eal_plugindir_init(const char *path) +{ + DIR *d = NULL; + struct dirent *dent = NULL; + char sopath[PATH_MAX]; + + if (path == NULL 
|| *path == '\0') + return 0; + + d = opendir(path); + if (d == NULL) { + RTE_LOG(ERR, EAL, "failed to open directory %s: %s\n", + path, strerror(errno)); + return -1; + } + + while ((dent = readdir(d)) != NULL) { + struct stat sb; + + snprintf(sopath, PATH_MAX-1, "%s/%s", path, dent->d_name); + sopath[PATH_MAX-1] = 0; + + if (!(stat(sopath, &sb) == 0 && S_ISREG(sb.st_mode))) + continue; + + if (eal_plugin_add(sopath) == -1) + break; + } + + closedir(d); + /* XXX this ignores failures from readdir() itself */ + return (dent == NULL) ? 0 : -1; +} + +int +eal_plugins_init(void) +{ + struct shared_driver *solib = NULL; + + if (*default_solib_dir != '\0') + eal_plugin_add(default_solib_dir); + + TAILQ_FOREACH(solib, &solib_list, next) { + struct stat sb; + + if (stat(solib->name, &sb) == 0 && S_ISDIR(sb.st_mode)) { + if (eal_plugindir_init(solib->name) == -1) { + RTE_LOG(ERR, EAL, + "Cannot init plugin directory %s\n", + solib->name); + return -1; + } + } else { + RTE_LOG(DEBUG, EAL, "open shared lib %s\n", + solib->name); + solib->lib_handle = dlopen(solib->name, RTLD_NOW); + if (solib->lib_handle == NULL) { + RTE_LOG(ERR, EAL, "%s\n", dlerror()); + return -1; + } + } + + } + return 0; +} + +/* + * Parse the coremask given as argument (hexadecimal string) and fill + * the global configuration (core role and core count) with the parsed + * value. + */ +static int xdigit2val(unsigned char c) +{ + int val; + + if (isdigit(c)) + val = c - '0'; + else if (isupper(c)) + val = c - 'A' + 10; + else + val = c - 'a' + 10; + return val; +} + +static int +eal_parse_coremask(const char *coremask) +{ + struct rte_config *cfg = rte_eal_get_configuration(); + int i, j, idx = 0; + unsigned count = 0; + char c; + int val; + + if (coremask == NULL) + return -1; + /* Remove all blank characters ahead and after . + * Remove 0x/0X if exists. 
+ */ + while (isblank(*coremask)) + coremask++; + if (coremask[0] == '0' && ((coremask[1] == 'x') + || (coremask[1] == 'X'))) + coremask += 2; + i = strlen(coremask); + while ((i > 0) && isblank(coremask[i - 1])) + i--; + if (i == 0) + return -1; + + for (i = i - 1; i >= 0 && idx < RTE_MAX_LCORE; i--) { + c = coremask[i]; + if (isxdigit(c) == 0) { + /* invalid characters */ + return -1; + } + val = xdigit2val(c); + for (j = 0; j < BITS_PER_HEX && idx < RTE_MAX_LCORE; j++, idx++) + { + if ((1 << j) & val) { + if (!lcore_config[idx].detected) { + RTE_LOG(ERR, EAL, "lcore %u " + "unavailable\n", idx); + return -1; + } + cfg->lcore_role[idx] = ROLE_RTE; + lcore_config[idx].core_index = count; + count++; + } else { + cfg->lcore_role[idx] = ROLE_OFF; + lcore_config[idx].core_index = -1; + } + } + } + for (; i >= 0; i--) + if (coremask[i] != '0') + return -1; + for (; idx < RTE_MAX_LCORE; idx++) { + cfg->lcore_role[idx] = ROLE_OFF; + lcore_config[idx].core_index = -1; + } + if (count == 0) + return -1; + /* Update the count of enabled logical cores of the EAL configuration */ + cfg->lcore_count = count; + return 0; +} + +static int +eal_parse_corelist(const char *corelist) +{ + struct rte_config *cfg = rte_eal_get_configuration(); + int i, idx = 0; + unsigned count = 0; + char *end = NULL; + int min, max; + + if (corelist == NULL) + return -1; + + /* Remove all blank characters ahead and after */ + while (isblank(*corelist)) + corelist++; + i = strlen(corelist); + while ((i > 0) && isblank(corelist[i - 1])) + i--; + + /* Reset config */ + for (idx = 0; idx < RTE_MAX_LCORE; idx++) { + cfg->lcore_role[idx] = ROLE_OFF; + lcore_config[idx].core_index = -1; + } + + /* Get list of cores */ + min = RTE_MAX_LCORE; + do { + while (isblank(*corelist)) + corelist++; + if (*corelist == '\0') + return -1; + errno = 0; + idx = strtoul(corelist, &end, 10); + if (errno || end == NULL) + return -1; + while (isblank(*end)) + end++; + if (*end == '-') { + min = idx; + } else if ((*end == 
',') || (*end == '\0')) { + max = idx; + if (min == RTE_MAX_LCORE) + min = idx; + for (idx = min; idx <= max; idx++) { + if (cfg->lcore_role[idx] != ROLE_RTE) { + cfg->lcore_role[idx] = ROLE_RTE; + lcore_config[idx].core_index = count; + count++; + } + } + min = RTE_MAX_LCORE; + } else + return -1; + corelist = end + 1; + } while (*end != '\0'); + + if (count == 0) + return -1; + + /* Update the count of enabled logical cores of the EAL configuration */ + cfg->lcore_count = count; + + return 0; +} + +/* Changes the lcore id of the master thread */ +static int +eal_parse_master_lcore(const char *arg) +{ + char *parsing_end; + struct rte_config *cfg = rte_eal_get_configuration(); + + errno = 0; + cfg->master_lcore = (uint32_t) strtol(arg, &parsing_end, 0); + if (errno || parsing_end[0] != 0) + return -1; + if (cfg->master_lcore >= RTE_MAX_LCORE) + return -1; + master_lcore_parsed = 1; + return 0; +} + +/* + * Parse elem, the elem could be single number/range or '(' ')' group + * 1) A single number elem, it's just a simple digit. e.g. 9 + * 2) A single range elem, two digits with a '-' between. e.g. 2-6 + * 3) A group elem, combines multiple 1) or 2) with '( )'. e.g (0,2-4,6) + * Within group elem, '-' used for a range separator; + * ',' used for a single number. 
+ */ +static int +eal_parse_set(const char *input, uint16_t set[], unsigned num) +{ + unsigned idx; + const char *str = input; + char *end = NULL; + unsigned min, max; + + memset(set, 0, num * sizeof(uint16_t)); + + while (isblank(*str)) + str++; + + /* only digit or left bracket is qualify for start point */ + if ((!isdigit(*str) && *str != '(') || *str == '\0') + return -1; + + /* process single number or single range of number */ + if (*str != '(') { + errno = 0; + idx = strtoul(str, &end, 10); + if (errno || end == NULL || idx >= num) + return -1; + else { + while (isblank(*end)) + end++; + + min = idx; + max = idx; + if (*end == '-') { + /* process single - */ + end++; + while (isblank(*end)) + end++; + if (!isdigit(*end)) + return -1; + + errno = 0; + idx = strtoul(end, &end, 10); + if (errno || end == NULL || idx >= num) + return -1; + max = idx; + while (isblank(*end)) + end++; + if (*end != ',' && *end != '\0') + return -1; + } + + if (*end != ',' && *end != '\0' && + *end != '@') + return -1; + + for (idx = RTE_MIN(min, max); + idx <= RTE_MAX(min, max); idx++) + set[idx] = 1; + + return end - input; + } + } + + /* process set within bracket */ + str++; + while (isblank(*str)) + str++; + if (*str == '\0') + return -1; + + min = RTE_MAX_LCORE; + do { + + /* go ahead to the first digit */ + while (isblank(*str)) + str++; + if (!isdigit(*str)) + return -1; + + /* get the digit value */ + errno = 0; + idx = strtoul(str, &end, 10); + if (errno || end == NULL || idx >= num) + return -1; + + /* go ahead to separator '-',',' and ')' */ + while (isblank(*end)) + end++; + if (*end == '-') { + if (min == RTE_MAX_LCORE) + min = idx; + else /* avoid continuous '-' */ + return -1; + } else if ((*end == ',') || (*end == ')')) { + max = idx; + if (min == RTE_MAX_LCORE) + min = idx; + for (idx = RTE_MIN(min, max); + idx <= RTE_MAX(min, max); idx++) + set[idx] = 1; + + min = RTE_MAX_LCORE; + } else + return -1; + + str = end + 1; + } while (*end != '\0' && *end != ')'); + + 
return str - input; +} + +/* convert from set array to cpuset bitmap */ +static int +convert_to_cpuset(rte_cpuset_t *cpusetp, + uint16_t *set, unsigned num) +{ + unsigned idx; + + CPU_ZERO(cpusetp); + + for (idx = 0; idx < num; idx++) { + if (!set[idx]) + continue; + + if (!lcore_config[idx].detected) { + RTE_LOG(ERR, EAL, "core %u " + "unavailable\n", idx); + return -1; + } + + CPU_SET(idx, cpusetp); + } + + return 0; +} + +/* + * The format pattern: --lcores='[<,lcores[@cpus]>...]' + * lcores, cpus could be a single digit/range or a group. + * '(' and ')' are necessary if it's a group. + * If not supply '@cpus', the value of cpus uses the same as lcores. + * e.g. '1,2@(5-7),(3-5)@(0,2),(0,6),7-8' means start 9 EAL thread as below + * lcore 0 runs on cpuset 0x41 (cpu 0,6) + * lcore 1 runs on cpuset 0x2 (cpu 1) + * lcore 2 runs on cpuset 0xe0 (cpu 5,6,7) + * lcore 3,4,5 runs on cpuset 0x5 (cpu 0,2) + * lcore 6 runs on cpuset 0x41 (cpu 0,6) + * lcore 7 runs on cpuset 0x80 (cpu 7) + * lcore 8 runs on cpuset 0x100 (cpu 8) + */ +static int +eal_parse_lcores(const char *lcores) +{ + struct rte_config *cfg = rte_eal_get_configuration(); + static uint16_t set[RTE_MAX_LCORE]; + unsigned idx = 0; + int i; + unsigned count = 0; + const char *lcore_start = NULL; + const char *end = NULL; + int offset; + rte_cpuset_t cpuset; + int lflags = 0; + int ret = -1; + + if (lcores == NULL) + return -1; + + /* Remove all blank characters ahead and after */ + while (isblank(*lcores)) + lcores++; + i = strlen(lcores); + while ((i > 0) && isblank(lcores[i - 1])) + i--; + + CPU_ZERO(&cpuset); + + /* Reset lcore config */ + for (idx = 0; idx < RTE_MAX_LCORE; idx++) { + cfg->lcore_role[idx] = ROLE_OFF; + lcore_config[idx].core_index = -1; + CPU_ZERO(&lcore_config[idx].cpuset); + } + + /* Get list of cores */ + do { + while (isblank(*lcores)) + lcores++; + if (*lcores == '\0') + goto err; + + /* record lcore_set start point */ + lcore_start = lcores; + + /* go across a complete bracket */ + 
if (*lcore_start == '(') { + lcores += strcspn(lcores, ")"); + if (*lcores++ == '\0') + goto err; + } + + /* scan the separator '@', ','(next) or '\0'(finish) */ + lcores += strcspn(lcores, "@,"); + + if (*lcores == '@') { + /* explicit assign cpu_set */ + offset = eal_parse_set(lcores + 1, set, RTE_DIM(set)); + if (offset < 0) + goto err; + + /* prepare cpu_set and update the end cursor */ + if (0 > convert_to_cpuset(&cpuset, + set, RTE_DIM(set))) + goto err; + end = lcores + 1 + offset; + } else { /* ',' or '\0' */ + /* haven't given cpu_set, current loop done */ + end = lcores; + + /* go back to check - */ + offset = strcspn(lcore_start, "(-"); + if (offset < (end - lcore_start) && + *(lcore_start + offset) != '(') + lflags = 1; + } + + if (*end != ',' && *end != '\0') + goto err; + + /* parse lcore_set from start point */ + if (0 > eal_parse_set(lcore_start, set, RTE_DIM(set))) + goto err; + + /* without '@', by default using lcore_set as cpu_set */ + if (*lcores != '@' && + 0 > convert_to_cpuset(&cpuset, set, RTE_DIM(set))) + goto err; + + /* start to update lcore_set */ + for (idx = 0; idx < RTE_MAX_LCORE; idx++) { + if (!set[idx]) + continue; + + if (cfg->lcore_role[idx] != ROLE_RTE) { + lcore_config[idx].core_index = count; + cfg->lcore_role[idx] = ROLE_RTE; + count++; + } + + if (lflags) { + CPU_ZERO(&cpuset); + CPU_SET(idx, &cpuset); + } + rte_memcpy(&lcore_config[idx].cpuset, &cpuset, + sizeof(rte_cpuset_t)); + } + + lcores = end + 1; + } while (*end != '\0'); + + if (count == 0) + goto err; + + cfg->lcore_count = count; + ret = 0; + +err: + + return ret; +} + +static int +eal_parse_syslog(const char *facility, struct internal_config *conf) +{ + int i; + static struct { + const char *name; + int value; + } map[] = { + { "auth", LOG_AUTH }, + { "cron", LOG_CRON }, + { "daemon", LOG_DAEMON }, + { "ftp", LOG_FTP }, + { "kern", LOG_KERN }, + { "lpr", LOG_LPR }, + { "mail", LOG_MAIL }, + { "news", LOG_NEWS }, + { "syslog", LOG_SYSLOG }, + { "user", LOG_USER 
}, + { "uucp", LOG_UUCP }, + { "local0", LOG_LOCAL0 }, + { "local1", LOG_LOCAL1 }, + { "local2", LOG_LOCAL2 }, + { "local3", LOG_LOCAL3 }, + { "local4", LOG_LOCAL4 }, + { "local5", LOG_LOCAL5 }, + { "local6", LOG_LOCAL6 }, + { "local7", LOG_LOCAL7 }, + { NULL, 0 } + }; + + for (i = 0; map[i].name; i++) { + if (!strcmp(facility, map[i].name)) { + conf->syslog_facility = map[i].value; + return 0; + } + } + return -1; +} + +static int +eal_parse_log_level(const char *level, uint32_t *log_level) +{ + char *end; + unsigned long tmp; + + errno = 0; + tmp = strtoul(level, &end, 0); + + /* check for errors */ + if ((errno != 0) || (level[0] == '\0') || + end == NULL || (*end != '\0')) + return -1; + + /* log_level is a uint32_t */ + if (tmp >= UINT32_MAX) + return -1; + + *log_level = tmp; + return 0; +} + +static enum rte_proc_type_t +eal_parse_proc_type(const char *arg) +{ + if (strncasecmp(arg, "primary", sizeof("primary")) == 0) + return RTE_PROC_PRIMARY; + if (strncasecmp(arg, "secondary", sizeof("secondary")) == 0) + return RTE_PROC_SECONDARY; + if (strncasecmp(arg, "auto", sizeof("auto")) == 0) + return RTE_PROC_AUTO; + + return RTE_PROC_INVALID; +} + +int +eal_parse_common_option(int opt, const char *optarg, + struct internal_config *conf) +{ + switch (opt) { + /* blacklist */ + case 'b': + if (rte_eal_devargs_add(RTE_DEVTYPE_BLACKLISTED_PCI, + optarg) < 0) { + return -1; + } + break; + /* whitelist */ + case 'w': + if (rte_eal_devargs_add(RTE_DEVTYPE_WHITELISTED_PCI, + optarg) < 0) { + return -1; + } + break; + /* coremask */ + case 'c': + if (eal_parse_coremask(optarg) < 0) { + RTE_LOG(ERR, EAL, "invalid coremask\n"); + return -1; + } + break; + /* corelist */ + case 'l': + if (eal_parse_corelist(optarg) < 0) { + RTE_LOG(ERR, EAL, "invalid core list\n"); + return -1; + } + break; + /* size of memory */ + case 'm': + conf->memory = atoi(optarg); + conf->memory *= 1024ULL; + conf->memory *= 1024ULL; + mem_parsed = 1; + break; + /* force number of channels */ + case 
'n': + conf->force_nchannel = atoi(optarg); + if (conf->force_nchannel == 0 || + conf->force_nchannel > 4) { + RTE_LOG(ERR, EAL, "invalid channel number\n"); + return -1; + } + break; + /* force number of ranks */ + case 'r': + conf->force_nrank = atoi(optarg); + if (conf->force_nrank == 0 || + conf->force_nrank > 16) { + RTE_LOG(ERR, EAL, "invalid rank number\n"); + return -1; + } + break; + /* force loading of external driver */ + case 'd': + if (eal_plugin_add(optarg) == -1) + return -1; + break; + case 'v': + /* since message is explicitly requested by user, we + * write message at highest log level so it can always + * be seen + * even if info or warning messages are disabled */ + RTE_LOG(CRIT, EAL, "RTE Version: '%s'\n", rte_version()); + break; + + /* long options */ + case OPT_HUGE_UNLINK_NUM: + conf->hugepage_unlink = 1; + break; + + case OPT_NO_HUGE_NUM: + conf->no_hugetlbfs = 1; + break; + + case OPT_NO_PCI_NUM: + conf->no_pci = 1; + break; + + case OPT_NO_HPET_NUM: + conf->no_hpet = 1; + break; + + case OPT_VMWARE_TSC_MAP_NUM: + conf->vmware_tsc_map = 1; + break; + + case OPT_NO_SHCONF_NUM: + conf->no_shconf = 1; + break; + + case OPT_PROC_TYPE_NUM: + conf->process_type = eal_parse_proc_type(optarg); + break; + + case OPT_MASTER_LCORE_NUM: + if (eal_parse_master_lcore(optarg) < 0) { + RTE_LOG(ERR, EAL, "invalid parameter for --" + OPT_MASTER_LCORE "\n"); + return -1; + } + break; + + case OPT_VDEV_NUM: + if (rte_eal_devargs_add(RTE_DEVTYPE_VIRTUAL, + optarg) < 0) { + return -1; + } + break; + + case OPT_SYSLOG_NUM: + if (eal_parse_syslog(optarg, conf) < 0) { + RTE_LOG(ERR, EAL, "invalid parameters for --" + OPT_SYSLOG "\n"); + return -1; + } + break; + + case OPT_LOG_LEVEL_NUM: { + uint32_t log; + + if (eal_parse_log_level(optarg, &log) < 0) { + RTE_LOG(ERR, EAL, + "invalid parameters for --" + OPT_LOG_LEVEL "\n"); + return -1; + } + conf->log_level = log; + break; + } + case OPT_LCORES_NUM: + if (eal_parse_lcores(optarg) < 0) { + RTE_LOG(ERR, EAL, 
"invalid parameter for --" + OPT_LCORES "\n"); + return -1; + } + break; + + /* don't know what to do, leave this to caller */ + default: + return 1; + + } + + return 0; +} + +int +eal_adjust_config(struct internal_config *internal_cfg) +{ + int i; + struct rte_config *cfg = rte_eal_get_configuration(); + + if (internal_config.process_type == RTE_PROC_AUTO) + internal_config.process_type = eal_proc_type_detect(); + + /* default master lcore is the first one */ + if (!master_lcore_parsed) + cfg->master_lcore = rte_get_next_lcore(-1, 0, 0); + + /* if no memory amounts were requested, this will result in 0 and + * will be overridden later, right after eal_hugepage_info_init() */ + for (i = 0; i < RTE_MAX_NUMA_NODES; i++) + internal_cfg->memory += internal_cfg->socket_mem[i]; + + return 0; +} + +int +eal_check_common_options(struct internal_config *internal_cfg) +{ + struct rte_config *cfg = rte_eal_get_configuration(); + + if (cfg->lcore_role[cfg->master_lcore] != ROLE_RTE) { + RTE_LOG(ERR, EAL, "Master lcore is not enabled for DPDK\n"); + return -1; + } + + if (internal_cfg->process_type == RTE_PROC_INVALID) { + RTE_LOG(ERR, EAL, "Invalid process type specified\n"); + return -1; + } + if (index(internal_cfg->hugefile_prefix, '%') != NULL) { + RTE_LOG(ERR, EAL, "Invalid char, '%%', in --"OPT_FILE_PREFIX" " + "option\n"); + return -1; + } + if (mem_parsed && internal_cfg->force_sockets == 1) { + RTE_LOG(ERR, EAL, "Options -m and --"OPT_SOCKET_MEM" cannot " + "be specified at the same time\n"); + return -1; + } + if (internal_cfg->no_hugetlbfs && internal_cfg->force_sockets == 1) { + RTE_LOG(ERR, EAL, "Option --"OPT_SOCKET_MEM" cannot " + "be specified together with --"OPT_NO_HUGE"\n"); + return -1; + } + + if (internal_cfg->no_hugetlbfs && internal_cfg->hugepage_unlink) { + RTE_LOG(ERR, EAL, "Option --"OPT_HUGE_UNLINK" cannot " + "be specified together with --"OPT_NO_HUGE"\n"); + return -1; + } + + if (rte_eal_devargs_type_count(RTE_DEVTYPE_WHITELISTED_PCI) != 0 && + 
rte_eal_devargs_type_count(RTE_DEVTYPE_BLACKLISTED_PCI) != 0) { + RTE_LOG(ERR, EAL, "Options blacklist (-b) and whitelist (-w) " + "cannot be used at the same time\n"); + return -1; + } + + return 0; +} + +void +eal_common_usage(void) +{ + printf("[options]\n\n" + "EAL common options:\n" + " -c COREMASK Hexadecimal bitmask of cores to run on\n" + " -l CORELIST List of cores to run on\n" + " The argument format is [-c2][,c3[-c4],...]\n" + " where c1, c2, etc are core indexes between 0 and %d\n" + " --"OPT_LCORES" COREMAP Map lcore set to physical cpu set\n" + " The argument format is\n" + " '[<,lcores[@cpus]>...]'\n" + " lcores and cpus list are grouped by '(' and ')'\n" + " Within the group, '-' is used for range separator,\n" + " ',' is used for single number separator.\n" + " '( )' can be omitted for single element group,\n" + " '@' can be omitted if cpus and lcores have the same value\n" + " --"OPT_MASTER_LCORE" ID Core ID that is used as master\n" + " -n CHANNELS Number of memory channels\n" + " -m MB Memory to allocate (see also --"OPT_SOCKET_MEM")\n" + " -r RANKS Force number of memory ranks (don't detect)\n" + " -b, --"OPT_PCI_BLACKLIST" Add a PCI device in black list.\n" + " Prevent EAL from using this PCI device. The argument\n" + " format is .\n" + " -w, --"OPT_PCI_WHITELIST" Add a PCI device in white list.\n" + " Only use the specified PCI devices. The argument format\n" + " is <[domain:]bus:devid.func>. 
This option can be present\n" + " several times (once per device).\n" + " [NOTE: PCI whitelist cannot be used with -b option]\n" + " --"OPT_VDEV" Add a virtual device.\n" + " The argument format is [,key=val,...]\n" + " (ex: --vdev=eth_pcap0,iface=eth2).\n" + " -d LIB.so|DIR Add a driver or driver directory\n" + " (can be used multiple times)\n" + " --"OPT_VMWARE_TSC_MAP" Use VMware TSC map instead of native RDTSC\n" + " --"OPT_PROC_TYPE" Type of this process (primary|secondary|auto)\n" + " --"OPT_SYSLOG" Set syslog facility\n" + " --"OPT_LOG_LEVEL" Set default log level\n" + " -v Display version information on startup\n" + " -h, --help This help\n" + "\nEAL options for DEBUG use only:\n" + " --"OPT_HUGE_UNLINK" Unlink hugepage files after init\n" + " --"OPT_NO_HUGE" Use malloc instead of hugetlbfs\n" + " --"OPT_NO_PCI" Disable PCI\n" + " --"OPT_NO_HPET" Disable HPET\n" + " --"OPT_NO_SHCONF" No shared config (mmap'd files)\n" + "\n", RTE_MAX_LCORE); +} diff --git a/src/dpdk22/lib/librte_eal/common/eal_common_pci.c b/src/dpdk22/lib/librte_eal/common/eal_common_pci.c new file mode 100644 index 00000000..dcfe9478 --- /dev/null +++ b/src/dpdk22/lib/librte_eal/common/eal_common_pci.c @@ -0,0 +1,464 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. 
+ * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ +/* BSD LICENSE + * + * Copyright 2013-2014 6WIND S.A. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of 6WIND S.A. nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "eal_private.h" + +struct pci_driver_list pci_driver_list; +struct pci_device_list pci_device_list; + +static struct rte_devargs *pci_devargs_lookup(struct rte_pci_device *dev) +{ + struct rte_devargs *devargs; + + TAILQ_FOREACH(devargs, &devargs_list, next) { + if (devargs->type != RTE_DEVTYPE_BLACKLISTED_PCI && + devargs->type != RTE_DEVTYPE_WHITELISTED_PCI) + continue; + if (!rte_eal_compare_pci_addr(&dev->addr, &devargs->pci.addr)) + return devargs; + } + return NULL; +} + +/* map a particular resource from a file */ +void * +pci_map_resource(void *requested_addr, int fd, off_t offset, size_t size, + int additional_flags) +{ + void *mapaddr; + + /* Map the PCI memory resource of device */ + mapaddr = mmap(requested_addr, size, PROT_READ | PROT_WRITE, + MAP_SHARED | additional_flags, fd, offset); + if (mapaddr == MAP_FAILED) { + RTE_LOG(ERR, EAL, "%s(): cannot mmap(%d, %p, 0x%lx, 0x%lx): %s (%p)\n", + __func__, fd, requested_addr, + (unsigned long)size, (unsigned long)offset, + 
strerror(errno), mapaddr); + } else + RTE_LOG(DEBUG, EAL, " PCI memory mapped at %p\n", mapaddr); + + return mapaddr; +} + +/* unmap a particular resource */ +void +pci_unmap_resource(void *requested_addr, size_t size) +{ + if (requested_addr == NULL) + return; + + /* Unmap the PCI memory resource of device */ + if (munmap(requested_addr, size)) { + RTE_LOG(ERR, EAL, "%s(): cannot munmap(%p, 0x%lx): %s\n", + __func__, requested_addr, (unsigned long)size, + strerror(errno)); + } else + RTE_LOG(DEBUG, EAL, " PCI memory unmapped at %p\n", + requested_addr); +} + +/* + * If vendor/device ID match, call the devinit() function of the + * driver. + */ +static int +rte_eal_pci_probe_one_driver(struct rte_pci_driver *dr, struct rte_pci_device *dev) +{ + int ret; + const struct rte_pci_id *id_table; + + for (id_table = dr->id_table; id_table->vendor_id != 0; id_table++) { + + /* check if device's identifiers match the driver's ones */ + if (id_table->vendor_id != dev->id.vendor_id && + id_table->vendor_id != PCI_ANY_ID) + continue; + if (id_table->device_id != dev->id.device_id && + id_table->device_id != PCI_ANY_ID) + continue; + if (id_table->subsystem_vendor_id != dev->id.subsystem_vendor_id && + id_table->subsystem_vendor_id != PCI_ANY_ID) + continue; + if (id_table->subsystem_device_id != dev->id.subsystem_device_id && + id_table->subsystem_device_id != PCI_ANY_ID) + continue; + + struct rte_pci_addr *loc = &dev->addr; + + RTE_LOG(DEBUG, EAL, "PCI device "PCI_PRI_FMT" on NUMA socket %i\n", + loc->domain, loc->bus, loc->devid, loc->function, + dev->numa_node); + + RTE_LOG(DEBUG, EAL, " probe driver: %x:%x %s\n", dev->id.vendor_id, + dev->id.device_id, dr->name); + + /* no initialization when blacklisted, return without error */ + if (dev->devargs != NULL && + dev->devargs->type == RTE_DEVTYPE_BLACKLISTED_PCI) { + RTE_LOG(DEBUG, EAL, " Device is blacklisted, not initializing\n"); + return 1; + } + + if (dr->drv_flags & RTE_PCI_DRV_NEED_MAPPING) { +#ifdef RTE_PCI_CONFIG + 
/* + * Set PCIe config space for high performance. + * Return value can be ignored. + */ + pci_config_space_set(dev); +#endif + /* map resources for devices that use igb_uio */ + ret = pci_map_device(dev); + if (ret != 0) + return ret; + } else if (dr->drv_flags & RTE_PCI_DRV_FORCE_UNBIND && + rte_eal_process_type() == RTE_PROC_PRIMARY) { + /* unbind current driver */ + if (pci_unbind_kernel_driver(dev) < 0) + return -1; + } + + /* reference driver structure */ + dev->driver = dr; + + /* call the driver devinit() function */ + return dr->devinit(dr, dev); + } + /* return positive value if driver is not found */ + return 1; +} + +/* + * If vendor/device ID match, call the devuninit() function of the + * driver. + */ +static int +rte_eal_pci_detach_dev(struct rte_pci_driver *dr, + struct rte_pci_device *dev) +{ + const struct rte_pci_id *id_table; + + if ((dr == NULL) || (dev == NULL)) + return -EINVAL; + + for (id_table = dr->id_table; id_table->vendor_id != 0; id_table++) { + + /* check if device's identifiers match the driver's ones */ + if (id_table->vendor_id != dev->id.vendor_id && + id_table->vendor_id != PCI_ANY_ID) + continue; + if (id_table->device_id != dev->id.device_id && + id_table->device_id != PCI_ANY_ID) + continue; + if (id_table->subsystem_vendor_id != dev->id.subsystem_vendor_id && + id_table->subsystem_vendor_id != PCI_ANY_ID) + continue; + if (id_table->subsystem_device_id != dev->id.subsystem_device_id && + id_table->subsystem_device_id != PCI_ANY_ID) + continue; + + struct rte_pci_addr *loc = &dev->addr; + + RTE_LOG(DEBUG, EAL, "PCI device "PCI_PRI_FMT" on NUMA socket %i\n", + loc->domain, loc->bus, loc->devid, + loc->function, dev->numa_node); + + RTE_LOG(DEBUG, EAL, " remove driver: %x:%x %s\n", dev->id.vendor_id, + dev->id.device_id, dr->name); + + if (dr->devuninit && (dr->devuninit(dev) < 0)) + return -1; /* negative value is an error */ + + /* clear driver structure */ + dev->driver = NULL; + + if (dr->drv_flags & 
RTE_PCI_DRV_NEED_MAPPING) + /* unmap resources for devices that use igb_uio */ + pci_unmap_device(dev); + + return 0; + } + + /* return positive value if driver is not found */ + return 1; +} + +/* + * If vendor/device ID match, call the devinit() function of all + * registered driver for the given device. Return -1 if initialization + * failed, return 1 if no driver is found for this device. + */ +static int +pci_probe_all_drivers(struct rte_pci_device *dev) +{ + struct rte_pci_driver *dr = NULL; + int rc = 0; + + if (dev == NULL) + return -1; + + TAILQ_FOREACH(dr, &pci_driver_list, next) { + rc = rte_eal_pci_probe_one_driver(dr, dev); + if (rc < 0) + /* negative value is an error */ + return -1; + if (rc > 0) + /* positive value means driver not found */ + continue; + return 0; + } + return 1; +} + +/* + * If vendor/device ID match, call the devuninit() function of all + * registered driver for the given device. Return -1 if initialization + * failed, return 1 if no driver is found for this device. + */ +static int +pci_detach_all_drivers(struct rte_pci_device *dev) +{ + struct rte_pci_driver *dr = NULL; + int rc = 0; + + if (dev == NULL) + return -1; + + TAILQ_FOREACH(dr, &pci_driver_list, next) { + rc = rte_eal_pci_detach_dev(dr, dev); + if (rc < 0) + /* negative value is an error */ + return -1; + if (rc > 0) + /* positive value means driver not found */ + continue; + return 0; + } + return 1; +} + +/* + * Find the pci device specified by pci address, then invoke probe function of + * the driver of the devive. 
+ */ +int +rte_eal_pci_probe_one(const struct rte_pci_addr *addr) +{ + struct rte_pci_device *dev = NULL; + int ret = 0; + + if (addr == NULL) + return -1; + + TAILQ_FOREACH(dev, &pci_device_list, next) { + if (rte_eal_compare_pci_addr(&dev->addr, addr)) + continue; + + ret = pci_probe_all_drivers(dev); + if (ret < 0) + goto err_return; + return 0; + } + return -1; + +err_return: + RTE_LOG(WARNING, EAL, "Requested device " PCI_PRI_FMT + " cannot be used\n", dev->addr.domain, dev->addr.bus, + dev->addr.devid, dev->addr.function); + return -1; +} + +/* + * Detach device specified by its pci address. + */ +int +rte_eal_pci_detach(const struct rte_pci_addr *addr) +{ + struct rte_pci_device *dev = NULL; + int ret = 0; + + if (addr == NULL) + return -1; + + TAILQ_FOREACH(dev, &pci_device_list, next) { + if (rte_eal_compare_pci_addr(&dev->addr, addr)) + continue; + + ret = pci_detach_all_drivers(dev); + if (ret < 0) + goto err_return; + + TAILQ_REMOVE(&pci_device_list, dev, next); + return 0; + } + return -1; + +err_return: + RTE_LOG(WARNING, EAL, "Requested device " PCI_PRI_FMT + " cannot be used\n", dev->addr.domain, dev->addr.bus, + dev->addr.devid, dev->addr.function); + return -1; +} + +/* + * Scan the content of the PCI bus, and call the devinit() function for + * all registered drivers that have a matching entry in its id_table + * for discovered devices. 
+ */ +int +rte_eal_pci_probe(void) +{ + struct rte_pci_device *dev = NULL; + struct rte_devargs *devargs; + int probe_all = 0; + int ret = 0; + + if (rte_eal_devargs_type_count(RTE_DEVTYPE_WHITELISTED_PCI) == 0) + probe_all = 1; + + TAILQ_FOREACH(dev, &pci_device_list, next) { + + /* set devargs in PCI structure */ + devargs = pci_devargs_lookup(dev); + if (devargs != NULL) + dev->devargs = devargs; + + /* probe all or only whitelisted devices */ + if (probe_all) + ret = pci_probe_all_drivers(dev); + else if (devargs != NULL && + devargs->type == RTE_DEVTYPE_WHITELISTED_PCI) + ret = pci_probe_all_drivers(dev); + if (ret < 0) + rte_exit(EXIT_FAILURE, "Requested device " PCI_PRI_FMT + " cannot be used\n", dev->addr.domain, dev->addr.bus, + dev->addr.devid, dev->addr.function); + } + + return 0; +} + +/* dump one device */ +static int +pci_dump_one_device(FILE *f, struct rte_pci_device *dev) +{ + int i; + + fprintf(f, PCI_PRI_FMT, dev->addr.domain, dev->addr.bus, + dev->addr.devid, dev->addr.function); + fprintf(f, " - vendor:%x device:%x\n", dev->id.vendor_id, + dev->id.device_id); + + for (i = 0; i != sizeof(dev->mem_resource) / + sizeof(dev->mem_resource[0]); i++) { + fprintf(f, " %16.16"PRIx64" %16.16"PRIx64"\n", + dev->mem_resource[i].phys_addr, + dev->mem_resource[i].len); + } + return 0; +} + +/* dump devices on the bus */ +void +rte_eal_pci_dump(FILE *f) +{ + struct rte_pci_device *dev = NULL; + + TAILQ_FOREACH(dev, &pci_device_list, next) { + pci_dump_one_device(f, dev); + } +} + +/* register a driver */ +void +rte_eal_pci_register(struct rte_pci_driver *driver) +{ + TAILQ_INSERT_TAIL(&pci_driver_list, driver, next); +} + +/* unregister a driver */ +void +rte_eal_pci_unregister(struct rte_pci_driver *driver) +{ + TAILQ_REMOVE(&pci_driver_list, driver, next); +} diff --git a/src/dpdk22/lib/librte_eal/common/eal_common_pci_uio.c b/src/dpdk22/lib/librte_eal/common/eal_common_pci_uio.c new file mode 100644 index 00000000..f062e81d --- /dev/null +++ 
b/src/dpdk22/lib/librte_eal/common/eal_common_pci_uio.c @@ -0,0 +1,222 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2015 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include + +#include "eal_private.h" + +static struct rte_tailq_elem rte_uio_tailq = { + .name = "UIO_RESOURCE_LIST", +}; +EAL_REGISTER_TAILQ(rte_uio_tailq) + +static int +pci_uio_map_secondary(struct rte_pci_device *dev) +{ + int fd, i; + struct mapped_pci_resource *uio_res; + struct mapped_pci_res_list *uio_res_list = + RTE_TAILQ_CAST(rte_uio_tailq.head, mapped_pci_res_list); + + TAILQ_FOREACH(uio_res, uio_res_list, next) { + + /* skip this element if it doesn't match our PCI address */ + if (rte_eal_compare_pci_addr(&uio_res->pci_addr, &dev->addr)) + continue; + + for (i = 0; i != uio_res->nb_maps; i++) { + /* + * open devname, to mmap it + */ + fd = open(uio_res->maps[i].path, O_RDWR); + if (fd < 0) { + RTE_LOG(ERR, EAL, "Cannot open %s: %s\n", + uio_res->maps[i].path, strerror(errno)); + return -1; + } + + void *mapaddr = pci_map_resource(uio_res->maps[i].addr, + fd, (off_t)uio_res->maps[i].offset, + (size_t)uio_res->maps[i].size, 0); + /* fd is not needed in slave process, close it */ + close(fd); + if (mapaddr != uio_res->maps[i].addr) { + RTE_LOG(ERR, EAL, + "Cannot mmap device resource file %s to address: %p\n", + uio_res->maps[i].path, + uio_res->maps[i].addr); + return -1; + } + } + return 0; + } + + RTE_LOG(ERR, EAL, "Cannot find resource for device\n"); + return 1; +} + +/* map the PCI resource of a PCI device in virtual memory */ +int +pci_uio_map_resource(struct rte_pci_device *dev) +{ + int i, map_idx = 0, ret; + uint64_t phaddr; + struct mapped_pci_resource *uio_res = NULL; + struct mapped_pci_res_list *uio_res_list = + RTE_TAILQ_CAST(rte_uio_tailq.head, mapped_pci_res_list); + + dev->intr_handle.fd = -1; + dev->intr_handle.uio_cfg_fd = -1; + dev->intr_handle.type = RTE_INTR_HANDLE_UNKNOWN; + + /* secondary processes - use already recorded details */ + if (rte_eal_process_type() != RTE_PROC_PRIMARY) + return pci_uio_map_secondary(dev); + + /* 
allocate uio resource */ + ret = pci_uio_alloc_resource(dev, &uio_res); + if (ret) + return ret; + + /* Map all BARs */ + for (i = 0; i != PCI_MAX_RESOURCE; i++) { + /* skip empty BAR */ + phaddr = dev->mem_resource[i].phys_addr; + if (phaddr == 0) + continue; + + ret = pci_uio_map_resource_by_index(dev, i, + uio_res, map_idx); + if (ret) + goto error; + + map_idx++; + } + + uio_res->nb_maps = map_idx; + + TAILQ_INSERT_TAIL(uio_res_list, uio_res, next); + + return 0; +error: + for (i = 0; i < map_idx; i++) { + pci_unmap_resource(uio_res->maps[i].addr, + (size_t)uio_res->maps[i].size); + rte_free(uio_res->maps[i].path); + } + pci_uio_free_resource(dev, uio_res); + return -1; +} + +static void +pci_uio_unmap(struct mapped_pci_resource *uio_res) +{ + int i; + + if (uio_res == NULL) + return; + + for (i = 0; i != uio_res->nb_maps; i++) { + pci_unmap_resource(uio_res->maps[i].addr, + (size_t)uio_res->maps[i].size); + rte_free(uio_res->maps[i].path); + } +} + +static struct mapped_pci_resource * +pci_uio_find_resource(struct rte_pci_device *dev) +{ + struct mapped_pci_resource *uio_res; + struct mapped_pci_res_list *uio_res_list = + RTE_TAILQ_CAST(rte_uio_tailq.head, mapped_pci_res_list); + + if (dev == NULL) + return NULL; + + TAILQ_FOREACH(uio_res, uio_res_list, next) { + + /* skip this element if it doesn't match our PCI address */ + if (!rte_eal_compare_pci_addr(&uio_res->pci_addr, &dev->addr)) + return uio_res; + } + return NULL; +} + +/* unmap the PCI resource of a PCI device in virtual memory */ +void +pci_uio_unmap_resource(struct rte_pci_device *dev) +{ + struct mapped_pci_resource *uio_res; + struct mapped_pci_res_list *uio_res_list = + RTE_TAILQ_CAST(rte_uio_tailq.head, mapped_pci_res_list); + + if (dev == NULL) + return; + + /* find an entry for the device */ + uio_res = pci_uio_find_resource(dev); + if (uio_res == NULL) + return; + + /* secondary processes - just free maps */ + if (rte_eal_process_type() != RTE_PROC_PRIMARY) + return pci_uio_unmap(uio_res); 
+ + TAILQ_REMOVE(uio_res_list, uio_res, next); + + /* unmap all resources */ + pci_uio_unmap(uio_res); + + /* free uio resource */ + rte_free(uio_res); + + /* close fd if in primary process */ + close(dev->intr_handle.fd); + if (dev->intr_handle.uio_cfg_fd >= 0) { + close(dev->intr_handle.uio_cfg_fd); + dev->intr_handle.uio_cfg_fd = -1; + } + + dev->intr_handle.fd = -1; + dev->intr_handle.type = RTE_INTR_HANDLE_UNKNOWN; +} diff --git a/src/dpdk22/lib/librte_eal/common/eal_common_string_fns.c b/src/dpdk22/lib/librte_eal/common/eal_common_string_fns.c new file mode 100644 index 00000000..125a3e2d --- /dev/null +++ b/src/dpdk22/lib/librte_eal/common/eal_common_string_fns.c @@ -0,0 +1,69 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include +#include +#include +#include + +#include + +/* split string into tokens */ +int +rte_strsplit(char *string, int stringlen, + char **tokens, int maxtokens, char delim) +{ + int i, tok = 0; + int tokstart = 1; /* first token is right at start of string */ + + if (string == NULL || tokens == NULL) + goto einval_error; + + for (i = 0; i < stringlen; i++) { + if (string[i] == '\0' || tok >= maxtokens) + break; + if (tokstart) { + tokstart = 0; + tokens[tok++] = &string[i]; + } + if (string[i] == delim) { + string[i] = '\0'; + tokstart = 1; + } + } + return tok; + +einval_error: + errno = EINVAL; + return -1; +} diff --git a/src/dpdk22/lib/librte_eal/common/eal_common_tailqs.c b/src/dpdk22/lib/librte_eal/common/eal_common_tailqs.c new file mode 100644 index 00000000..bb08ec8b --- /dev/null +++ b/src/dpdk22/lib/librte_eal/common/eal_common_tailqs.c @@ -0,0 +1,202 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. 
+ * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "eal_private.h" + +TAILQ_HEAD(rte_tailq_elem_head, rte_tailq_elem); +/* local tailq list */ +static struct rte_tailq_elem_head rte_tailq_elem_head = + TAILQ_HEAD_INITIALIZER(rte_tailq_elem_head); + +/* number of tailqs registered, -1 before call to rte_eal_tailqs_init */ +static int rte_tailqs_count = -1; + +struct rte_tailq_head * +rte_eal_tailq_lookup(const char *name) +{ + unsigned i; + struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config; + + if (name == NULL) + return NULL; + + for (i = 0; i < RTE_MAX_TAILQ; i++) { + if (!strncmp(name, mcfg->tailq_head[i].name, + RTE_TAILQ_NAMESIZE-1)) + return &mcfg->tailq_head[i]; + } + + return NULL; +} + +void +rte_dump_tailq(FILE *f) +{ + struct rte_mem_config *mcfg; + unsigned i = 0; + + mcfg = rte_eal_get_configuration()->mem_config; + + rte_rwlock_read_lock(&mcfg->qlock); + for (i = 0; i < RTE_MAX_TAILQ; i++) { + const struct rte_tailq_head *tailq = &mcfg->tailq_head[i]; + const struct rte_tailq_entry_head *head = &tailq->tailq_head; + + fprintf(f, "Tailq %u: qname:<%s>, tqh_first:%p, tqh_last:%p\n", + i, tailq->name, head->tqh_first, head->tqh_last); + } + rte_rwlock_read_unlock(&mcfg->qlock); +} + +static struct rte_tailq_head * +rte_eal_tailq_create(const char *name) +{ + struct rte_tailq_head *head = NULL; + + if (!rte_eal_tailq_lookup(name) && + (rte_tailqs_count + 1 < RTE_MAX_TAILQ)) { + struct rte_mem_config *mcfg; + + mcfg = rte_eal_get_configuration()->mem_config; + head = &mcfg->tailq_head[rte_tailqs_count]; + snprintf(head->name, sizeof(head->name) - 1, "%s", name); + TAILQ_INIT(&head->tailq_head); + rte_tailqs_count++; + } + + return head; +} + +/* local register, used to store "early" tailqs before rte_eal_init() and to + * ensure secondary process only registers tailqs 
once. */ +static int +rte_eal_tailq_local_register(struct rte_tailq_elem *t) +{ + struct rte_tailq_elem *temp; + + TAILQ_FOREACH(temp, &rte_tailq_elem_head, next) { + if (!strncmp(t->name, temp->name, sizeof(temp->name))) + return -1; + } + + TAILQ_INSERT_TAIL(&rte_tailq_elem_head, t, next); + return 0; +} + +static void +rte_eal_tailq_update(struct rte_tailq_elem *t) +{ + if (rte_eal_process_type() == RTE_PROC_PRIMARY) { + /* primary process is the only one that creates */ + t->head = rte_eal_tailq_create(t->name); + } else { + t->head = rte_eal_tailq_lookup(t->name); + } +} + +int +rte_eal_tailq_register(struct rte_tailq_elem *t) +{ + if (rte_eal_tailq_local_register(t) < 0) { + RTE_LOG(ERR, EAL, + "%s tailq is already registered\n", t->name); + goto error; + } + + /* if a register happens after rte_eal_tailqs_init(), then we can update + * tailq head */ + if (rte_tailqs_count >= 0) { + rte_eal_tailq_update(t); + if (t->head == NULL) { + RTE_LOG(ERR, EAL, + "Cannot initialize tailq: %s\n", t->name); + TAILQ_REMOVE(&rte_tailq_elem_head, t, next); + goto error; + } + } + + return 0; + +error: + t->head = NULL; + return -1; +} + +int +rte_eal_tailqs_init(void) +{ + struct rte_tailq_elem *t; + + rte_tailqs_count = 0; + + TAILQ_FOREACH(t, &rte_tailq_elem_head, next) { + /* second part of register job for "early" tailqs, see + * rte_eal_tailq_register and EAL_REGISTER_TAILQ */ + rte_eal_tailq_update(t); + if (t->head == NULL) { + RTE_LOG(ERR, EAL, + "Cannot initialize tailq: %s\n", t->name); + /* no need to TAILQ_REMOVE, we are going to panic in + * rte_eal_init() */ + goto fail; + } + } + + return 0; + +fail: + rte_dump_tailq(stderr); + return -1; +} diff --git a/src/dpdk22/lib/librte_eal/common/eal_common_thread.c b/src/dpdk22/lib/librte_eal/common/eal_common_thread.c new file mode 100644 index 00000000..2405e93f --- /dev/null +++ b/src/dpdk22/lib/librte_eal/common/eal_common_thread.c @@ -0,0 +1,157 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2014 Intel 
Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include + +#include "eal_thread.h" + +RTE_DECLARE_PER_LCORE(unsigned , _socket_id); + +unsigned rte_socket_id(void) +{ + return RTE_PER_LCORE(_socket_id); +} + +int eal_cpuset_socket_id(rte_cpuset_t *cpusetp) +{ + unsigned cpu = 0; + int socket_id = SOCKET_ID_ANY; + int sid; + + if (cpusetp == NULL) + return SOCKET_ID_ANY; + + do { + if (!CPU_ISSET(cpu, cpusetp)) + continue; + + if (socket_id == SOCKET_ID_ANY) + socket_id = eal_cpu_socket_id(cpu); + + sid = eal_cpu_socket_id(cpu); + if (socket_id != sid) { + socket_id = SOCKET_ID_ANY; + break; + } + + } while (++cpu < RTE_MAX_LCORE); + + return socket_id; +} + +int +rte_thread_set_affinity(rte_cpuset_t *cpusetp) +{ + int s; + unsigned lcore_id; + pthread_t tid; + + tid = pthread_self(); + + s = pthread_setaffinity_np(tid, sizeof(rte_cpuset_t), cpusetp); + if (s != 0) { + RTE_LOG(ERR, EAL, "pthread_setaffinity_np failed\n"); + return -1; + } + + /* store socket_id in TLS for quick access */ + RTE_PER_LCORE(_socket_id) = + eal_cpuset_socket_id(cpusetp); + + /* store cpuset in TLS for quick access */ + memmove(&RTE_PER_LCORE(_cpuset), cpusetp, + sizeof(rte_cpuset_t)); + + lcore_id = rte_lcore_id(); + if (lcore_id != (unsigned)LCORE_ID_ANY) { + /* EAL thread will update lcore_config */ + lcore_config[lcore_id].socket_id = RTE_PER_LCORE(_socket_id); + memmove(&lcore_config[lcore_id].cpuset, cpusetp, + sizeof(rte_cpuset_t)); + } + + return 0; +} + +void +rte_thread_get_affinity(rte_cpuset_t *cpusetp) +{ + assert(cpusetp); + memmove(cpusetp, &RTE_PER_LCORE(_cpuset), + sizeof(rte_cpuset_t)); +} + +int +eal_thread_dump_affinity(char *str, unsigned size) +{ + rte_cpuset_t cpuset; + unsigned cpu; + int ret; + unsigned int out = 0; + + rte_thread_get_affinity(&cpuset); + + for (cpu = 0; cpu < RTE_MAX_LCORE; cpu++) { + if (!CPU_ISSET(cpu, &cpuset)) + continue; + + ret = snprintf(str + out, + size - out, "%u,", cpu); + 
if (ret < 0 || (unsigned)ret >= size - out) { + /* string will be truncated */ + ret = -1; + goto exit; + } + + out += ret; + } + + ret = 0; +exit: + /* remove the last separator */ + if (out > 0) + str[out - 1] = '\0'; + + return ret; +} diff --git a/src/dpdk22/lib/librte_eal/common/eal_common_timer.c b/src/dpdk22/lib/librte_eal/common/eal_common_timer.c new file mode 100644 index 00000000..72371b88 --- /dev/null +++ b/src/dpdk22/lib/librte_eal/common/eal_common_timer.c @@ -0,0 +1,86 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include
+#include
+#include
+#include
+#include
+#include
+
+#include
+#include
+#include
+
+#include "eal_private.h"
+
+/* The frequency of the RDTSC timer resolution */
+static uint64_t eal_tsc_resolution_hz;
+/**
+ * Busy-wait for the given number of microseconds.
+ *
+ * The delay is converted to timer ticks with rte_get_timer_hz(), then the
+ * function spins on rte_get_timer_cycles(), calling rte_pause() on every
+ * iteration, until that many ticks have elapsed since entry.
+ *
+ * @param us
+ *   Number of microseconds to wait.
+ */
+void
+rte_delay_us(unsigned us)
+{
+	const uint64_t start = rte_get_timer_cycles();
+	/* 1E6 is a double constant: the division is done in floating point
+	 * and the quotient is converted back to uint64_t */
+	const uint64_t ticks = (uint64_t)us * rte_get_timer_hz() / 1E6;
+	while ((rte_get_timer_cycles() - start) < ticks)
+		rte_pause();
+}
+/**
+ * Return the TSC frequency recorded by set_tsc_freq().
+ *
+ * The backing variable is a file-scope static, so the result is 0 until
+ * set_tsc_freq() has stored a value.
+ */
+uint64_t
+rte_get_tsc_hz(void)
+{
+	return eal_tsc_resolution_hz;
+}
+/**
+ * Rough fallback measurement of the TSC frequency: log a warning, then
+ * return the number of rte_rdtsc() counts observed across one sleep(1).
+ */
+static uint64_t
+estimate_tsc_freq(void)
+{
+	RTE_LOG(WARNING, EAL, "WARNING: TSC frequency estimated roughly"
+		" - clock timings may be less accurate.\n");
+	/* assume that the sleep(1) will sleep for 1 second */
+	uint64_t start = rte_rdtsc();
+	sleep(1);
+	return rte_rdtsc() - start;
+}
+/**
+ * Determine the TSC frequency and store it for rte_get_tsc_hz().
+ *
+ * get_tsc_freq() is tried first; when it returns 0 the value from
+ * estimate_tsc_freq() is used instead. The chosen frequency is logged in
+ * KHz before being stored.
+ */
+void
+set_tsc_freq(void)
+{
+	uint64_t freq = get_tsc_freq();
+
+	if (!freq) /* no precise value available */
+		freq = estimate_tsc_freq();
+
+	RTE_LOG(INFO, EAL, "TSC frequency is ~%" PRIu64 " KHz\n", freq / 1000);
+	eal_tsc_resolution_hz = freq;
+}
diff --git a/src/dpdk22/lib/librte_eal/common/eal_filesystem.h b/src/dpdk22/lib/librte_eal/common/eal_filesystem.h
new file mode 100644
index 00000000..fdb4a70b
--- /dev/null
+++ b/src/dpdk22/lib/librte_eal/common/eal_filesystem.h
@@ -0,0 +1,118 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+ * All rights reserved.
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +/** + * @file + * Stores functions and path defines for files and directories + * on the filesystem for Linux, that are used by the Linux EAL. + */ + +#ifndef EAL_FILESYSTEM_H +#define EAL_FILESYSTEM_H + +/** Path of rte config file. 
*/
+#define RUNTIME_CONFIG_FMT "%s/.%s_config"
+
+#include
+#include
+#include
+#include
+
+#include
+#include "eal_internal_cfg.h"
+
+static const char *default_config_dir = "/var/run"; /* used unless HOME applies, see below */
+/**
+ * Build the path of the runtime config file.
+ *
+ * The directory is default_config_dir, except when getuid() is non-zero
+ * and the HOME environment variable is set, in which case HOME is used.
+ * The file name is derived from internal_config.hugefile_prefix through
+ * RUNTIME_CONFIG_FMT.
+ *
+ * @return
+ *   Pointer to a function-local static buffer; each call overwrites the
+ *   result of the previous one.
+ */
+static inline const char *
+eal_runtime_config_path(void)
+{
+	static char buffer[PATH_MAX]; /* static so auto-zeroed */
+	const char *directory = default_config_dir;
+	const char *home_dir = getenv("HOME");
+
+	if (getuid() != 0 && home_dir != NULL)
+		directory = home_dir;
+	/* size is sizeof - 1, so the last byte of the buffer is never written */
+	snprintf(buffer, sizeof(buffer) - 1, RUNTIME_CONFIG_FMT, directory,
+			internal_config.hugefile_prefix);
+	return buffer;
+}
+
+/** Path of hugepage info file. */
+#define HUGEPAGE_INFO_FMT "%s/.%s_hugepage_info"
+/**
+ * Build the path of the hugepage info file.
+ *
+ * Same directory selection as eal_runtime_config_path(); the name is
+ * formatted with HUGEPAGE_INFO_FMT.
+ *
+ * @return
+ *   Pointer to a function-local static buffer; each call overwrites the
+ *   result of the previous one.
+ */
+static inline const char *
+eal_hugepage_info_path(void)
+{
+	static char buffer[PATH_MAX]; /* static so auto-zeroed */
+	const char *directory = default_config_dir;
+	const char *home_dir = getenv("HOME");
+
+	if (getuid() != 0 && home_dir != NULL)
+		directory = home_dir;
+	snprintf(buffer, sizeof(buffer) - 1, HUGEPAGE_INFO_FMT, directory,
+			internal_config.hugefile_prefix);
+	return buffer;
+}
+
+/** String format for hugepage map files. */
+#define HUGEFILE_FMT "%s/%smap_%d"
+#define TEMP_HUGEFILE_FMT "%s/%smap_temp_%d"
+/**
+ * Format the path of a hugepage map file into a caller-supplied buffer.
+ *
+ * @param buffer
+ *   Destination buffer.
+ * @param buflen
+ *   Size of buffer in bytes. Must be at least 1, because the terminator
+ *   is written unconditionally at buffer[buflen - 1].
+ * @param hugedir
+ *   Directory component of the path.
+ * @param f_id
+ *   Number substituted for the '%d' in HUGEFILE_FMT.
+ * @return
+ *   buffer.
+ */
+static inline const char *
+eal_get_hugefile_path(char *buffer, size_t buflen, const char *hugedir, int f_id)
+{
+	snprintf(buffer, buflen, HUGEFILE_FMT, hugedir,
+			internal_config.hugefile_prefix, f_id);
+	buffer[buflen - 1] = '\0';
+	return buffer;
+}
+/*
+ * Same as eal_get_hugefile_path() but formatted with TEMP_HUGEFILE_FMT;
+ * only compiled when RTE_EAL_SINGLE_FILE_SEGMENTS is defined.
+ */
+#ifdef RTE_EAL_SINGLE_FILE_SEGMENTS
+static inline const char *
+eal_get_hugefile_temp_path(char *buffer, size_t buflen, const char *hugedir, int f_id)
+{
+	snprintf(buffer, buflen, TEMP_HUGEFILE_FMT, hugedir,
+			internal_config.hugefile_prefix, f_id);
+	buffer[buflen - 1] = '\0';
+	return buffer;
+}
+#endif
+
+/** define the default filename prefix for the %s values above */
+#define HUGEFILE_PREFIX_DEFAULT "rte"
+
+/** Function to read a single numeric value from a file on the filesystem.
+ * Used to read information from files on /sys */ +int eal_parse_sysfs_value(const char *filename, unsigned long *val); + +#endif /* EAL_FILESYSTEM_H */ diff --git a/src/dpdk22/lib/librte_eal/common/eal_hugepages.h b/src/dpdk22/lib/librte_eal/common/eal_hugepages.h new file mode 100644 index 00000000..38edac03 --- /dev/null +++ b/src/dpdk22/lib/librte_eal/common/eal_hugepages.h @@ -0,0 +1,67 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef EAL_HUGEPAGES_H
+#define EAL_HUGEPAGES_H
+
+#include
+#include
+#include
+/** Capacity of hugepage_file.filepath; same as PATH_MAX. */
+#define MAX_HUGEPAGE_PATH PATH_MAX
+
+/**
+ * Structure used to store information about a hugepage that was mapped
+ * through a file in hugetlbfs.
+ */
+struct hugepage_file {
+	void *orig_va;      /**< virtual addr of first mmap() */
+	void *final_va;     /**< virtual addr of 2nd mmap() */
+	uint64_t physaddr;  /**< physical addr */
+	size_t size;        /**< the page size */
+	int socket_id;      /**< NUMA socket ID */
+	int file_id;        /**< the '%d' in HUGEFILE_FMT */
+	int memseg_id;      /**< the memory segment to which the page belongs */
+#ifdef RTE_EAL_SINGLE_FILE_SEGMENTS
+	int repeated;       /**< number of times the page size is repeated */
+#endif
+	char filepath[MAX_HUGEPAGE_PATH]; /**< path to backing file on filesystem */
+};
+
+/**
+ * Read the information from Linux on what hugepages are available
+ * for the EAL to use.
+ */
+int eal_hugepage_info_init(void);
+
+#endif /* EAL_HUGEPAGES_H */
diff --git a/src/dpdk22/lib/librte_eal/common/eal_internal_cfg.h b/src/dpdk22/lib/librte_eal/common/eal_internal_cfg.h
new file mode 100644
index 00000000..5f1367eb
--- /dev/null
+++ b/src/dpdk22/lib/librte_eal/common/eal_internal_cfg.h
@@ -0,0 +1,94 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+ * All rights reserved.
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */
+
+/**
+ * @file
+ * Holds the structures for the eal internal configuration
+ */
+
+#ifndef EAL_INTERNAL_CFG_H
+#define EAL_INTERNAL_CFG_H
+
+#include
+#include
+
+#define MAX_HUGEPAGE_SIZES 3  /**< support up to 3 page sizes */
+
+/*
+ * Internal configuration structure describing one hugepage size: the
+ * page size itself, the hugetlbfs mount point and the number of pages
+ * per socket. struct internal_config embeds MAX_HUGEPAGE_SIZES of these.
+ */
+struct hugepage_info {
+	uint64_t hugepage_sz;   /**< size of a huge page */
+	const char *hugedir;    /**< dir where hugetlbfs is mounted */
+	uint32_t num_pages[RTE_MAX_NUMA_NODES];
+				/**< number of hugepages of that size on each socket */
+	int lock_descriptor;    /**< file descriptor for hugepage dir */
+};
+
+/**
+ * Internal configuration of the EAL. A single global instance named
+ * internal_config is declared right after this structure.
+ */
+struct internal_config {
+	volatile size_t memory;           /**< amount of asked memory */
+	volatile unsigned force_nchannel; /**< force number of channels */
+	volatile unsigned force_nrank;    /**< force number of ranks */
+	volatile unsigned no_hugetlbfs;   /**< true to disable hugetlbfs */
+	unsigned hugepage_unlink;         /**< true to unlink backing files */
+	volatile unsigned xen_dom0_support; /**< support app running on Xen Dom0*/
+	volatile unsigned no_pci;         /**< true to disable PCI */
+	volatile unsigned no_hpet;        /**< true to disable HPET */
+	volatile unsigned vmware_tsc_map; /**< true to use VMware TSC mapping
+					* instead of native TSC */
+	volatile unsigned no_shconf;      /**< true if there is no shared config */
+	volatile unsigned create_uio_dev; /**< true to create /dev/uioX devices */
+	volatile enum rte_proc_type_t process_type; /**< multi-process proc type */
+	/** true to try allocating memory on specific sockets */
+	volatile unsigned force_sockets;
+	volatile uint64_t socket_mem[RTE_MAX_NUMA_NODES]; /**< amount of memory per socket */
+	uintptr_t base_virtaddr;          /**< base address to try and reserve memory from */
+	volatile int syslog_facility;     /**< facility passed to openlog() */
+	volatile uint32_t log_level;      /**< default log level */
+	/** default interrupt mode for VFIO */
+	volatile enum rte_intr_mode vfio_intr_mode;
+	const char *hugefile_prefix;      /**< the base filename of hugetlbfs files */
+	const char *hugepage_dir;         /**< specific hugetlbfs directory to use */
+
+	unsigned num_hugepage_sizes;      /**< how many sizes on this system */
+	struct hugepage_info hugepage_info[MAX_HUGEPAGE_SIZES]; /**< one entry per page size; presumably only the first num_hugepage_sizes are in use - TODO confirm */
+};
+extern struct internal_config internal_config; /**< Global EAL configuration. */
+/* NOTE(review): by its name this resets *internal_cfg to defaults; the definition is not in this header - confirm there. */
+void eal_reset_internal_config(struct internal_config *internal_cfg);
+
+#endif /* EAL_INTERNAL_CFG_H */
diff --git a/src/dpdk22/lib/librte_eal/common/eal_options.h b/src/dpdk22/lib/librte_eal/common/eal_options.h
new file mode 100644
index 00000000..a881c62e
--- /dev/null
+++ b/src/dpdk22/lib/librte_eal/common/eal_options.h
@@ -0,0 +1,100 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright(c) 2014 6WIND S.A.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED.
IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef EAL_OPTIONS_H
+#define EAL_OPTIONS_H
+/*
+ * Numeric ids of the common EAL command-line options.
+ *
+ * Every OPT_xxx macro is the long option name and the OPT_xxx_NUM
+ * enumerator that follows it is that option's id. The first three reuse
+ * the character code of their short option; all others are numbered
+ * consecutively after OPT_LONG_MIN_NUM (256).
+ */
+enum {
+	/* long options mapped to a short option */
+#define OPT_HELP              "help"
+	OPT_HELP_NUM            = 'h',
+#define OPT_PCI_BLACKLIST     "pci-blacklist"
+	OPT_PCI_BLACKLIST_NUM   = 'b',
+#define OPT_PCI_WHITELIST     "pci-whitelist"
+	OPT_PCI_WHITELIST_NUM   = 'w',
+
+	/* first long only option value must be >= 256, so that we won't
+	 * conflict with short options */
+	OPT_LONG_MIN_NUM = 256,
+#define OPT_BASE_VIRTADDR     "base-virtaddr"
+	OPT_BASE_VIRTADDR_NUM,
+#define OPT_CREATE_UIO_DEV    "create-uio-dev"
+	OPT_CREATE_UIO_DEV_NUM,
+#define OPT_FILE_PREFIX       "file-prefix"
+	OPT_FILE_PREFIX_NUM,
+#define OPT_HUGE_DIR          "huge-dir"
+	OPT_HUGE_DIR_NUM,
+#define OPT_HUGE_UNLINK       "huge-unlink"
+	OPT_HUGE_UNLINK_NUM,
+#define OPT_LCORES            "lcores"
+	OPT_LCORES_NUM,
+#define OPT_LOG_LEVEL         "log-level"
+	OPT_LOG_LEVEL_NUM,
+#define OPT_MASTER_LCORE      "master-lcore"
+	OPT_MASTER_LCORE_NUM,
+#define OPT_PROC_TYPE         "proc-type"
+	OPT_PROC_TYPE_NUM,
+#define OPT_NO_HPET           "no-hpet"
+	OPT_NO_HPET_NUM,
+#define OPT_NO_HUGE           "no-huge"
+	OPT_NO_HUGE_NUM,
+#define OPT_NO_PCI            "no-pci"
+	OPT_NO_PCI_NUM,
+#define OPT_NO_SHCONF         "no-shconf"
+	OPT_NO_SHCONF_NUM,
+#define OPT_SOCKET_MEM        "socket-mem"
+	OPT_SOCKET_MEM_NUM,
+#define OPT_SYSLOG            "syslog"
+	OPT_SYSLOG_NUM,
+#define OPT_VDEV              "vdev"
+	OPT_VDEV_NUM,
+#define OPT_VFIO_INTR         "vfio-intr"
+	OPT_VFIO_INTR_NUM,
+#define OPT_VMWARE_TSC_MAP    "vmware-tsc-map"
+	OPT_VMWARE_TSC_MAP_NUM,
+#define OPT_XEN_DOM0          "xen-dom0"
+	OPT_XEN_DOM0_NUM,
+	OPT_LONG_MAX_NUM /* one past the last long-only option id */
+};
+
+extern const char eal_short_options[];
+extern const struct option eal_long_options[];
+
+int eal_parse_common_option(int opt, const char *argv,
+			    struct internal_config *conf);
+int eal_adjust_config(struct internal_config *internal_cfg);
+int eal_check_common_options(struct internal_config *internal_cfg);
+void eal_common_usage(void);
+enum rte_proc_type_t eal_proc_type_detect(void);
+int eal_plugins_init(void);
+
+#endif /* EAL_OPTIONS_H */
diff --git a/src/dpdk22/lib/librte_eal/common/eal_private.h b/src/dpdk22/lib/librte_eal/common/eal_private.h
new file mode 100644
index 00000000..072e672b
--- /dev/null
+++ b/src/dpdk22/lib/librte_eal/common/eal_private.h
@@ -0,0 +1,349 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED.
IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef _EAL_PRIVATE_H_ +#define _EAL_PRIVATE_H_ + +#include +#include + +/** + * Initialize the memzone subsystem (private to eal). + * + * @return + * - 0 on success + * - Negative on error + */ +int rte_eal_memzone_init(void); + +/** + * Common log initialization function (private to eal). + * + * Called by environment-specific log initialization function to initialize + * log history. + * + * @param default_log + * The default log stream to be used. + * @return + * - 0 on success + * - Negative on error + */ +int rte_eal_common_log_init(FILE *default_log); + +/** + * Fill configuration with number of physical and logical processors + * + * This function is private to EAL. + * + * Parse /proc/cpuinfo to get the number of physical and logical + * processors on the machine. + * + * @return + * 0 on success, negative on error + */ +int rte_eal_cpu_init(void); + +/** + * Map memory + * + * This function is private to EAL. + * + * Fill configuration structure with these infos, and return 0 on success. + * + * @return + * 0 on success, negative on error + */ +int rte_eal_memory_init(void); + +/** + * Configure timers + * + * This function is private to EAL. + * + * Mmap memory areas used by HPET (high precision event timer) that will + * provide our time reference, and configure the TSC frequency also for it + * to be used as a reference. 
+ * + * @return + * 0 on success, negative on error + */ +int rte_eal_timer_init(void); + +/** + * Init early logs + * + * This function is private to EAL. + * + * @return + * 0 on success, negative on error + */ +int rte_eal_log_early_init(void); + +/** + * Init the default log stream + * + * This function is private to EAL. + * + * @return + * 0 on success, negative on error + */ +int rte_eal_log_init(const char *id, int facility); + +/** + * Init the PCI infrastructure + * + * This function is private to EAL. + * + * @return + * 0 on success, negative on error + */ +int rte_eal_pci_init(void); + +#ifdef RTE_LIBRTE_IVSHMEM +/** + * Init the memory from IVSHMEM devices + * + * This function is private to EAL. + * + * @return + * 0 on success, negative on error + */ +int rte_eal_ivshmem_init(void); + +/** + * Init objects in IVSHMEM devices + * + * This function is private to EAL. + * + * @return + * 0 on success, negative on error + */ +int rte_eal_ivshmem_obj_init(void); +#endif + +struct rte_pci_driver; +struct rte_pci_device; + +/** + * Unbind kernel driver for this device + * + * This function is private to EAL. + * + * @return + * 0 on success, negative on error + */ +int pci_unbind_kernel_driver(struct rte_pci_device *dev); + +/** + * Map this device + * + * This function is private to EAL. + * + * @return + * 0 on success, negative on error and positive if no driver + * is found for the device. + */ +int pci_map_device(struct rte_pci_device *dev); + +/** + * Unmap this device + * + * This function is private to EAL. + */ +void pci_unmap_device(struct rte_pci_device *dev); + +/** + * Map the PCI resource of a PCI device in virtual memory + * + * This function is private to EAL. + * + * @return + * 0 on success, negative on error + */ +int pci_uio_map_resource(struct rte_pci_device *dev); + +/** + * Unmap the PCI resource of a PCI device + * + * This function is private to EAL. 
+ */ +void pci_uio_unmap_resource(struct rte_pci_device *dev); + +/** + * Allocate uio resource for PCI device + * + * This function is private to EAL. + * + * @param dev + * PCI device to allocate uio resource + * @param uio_res + * Pointer to uio resource. + * If the function returns 0, the pointer will be filled. + * @return + * 0 on success, negative on error + */ +int pci_uio_alloc_resource(struct rte_pci_device *dev, + struct mapped_pci_resource **uio_res); + +/** + * Free uio resource for PCI device + * + * This function is private to EAL. + * + * @param dev + * PCI device to free uio resource + * @param uio_res + * Pointer to uio resource. + */ +void pci_uio_free_resource(struct rte_pci_device *dev, + struct mapped_pci_resource *uio_res); + +/** + * Map device memory to uio resource + * + * This function is private to EAL. + * + * @param dev + * PCI device that has memory information. + * @param res_idx + * Memory resource index of the PCI device. + * @param uio_res + * uio resource that will keep mapping information. + * @param map_idx + * Mapping information index of the uio resource. + * @return + * 0 on success, negative on error + */ +int pci_uio_map_resource_by_index(struct rte_pci_device *dev, int res_idx, + struct mapped_pci_resource *uio_res, int map_idx); + +/** + * Init tail queues for non-EAL library structures. This is to allow + * the rings, mempools, etc. lists to be shared among multiple processes + * + * This function is private to EAL + * + * @return + * 0 on success, negative on error + */ +int rte_eal_tailqs_init(void); + +/** + * Init interrupt handling. + * + * This function is private to EAL. + * + * @return + * 0 on success, negative on error + */ +int rte_eal_intr_init(void); + +/** + * Init alarm mechanism. This is to allow a callback be called after + * specific time. + * + * This function is private to EAL. 
+ * + * @return + * 0 on success, negative on error + */ +int rte_eal_alarm_init(void); + +/** + * This function initialises any virtual devices + * + * This function is private to the EAL. + */ +int rte_eal_dev_init(void); + +/** + * Check whether a kernel module (e.g. vfio, vfio_iommu_type1) + * is loaded. + * + * @param module_name + * The name of the module to check + * + * @return + * -1 if an error happens (NULL pointer or open failure) + * 0 if the module is not loaded + * 1 if the module is loaded + */ +int rte_eal_check_module(const char *module_name); + +/** + * Get cpu core_id. + * + * This function is private to the EAL. + */ +unsigned eal_cpu_core_id(unsigned lcore_id); + +/** + * Check if cpu is present. + * + * This function is private to the EAL. + */ +int eal_cpu_detected(unsigned lcore_id); + +/** + * Set TSC frequency from precise value or estimation + * + * This function is private to the EAL. + */ +void set_tsc_freq(void); + +/** + * Get precise TSC frequency from system + * + * This function is private to the EAL. + */ +uint64_t get_tsc_freq(void); + +/** + * Prepare physical memory mapping + * i.e. hugepages on Linux and + * contigmem on BSD. + * + * This function is private to the EAL. + */ +int rte_eal_hugepage_init(void); + +/** + * Creates memory mapping in secondary process + * i.e. hugepages on Linux and + * contigmem on BSD. + * + * This function is private to the EAL. + */ +int rte_eal_hugepage_attach(void); + +#endif /* _EAL_PRIVATE_H_ */ diff --git a/src/dpdk22/lib/librte_eal/common/eal_thread.h b/src/dpdk22/lib/librte_eal/common/eal_thread.h new file mode 100644 index 00000000..e4e76b9d --- /dev/null +++ b/src/dpdk22/lib/librte_eal/common/eal_thread.h @@ -0,0 +1,100 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. + * All rights reserved. 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef EAL_THREAD_H +#define EAL_THREAD_H + +#include + +/** + * basic loop of thread, called for each thread by eal_init(). + * + * @param arg + * opaque pointer + */ +__attribute__((noreturn)) void *eal_thread_loop(void *arg); + +/** + * Init per-lcore info for master thread + * + * @param lcore_id + * identifier of master lcore + */ +void eal_thread_init_master(unsigned lcore_id); + +/** + * Get the NUMA socket id from cpu id. 
+ * This function is private to EAL. + * + * @param cpu_id + * The logical processor id. + * @return + * socket_id or SOCKET_ID_ANY + */ +unsigned eal_cpu_socket_id(unsigned cpu_id); + +/** + * Get the NUMA socket id from cpuset. + * This function is private to EAL. + * + * @param cpusetp + * Pointer to a valid cpu set. + * @return + * socket_id or SOCKET_ID_ANY + */ +int eal_cpuset_socket_id(rte_cpuset_t *cpusetp); + +/** + * Default buffer size to use with eal_thread_dump_affinity() + */ +#define RTE_CPU_AFFINITY_STR_LEN 256 + +/** + * Dump the current pthread cpuset. + * This function is private to EAL. + * + * Note: + * If the dump size is greater than the size of the given buffer, + * the string will be truncated and terminated with '\0'. + * + * @param str + * The string buffer the cpuset will dump to. + * @param size + * The string buffer size. + * @return + * 0 for success, -1 if truncation happens. + */ +int +eal_thread_dump_affinity(char *str, unsigned size); + +#endif /* EAL_THREAD_H */ diff --git a/src/dpdk22/lib/librte_eal/common/include/arch/x86/rte_atomic.h b/src/dpdk22/lib/librte_eal/common/include/arch/x86/rte_atomic.h new file mode 100644 index 00000000..41178c7b --- /dev/null +++ b/src/dpdk22/lib/librte_eal/common/include/arch/x86/rte_atomic.h @@ -0,0 +1,222 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. 
+ * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef _RTE_ATOMIC_X86_H_ +#define _RTE_ATOMIC_X86_H_ + +#ifdef __cplusplus +extern "C" { +#endif + +#include +#include "generic/rte_atomic.h" + +#if RTE_MAX_LCORE == 1 +#define MPLOCKED /**< No need to insert MP lock prefix. */ +#else +#define MPLOCKED "lock ; " /**< Insert MP lock prefix. 
*/ +#endif + +#define rte_mb() _mm_mfence() + +#define rte_wmb() _mm_sfence() + +#define rte_rmb() _mm_lfence() + +#define rte_smp_mb() rte_mb() + +#define rte_smp_wmb() rte_compiler_barrier() + +#define rte_smp_rmb() rte_compiler_barrier() + +/*------------------------- 16 bit atomic operations -------------------------*/ + +#ifndef RTE_FORCE_INTRINSICS +static inline int +rte_atomic16_cmpset(volatile uint16_t *dst, uint16_t exp, uint16_t src) +{ + uint8_t res; + + asm volatile( + MPLOCKED + "cmpxchgw %[src], %[dst];" + "sete %[res];" + : [res] "=a" (res), /* output */ + [dst] "=m" (*dst) + : [src] "r" (src), /* input */ + "a" (exp), + "m" (*dst) + : "memory"); /* no-clobber list */ + return res; +} + +static inline int rte_atomic16_test_and_set(rte_atomic16_t *v) +{ + return rte_atomic16_cmpset((volatile uint16_t *)&v->cnt, 0, 1); +} + +static inline void +rte_atomic16_inc(rte_atomic16_t *v) +{ + asm volatile( + MPLOCKED + "incw %[cnt]" + : [cnt] "=m" (v->cnt) /* output */ + : "m" (v->cnt) /* input */ + ); +} + +static inline void +rte_atomic16_dec(rte_atomic16_t *v) +{ + asm volatile( + MPLOCKED + "decw %[cnt]" + : [cnt] "=m" (v->cnt) /* output */ + : "m" (v->cnt) /* input */ + ); +} + +static inline int rte_atomic16_inc_and_test(rte_atomic16_t *v) +{ + uint8_t ret; + + asm volatile( + MPLOCKED + "incw %[cnt] ; " + "sete %[ret]" + : [cnt] "+m" (v->cnt), /* output */ + [ret] "=qm" (ret) + ); + return (ret != 0); +} + +static inline int rte_atomic16_dec_and_test(rte_atomic16_t *v) +{ + uint8_t ret; + + asm volatile(MPLOCKED + "decw %[cnt] ; " + "sete %[ret]" + : [cnt] "+m" (v->cnt), /* output */ + [ret] "=qm" (ret) + ); + return (ret != 0); +} + +/*------------------------- 32 bit atomic operations -------------------------*/ + +static inline int +rte_atomic32_cmpset(volatile uint32_t *dst, uint32_t exp, uint32_t src) +{ + uint8_t res; + + asm volatile( + MPLOCKED + "cmpxchgl %[src], %[dst];" + "sete %[res];" + : [res] "=a" (res), /* output */ + [dst] "=m" (*dst) 
+ : [src] "r" (src), /* input */ + "a" (exp), + "m" (*dst) + : "memory"); /* clobber list */ + return res; +} +/* Try to move the counter from 0 to 1; returns non-zero only if it was 0. */ +static inline int rte_atomic32_test_and_set(rte_atomic32_t *v) +{ + return rte_atomic32_cmpset((volatile uint32_t *)&v->cnt, 0, 1); +} +/* Increment v->cnt in place with a single MPLOCKED incl. */ +static inline void +rte_atomic32_inc(rte_atomic32_t *v) +{ + asm volatile( + MPLOCKED + "incl %[cnt]" + : [cnt] "=m" (v->cnt) /* output */ + : "m" (v->cnt) /* input */ + ); +} +/* Decrement v->cnt in place with a single MPLOCKED decl. */ +static inline void +rte_atomic32_dec(rte_atomic32_t *v) +{ + asm volatile( + MPLOCKED + "decl %[cnt]" + : [cnt] "=m" (v->cnt) /* output */ + : "m" (v->cnt) /* input */ + ); +} +/* Increment v->cnt and return 1 when the new value is zero (sete captures ZF of the incl), else 0. */ +static inline int rte_atomic32_inc_and_test(rte_atomic32_t *v) +{ + uint8_t ret; + + asm volatile( + MPLOCKED + "incl %[cnt] ; " + "sete %[ret]" + : [cnt] "+m" (v->cnt), /* output */ + [ret] "=qm" (ret) + ); + return (ret != 0); +} +/* Decrement v->cnt and return 1 when the new value is zero, else 0. */ +static inline int rte_atomic32_dec_and_test(rte_atomic32_t *v) +{ + uint8_t ret; + + asm volatile(MPLOCKED + "decl %[cnt] ; " + "sete %[ret]" + : [cnt] "+m" (v->cnt), /* output */ + [ret] "=qm" (ret) + ); + return (ret != 0); +} +#endif +/* The 64-bit operations come from a width-specific header selected by RTE_ARCH_I686. */ +#ifdef RTE_ARCH_I686 +#include "rte_atomic_32.h" +#else +#include "rte_atomic_64.h" +#endif + +#ifdef __cplusplus +} +#endif + +#endif /* _RTE_ATOMIC_X86_H_ */ diff --git a/src/dpdk22/lib/librte_eal/common/include/arch/x86/rte_atomic_32.h b/src/dpdk22/lib/librte_eal/common/include/arch/x86/rte_atomic_32.h new file mode 100644 index 00000000..400d8a96 --- /dev/null +++ b/src/dpdk22/lib/librte_eal/common/include/arch/x86/rte_atomic_32.h @@ -0,0 +1,222 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. 
+ * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +/* + * Inspired from FreeBSD src/sys/i386/include/atomic.h + * Copyright (c) 1998 Doug Rabson + * All rights reserved. 
+ */ + +#ifndef _RTE_ATOMIC_I686_H_ +#define _RTE_ATOMIC_I686_H_ + +/*------------------------- 64 bit atomic operations -------------------------*/ + +#ifndef RTE_FORCE_INTRINSICS +static inline int +rte_atomic64_cmpset(volatile uint64_t *dst, uint64_t exp, uint64_t src) +{ /* 64-bit compare-and-set on 32-bit x86 via cmpxchg8b: expected value in edx:eax, new value in ecx:ebx; returns non-zero when *dst equalled exp and was replaced by src. NOTE(review): on a failed compare cmpxchg8b loads *dst into edx:eax while edx is declared input-only; confirm this is safe for inlined callers. */ + uint8_t res; + union { + struct { + uint32_t l32; + uint32_t h32; + }; + uint64_t u64; + } _exp, _src; /* split both 64-bit operands into register-sized halves */ + + _exp.u64 = exp; + _src.u64 = src; + +#ifndef __PIC__ + asm volatile ( + MPLOCKED + "cmpxchg8b (%[dst]);" + "setz %[res];" + : [res] "=a" (res) /* result in eax */ + : [dst] "S" (dst), /* esi */ + "b" (_src.l32), /* ebx */ + "c" (_src.h32), /* ecx */ + "a" (_exp.l32), /* eax */ + "d" (_exp.h32) /* edx */ + : "memory" ); /* clobber list */ +#else + asm volatile ( + "xchgl %%ebx, %%edi;\n" /* PIC reserves ebx: swap it with edi so ebx holds the low word of src and edi parks the PIC register */ + MPLOCKED + "cmpxchg8b (%[dst]);" + "setz %[res];" + "xchgl %%ebx, %%edi;\n" /* swap back: ebx and edi regain their entry values */ + : [res] "=a" (res) /* result in eax */ + : [dst] "S" (dst), /* esi */ + "D" (_src.l32), /* edi, exchanged into ebx by the asm */ + "c" (_src.h32), /* ecx */ + "a" (_exp.l32), /* eax */ + "d" (_exp.h32) /* edx */ + : "memory" ); /* clobber list */ +#endif + + return res; +} +/* Zero the counter with a compare-and-set retry loop. */ +static inline void +rte_atomic64_init(rte_atomic64_t *v) +{ + int success = 0; + uint64_t tmp; + + while (success == 0) { + tmp = v->cnt; + success = rte_atomic64_cmpset((volatile uint64_t *)&v->cnt, + tmp, 0); + } +} +/* Read the counter: the snapshot is accepted only once a cmpset of the value onto itself succeeds. */ +static inline int64_t +rte_atomic64_read(rte_atomic64_t *v) +{ + int success = 0; + uint64_t tmp; + + while (success == 0) { + tmp = v->cnt; + /* replace the value by itself */ + success = rte_atomic64_cmpset((volatile uint64_t *)&v->cnt, + tmp, tmp); + } + return tmp; +} +/* Store new_value with a compare-and-set retry loop. */ +static inline void +rte_atomic64_set(rte_atomic64_t *v, int64_t new_value) +{ + int success = 0; + uint64_t tmp; + + while (success == 0) { + tmp = v->cnt; + success = rte_atomic64_cmpset((volatile uint64_t *)&v->cnt, + tmp, new_value); + } +} +/* Add inc to the counter with a compare-and-set retry loop. */ +static inline void +rte_atomic64_add(rte_atomic64_t *v, int64_t inc) +{ + int success = 0; + uint64_t tmp; + + while (success == 0) { + tmp = v->cnt; + 
success = rte_atomic64_cmpset((volatile uint64_t *)&v->cnt, + tmp, tmp + inc); + } +} +/* Subtract dec from the counter, retrying the compare-and-set until it lands. */ +static inline void +rte_atomic64_sub(rte_atomic64_t *v, int64_t dec) +{ + uint64_t old; + int swapped; + + do { + old = v->cnt; + swapped = rte_atomic64_cmpset((volatile uint64_t *)&v->cnt, + old, old - dec); + } while (swapped == 0); +} +/* Add one to the counter. */ +static inline void +rte_atomic64_inc(rte_atomic64_t *v) +{ + rte_atomic64_add(v, 1); +} +/* Subtract one from the counter. */ +static inline void +rte_atomic64_dec(rte_atomic64_t *v) +{ + rte_atomic64_sub(v, 1); +} +/* Add inc to the counter and return the value that was stored. */ +static inline int64_t +rte_atomic64_add_return(rte_atomic64_t *v, int64_t inc) +{ + uint64_t old; + int swapped; + + do { + old = v->cnt; + swapped = rte_atomic64_cmpset((volatile uint64_t *)&v->cnt, + old, old + inc); + } while (swapped == 0); + + return old + inc; +} +/* Subtract dec from the counter and return the value that was stored. */ +static inline int64_t +rte_atomic64_sub_return(rte_atomic64_t *v, int64_t dec) +{ + uint64_t old; + int swapped; + + do { + old = v->cnt; + swapped = rte_atomic64_cmpset((volatile uint64_t *)&v->cnt, + old, old - dec); + } while (swapped == 0); + + return old - dec; +} +/* Increment and report whether the counter reached zero. */ +static inline int rte_atomic64_inc_and_test(rte_atomic64_t *v) +{ + return !rte_atomic64_add_return(v, 1); +} +/* Decrement and report whether the counter reached zero. */ +static inline int rte_atomic64_dec_and_test(rte_atomic64_t *v) +{ + return !rte_atomic64_sub_return(v, 1); +} +/* Try to move the counter from 0 to 1; non-zero on success. */ +static inline int rte_atomic64_test_and_set(rte_atomic64_t *v) +{ + return rte_atomic64_cmpset((volatile uint64_t *)&v->cnt, 0, 1); +} +/* Reset the counter to zero. */ +static inline void rte_atomic64_clear(rte_atomic64_t *v) +{ + rte_atomic64_set(v, 0); +} +#endif + +#endif /* _RTE_ATOMIC_I686_H_ */ diff --git a/src/dpdk22/lib/librte_eal/common/include/arch/x86/rte_atomic_64.h b/src/dpdk22/lib/librte_eal/common/include/arch/x86/rte_atomic_64.h new file mode 100644 index 00000000..4de66000 --- /dev/null +++ b/src/dpdk22/lib/librte_eal/common/include/arch/x86/rte_atomic_64.h @@ -0,0 +1,191 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. + * All rights reserved. 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +/* + * Inspired from FreeBSD src/sys/amd64/include/atomic.h + * Copyright (c) 1998 Doug Rabson + * All rights reserved. 
+ */ + +#ifndef _RTE_ATOMIC_X86_64_H_ +#define _RTE_ATOMIC_X86_64_H_ + +/*------------------------- 64 bit atomic operations -------------------------*/ + +#ifndef RTE_FORCE_INTRINSICS +static inline int +rte_atomic64_cmpset(volatile uint64_t *dst, uint64_t exp, uint64_t src) +{ /* Compare-and-set: cmpxchgq stores src into *dst when *dst equals exp (passed in rax); sete converts ZF into the return value, so non-zero means the store happened. */ + uint8_t res; + + + asm volatile( + MPLOCKED + "cmpxchgq %[src], %[dst];" + "sete %[res];" + : [res] "=a" (res), /* output */ + [dst] "=m" (*dst) + : [src] "r" (src), /* input */ + "a" (exp), + "m" (*dst) + : "memory"); /* clobber list */ + + return res; +} +/* Plain store of zero to the counter. */ +static inline void +rte_atomic64_init(rte_atomic64_t *v) +{ + v->cnt = 0; +} +/* Plain load of the counter. */ +static inline int64_t +rte_atomic64_read(rte_atomic64_t *v) +{ + return v->cnt; +} +/* Plain store of new_value to the counter. */ +static inline void +rte_atomic64_set(rte_atomic64_t *v, int64_t new_value) +{ + v->cnt = new_value; +} +/* Add inc to v->cnt with a single MPLOCKED addq. */ +static inline void +rte_atomic64_add(rte_atomic64_t *v, int64_t inc) +{ + asm volatile( + MPLOCKED + "addq %[inc], %[cnt]" + : [cnt] "=m" (v->cnt) /* output */ + : [inc] "er" (inc), /* input: addq only encodes a sign-extended 32-bit immediate, wider constants must use a register */ + "m" (v->cnt) + ); +} +/* Subtract dec from v->cnt with a single MPLOCKED subq. */ +static inline void +rte_atomic64_sub(rte_atomic64_t *v, int64_t dec) +{ + asm volatile( + MPLOCKED + "subq %[dec], %[cnt]" + : [cnt] "=m" (v->cnt) /* output */ + : [dec] "er" (dec), /* input: same 32-bit immediate limit as addq */ + "m" (v->cnt) + ); +} +/* Increment v->cnt in place with a single MPLOCKED incq. */ +static inline void +rte_atomic64_inc(rte_atomic64_t *v) +{ + asm volatile( + MPLOCKED + "incq %[cnt]" + : [cnt] "=m" (v->cnt) /* output */ + : "m" (v->cnt) /* input */ + ); +} +/* Decrement v->cnt in place with a single MPLOCKED decq. */ +static inline void +rte_atomic64_dec(rte_atomic64_t *v) +{ + asm volatile( + MPLOCKED + "decq %[cnt]" + : [cnt] "=m" (v->cnt) /* output */ + : "m" (v->cnt) /* input */ + ); +} +/* Add inc and return the new value: xaddq leaves the previous counter value in prev, so the result is prev + inc. */ +static inline int64_t +rte_atomic64_add_return(rte_atomic64_t *v, int64_t inc) +{ + int64_t prev = inc; + + asm volatile( + MPLOCKED + "xaddq %[prev], %[cnt]" + : [prev] "+r" (prev), /* output */ + [cnt] "=m" (v->cnt) + : "m" (v->cnt) /* input */ + ); + return prev + inc; +} + +static inline int64_t +rte_atomic64_sub_return(rte_atomic64_t *v, int64_t dec) +{ + return 
rte_atomic64_add_return(v, -dec); +} +/* Increment v->cnt and return 1 when the new value is zero (sete captures ZF of the incq), else 0. */ +static inline int rte_atomic64_inc_and_test(rte_atomic64_t *v) +{ + uint8_t ret; + + asm volatile( + MPLOCKED + "incq %[cnt] ; " + "sete %[ret]" + : [cnt] "+m" (v->cnt), /* output */ + [ret] "=qm" (ret) + ); + + return ret != 0; +} +/* Decrement v->cnt and return 1 when the new value is zero, else 0. */ +static inline int rte_atomic64_dec_and_test(rte_atomic64_t *v) +{ + uint8_t ret; + + asm volatile( + MPLOCKED + "decq %[cnt] ; " + "sete %[ret]" + : [cnt] "+m" (v->cnt), /* output */ + [ret] "=qm" (ret) + ); + return ret != 0; +} +/* Try to move the counter from 0 to 1; returns non-zero only if it was 0. */ +static inline int rte_atomic64_test_and_set(rte_atomic64_t *v) +{ + return rte_atomic64_cmpset((volatile uint64_t *)&v->cnt, 0, 1); +} +/* Plain store of zero to the counter. */ +static inline void rte_atomic64_clear(rte_atomic64_t *v) +{ + v->cnt = 0; +} +#endif + +#endif /* _RTE_ATOMIC_X86_64_H_ */ diff --git a/src/dpdk22/lib/librte_eal/common/include/arch/x86/rte_byteorder.h b/src/dpdk22/lib/librte_eal/common/include/arch/x86/rte_byteorder.h new file mode 100644 index 00000000..ffdb6ef5 --- /dev/null +++ b/src/dpdk22/lib/librte_eal/common/include/arch/x86/rte_byteorder.h @@ -0,0 +1,125 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef _RTE_BYTEORDER_X86_H_ +#define _RTE_BYTEORDER_X86_H_ + +#ifdef __cplusplus +extern "C" { +#endif + +#include "generic/rte_byteorder.h" +/* Default to little-endian unless the build already defined the byte order. */ +#ifndef RTE_BYTE_ORDER +#define RTE_BYTE_ORDER RTE_LITTLE_ENDIAN +#endif + +/* + * An architecture-optimized byte swap for a 16-bit value. + * + * Do not use this function directly. The preferred function is rte_bswap16(). + * + * Implemented as a single xchgb between the low and high byte of the + * register holding x; the Q constraint limits x to a register that has + * an addressable high byte. + */ +static inline uint16_t rte_arch_bswap16(uint16_t _x) +{ + register uint16_t x = _x; + asm volatile ("xchgb %b[x1],%h[x2]" + : [x1] "=Q" (x) + : [x2] "0" (x) + ); + return x; +} + +/* + * An architecture-optimized byte swap for a 32-bit value. + * + * Do not use this function directly. The preferred function is rte_bswap32(). + * + * Implemented as one in-place bswap on the register holding x. + */ +static inline uint32_t rte_arch_bswap32(uint32_t _x) +{ + register uint32_t x = _x; + asm volatile ("bswap %[x]" + : [x] "+r" (x) + ); + return x; +} +/* Public swap macros: compile-time constants use rte_constant_bswapNN(), everything else the asm helpers. */ +#ifndef RTE_FORCE_INTRINSICS +#define rte_bswap16(x) ((uint16_t)(__builtin_constant_p(x) ? \ + rte_constant_bswap16(x) : \ + rte_arch_bswap16(x))) + +#define rte_bswap32(x) ((uint32_t)(__builtin_constant_p(x) ? 
\ + rte_constant_bswap32(x) : \ + rte_arch_bswap32(x))) + +#define rte_bswap64(x) ((uint64_t)(__builtin_constant_p(x) ? \ + rte_constant_bswap64(x) : \ + rte_arch_bswap64(x))) +#else +/* + * __builtin_bswap16 is only available in gcc 4.8 and upwards, so with + * RTE_FORCE_INTRINSICS on older compilers rte_bswap16 still falls back + * to the asm helper. + */ +#if __GNUC__ < 4 || (__GNUC__ == 4 && __GNUC_MINOR__ < 8) +#define rte_bswap16(x) ((uint16_t)(__builtin_constant_p(x) ? \ + rte_constant_bswap16(x) : \ + rte_arch_bswap16(x))) +#endif +#endif +/* Endian conversions: the little-endian forms are the identity, the big-endian forms byte-swap. */ +#define rte_cpu_to_le_16(x) (x) +#define rte_cpu_to_le_32(x) (x) +#define rte_cpu_to_le_64(x) (x) + +#define rte_cpu_to_be_16(x) rte_bswap16(x) +#define rte_cpu_to_be_32(x) rte_bswap32(x) +#define rte_cpu_to_be_64(x) rte_bswap64(x) + +#define rte_le_to_cpu_16(x) (x) +#define rte_le_to_cpu_32(x) (x) +#define rte_le_to_cpu_64(x) (x) + +#define rte_be_to_cpu_16(x) rte_bswap16(x) +#define rte_be_to_cpu_32(x) rte_bswap32(x) +#define rte_be_to_cpu_64(x) rte_bswap64(x) +/* rte_arch_bswap64() comes from the width-specific header selected here. */ +#ifdef RTE_ARCH_I686 +#include "rte_byteorder_32.h" +#else +#include "rte_byteorder_64.h" +#endif + +#ifdef __cplusplus +} +#endif + +#endif /* _RTE_BYTEORDER_X86_H_ */ diff --git a/src/dpdk22/lib/librte_eal/common/include/arch/x86/rte_byteorder_32.h b/src/dpdk22/lib/librte_eal/common/include/arch/x86/rte_byteorder_32.h new file mode 100644 index 00000000..51c306f8 --- /dev/null +++ b/src/dpdk22/lib/librte_eal/common/include/arch/x86/rte_byteorder_32.h @@ -0,0 +1,51 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. 
+ * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef _RTE_BYTEORDER_I686_H_ +#define _RTE_BYTEORDER_I686_H_ + +/* + * An architecture-optimized byte swap for a 64-bit value. + * + * Do not use this function directly. The preferred function is rte_bswap64(). + */ +/* Compat./Leg. 
mode */ +static inline uint64_t rte_arch_bswap64(uint64_t x) +{ + const uint32_t lo = x & 0xffffffffUL; /* low word: swapped, then moved to the high half */ + const uint32_t hi = (x >> 32) & 0xffffffffUL; /* high word: swapped, then kept in the low half */ + return ((uint64_t)rte_arch_bswap32(lo) << 32) | + (uint64_t)rte_arch_bswap32(hi); +} + +#endif /* _RTE_BYTEORDER_I686_H_ */ diff --git a/src/dpdk22/lib/librte_eal/common/include/arch/x86/rte_byteorder_64.h b/src/dpdk22/lib/librte_eal/common/include/arch/x86/rte_byteorder_64.h new file mode 100644 index 00000000..dda572bd --- /dev/null +++ b/src/dpdk22/lib/librte_eal/common/include/arch/x86/rte_byteorder_64.h @@ -0,0 +1,52 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef _RTE_BYTEORDER_X86_64_H_ +#define _RTE_BYTEORDER_X86_64_H_ + +/* + * An architecture-optimized byte swap for a 64-bit value. + * + * Do not use this function directly. The preferred function is rte_bswap64(). + * + * Implemented as one in-place bswap on the 64-bit register holding x. + */ +/* 64-bit mode */ +static inline uint64_t rte_arch_bswap64(uint64_t _x) +{ + register uint64_t x = _x; + asm volatile ("bswap %[x]" + : [x] "+r" (x) + ); + return x; +} + +#endif /* _RTE_BYTEORDER_X86_64_H_ */ diff --git a/src/dpdk22/lib/librte_eal/common/include/arch/x86/rte_cpuflags.h b/src/dpdk22/lib/librte_eal/common/include/arch/x86/rte_cpuflags.h new file mode 100644 index 00000000..dd565535 --- /dev/null +++ b/src/dpdk22/lib/librte_eal/common/include/arch/x86/rte_cpuflags.h @@ -0,0 +1,310 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. 
+ * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#ifndef _RTE_CPUFLAGS_X86_64_H_ +#define _RTE_CPUFLAGS_X86_64_H_ + +#ifdef __cplusplus +extern "C" { +#endif + +#include +#include +#include +#include + +#include "generic/rte_cpuflags.h" +/* CPU feature identifiers, grouped by the CPUID leaf and register that reports them; rte_cpu_get_flag_enabled() uses the value as an index into cpu_feature_table. */ +enum rte_cpu_flag_t { + /* (EAX 01h) ECX features*/ + RTE_CPUFLAG_SSE3 = 0, /**< SSE3 */ + RTE_CPUFLAG_PCLMULQDQ, /**< PCLMULQDQ */ + RTE_CPUFLAG_DTES64, /**< DTES64 */ + RTE_CPUFLAG_MONITOR, /**< MONITOR */ + RTE_CPUFLAG_DS_CPL, /**< DS_CPL */ + RTE_CPUFLAG_VMX, /**< VMX */ + RTE_CPUFLAG_SMX, /**< SMX */ + RTE_CPUFLAG_EIST, /**< EIST */ + RTE_CPUFLAG_TM2, /**< TM2 */ + RTE_CPUFLAG_SSSE3, /**< SSSE3 */ + RTE_CPUFLAG_CNXT_ID, /**< CNXT_ID */ + RTE_CPUFLAG_FMA, /**< FMA */ + RTE_CPUFLAG_CMPXCHG16B, /**< CMPXCHG16B */ + RTE_CPUFLAG_XTPR, /**< XTPR */ + RTE_CPUFLAG_PDCM, /**< PDCM */ + RTE_CPUFLAG_PCID, /**< PCID */ + RTE_CPUFLAG_DCA, /**< DCA */ + RTE_CPUFLAG_SSE4_1, /**< SSE4_1 */ + RTE_CPUFLAG_SSE4_2, /**< SSE4_2 */ + RTE_CPUFLAG_X2APIC, /**< X2APIC */ + RTE_CPUFLAG_MOVBE, /**< MOVBE */ + RTE_CPUFLAG_POPCNT, /**< POPCNT */ + RTE_CPUFLAG_TSC_DEADLINE, /**< TSC_DEADLINE */ + RTE_CPUFLAG_AES, /**< AES */ + RTE_CPUFLAG_XSAVE, /**< XSAVE */ + RTE_CPUFLAG_OSXSAVE, /**< OSXSAVE */ + RTE_CPUFLAG_AVX, /**< AVX */ + RTE_CPUFLAG_F16C, /**< F16C */ + RTE_CPUFLAG_RDRAND, /**< RDRAND */ + + /* (EAX 01h) EDX features */ + RTE_CPUFLAG_FPU, /**< FPU */ + RTE_CPUFLAG_VME, /**< VME */ + RTE_CPUFLAG_DE, /**< DE */ + RTE_CPUFLAG_PSE, /**< PSE */ + RTE_CPUFLAG_TSC, /**< TSC */ + RTE_CPUFLAG_MSR, /**< MSR */ + RTE_CPUFLAG_PAE, /**< PAE */ + RTE_CPUFLAG_MCE, /**< MCE */ + RTE_CPUFLAG_CX8, /**< CX8 */ + RTE_CPUFLAG_APIC, /**< APIC */ + RTE_CPUFLAG_SEP, /**< SEP */ + RTE_CPUFLAG_MTRR, /**< MTRR */ + RTE_CPUFLAG_PGE, /**< PGE */ + RTE_CPUFLAG_MCA, /**< MCA */ + RTE_CPUFLAG_CMOV, /**< CMOV */ + RTE_CPUFLAG_PAT, /**< PAT */ + RTE_CPUFLAG_PSE36, /**< PSE36 */ + RTE_CPUFLAG_PSN, /**< PSN */ + RTE_CPUFLAG_CLFSH, /**< CLFSH */ + RTE_CPUFLAG_DS, /**< DS */ + RTE_CPUFLAG_ACPI, /**< ACPI */ + 
RTE_CPUFLAG_MMX, /**< MMX */ + RTE_CPUFLAG_FXSR, /**< FXSR */ + RTE_CPUFLAG_SSE, /**< SSE */ + RTE_CPUFLAG_SSE2, /**< SSE2 */ + RTE_CPUFLAG_SS, /**< SS */ + RTE_CPUFLAG_HTT, /**< HTT */ + RTE_CPUFLAG_TM, /**< TM */ + RTE_CPUFLAG_PBE, /**< PBE */ + + /* (EAX 06h) EAX features */ + RTE_CPUFLAG_DIGTEMP, /**< DIGTEMP */ + RTE_CPUFLAG_TRBOBST, /**< TRBOBST */ + RTE_CPUFLAG_ARAT, /**< ARAT */ + RTE_CPUFLAG_PLN, /**< PLN */ + RTE_CPUFLAG_ECMD, /**< ECMD */ + RTE_CPUFLAG_PTM, /**< PTM */ + + /* (EAX 06h) ECX features */ + RTE_CPUFLAG_MPERF_APERF_MSR, /**< MPERF_APERF_MSR */ + RTE_CPUFLAG_ACNT2, /**< ACNT2 */ + RTE_CPUFLAG_ENERGY_EFF, /**< ENERGY_EFF */ + + /* (EAX 07h, ECX 0h) EBX features */ + RTE_CPUFLAG_FSGSBASE, /**< FSGSBASE */ + RTE_CPUFLAG_BMI1, /**< BMI1 */ + RTE_CPUFLAG_HLE, /**< Hardware Lock elision */ + RTE_CPUFLAG_AVX2, /**< AVX2 */ + RTE_CPUFLAG_SMEP, /**< SMEP */ + RTE_CPUFLAG_BMI2, /**< BMI2 */ + RTE_CPUFLAG_ERMS, /**< ERMS */ + RTE_CPUFLAG_INVPCID, /**< INVPCID */ + RTE_CPUFLAG_RTM, /**< Transactional memory */ + + /* (EAX 80000001h) ECX features */ + RTE_CPUFLAG_LAHF_SAHF, /**< LAHF_SAHF */ + RTE_CPUFLAG_LZCNT, /**< LZCNT */ + + /* (EAX 80000001h) EDX features */ + RTE_CPUFLAG_SYSCALL, /**< SYSCALL */ + RTE_CPUFLAG_XD, /**< XD */ + RTE_CPUFLAG_1GB_PG, /**< 1GB_PG */ + RTE_CPUFLAG_RDTSCP, /**< RDTSCP */ + RTE_CPUFLAG_EM64T, /**< EM64T */ + + /* (EAX 80000007h) EDX features */ + RTE_CPUFLAG_INVTSC, /**< INVTSC */ + + /* The last item */ + RTE_CPUFLAG_NUMFLAGS, /**< This should always be the last! 
*/ +}; +/* Index of each CPUID output register inside a cpuid_registers_t array. */ +enum cpu_register_t { + RTE_REG_EAX = 0, + RTE_REG_EBX, + RTE_REG_ECX, + RTE_REG_EDX, +}; +/* One entry per feature flag; the FEAT_DEF arguments are (name, cpuid leaf, subleaf, output register, bit number). FEAT_DEF itself is defined outside this chunk. */ +static const struct feature_entry cpu_feature_table[] = { + FEAT_DEF(SSE3, 0x00000001, 0, RTE_REG_ECX, 0) + FEAT_DEF(PCLMULQDQ, 0x00000001, 0, RTE_REG_ECX, 1) + FEAT_DEF(DTES64, 0x00000001, 0, RTE_REG_ECX, 2) + FEAT_DEF(MONITOR, 0x00000001, 0, RTE_REG_ECX, 3) + FEAT_DEF(DS_CPL, 0x00000001, 0, RTE_REG_ECX, 4) + FEAT_DEF(VMX, 0x00000001, 0, RTE_REG_ECX, 5) + FEAT_DEF(SMX, 0x00000001, 0, RTE_REG_ECX, 6) + FEAT_DEF(EIST, 0x00000001, 0, RTE_REG_ECX, 7) + FEAT_DEF(TM2, 0x00000001, 0, RTE_REG_ECX, 8) + FEAT_DEF(SSSE3, 0x00000001, 0, RTE_REG_ECX, 9) + FEAT_DEF(CNXT_ID, 0x00000001, 0, RTE_REG_ECX, 10) + FEAT_DEF(FMA, 0x00000001, 0, RTE_REG_ECX, 12) + FEAT_DEF(CMPXCHG16B, 0x00000001, 0, RTE_REG_ECX, 13) + FEAT_DEF(XTPR, 0x00000001, 0, RTE_REG_ECX, 14) + FEAT_DEF(PDCM, 0x00000001, 0, RTE_REG_ECX, 15) + FEAT_DEF(PCID, 0x00000001, 0, RTE_REG_ECX, 17) + FEAT_DEF(DCA, 0x00000001, 0, RTE_REG_ECX, 18) + FEAT_DEF(SSE4_1, 0x00000001, 0, RTE_REG_ECX, 19) + FEAT_DEF(SSE4_2, 0x00000001, 0, RTE_REG_ECX, 20) + FEAT_DEF(X2APIC, 0x00000001, 0, RTE_REG_ECX, 21) + FEAT_DEF(MOVBE, 0x00000001, 0, RTE_REG_ECX, 22) + FEAT_DEF(POPCNT, 0x00000001, 0, RTE_REG_ECX, 23) + FEAT_DEF(TSC_DEADLINE, 0x00000001, 0, RTE_REG_ECX, 24) + FEAT_DEF(AES, 0x00000001, 0, RTE_REG_ECX, 25) + FEAT_DEF(XSAVE, 0x00000001, 0, RTE_REG_ECX, 26) + FEAT_DEF(OSXSAVE, 0x00000001, 0, RTE_REG_ECX, 27) + FEAT_DEF(AVX, 0x00000001, 0, RTE_REG_ECX, 28) + FEAT_DEF(F16C, 0x00000001, 0, RTE_REG_ECX, 29) + FEAT_DEF(RDRAND, 0x00000001, 0, RTE_REG_ECX, 30) + + FEAT_DEF(FPU, 0x00000001, 0, RTE_REG_EDX, 0) + FEAT_DEF(VME, 0x00000001, 0, RTE_REG_EDX, 1) + FEAT_DEF(DE, 0x00000001, 0, RTE_REG_EDX, 2) + FEAT_DEF(PSE, 0x00000001, 0, RTE_REG_EDX, 3) + FEAT_DEF(TSC, 0x00000001, 0, RTE_REG_EDX, 4) + FEAT_DEF(MSR, 0x00000001, 0, RTE_REG_EDX, 5) + FEAT_DEF(PAE, 0x00000001, 0, RTE_REG_EDX, 6) + FEAT_DEF(MCE, 0x00000001, 0, RTE_REG_EDX, 7) + 
FEAT_DEF(CX8, 0x00000001, 0, RTE_REG_EDX, 8) + FEAT_DEF(APIC, 0x00000001, 0, RTE_REG_EDX, 9) + FEAT_DEF(SEP, 0x00000001, 0, RTE_REG_EDX, 11) + FEAT_DEF(MTRR, 0x00000001, 0, RTE_REG_EDX, 12) + FEAT_DEF(PGE, 0x00000001, 0, RTE_REG_EDX, 13) + FEAT_DEF(MCA, 0x00000001, 0, RTE_REG_EDX, 14) + FEAT_DEF(CMOV, 0x00000001, 0, RTE_REG_EDX, 15) + FEAT_DEF(PAT, 0x00000001, 0, RTE_REG_EDX, 16) + FEAT_DEF(PSE36, 0x00000001, 0, RTE_REG_EDX, 17) + FEAT_DEF(PSN, 0x00000001, 0, RTE_REG_EDX, 18) + FEAT_DEF(CLFSH, 0x00000001, 0, RTE_REG_EDX, 19) + FEAT_DEF(DS, 0x00000001, 0, RTE_REG_EDX, 21) + FEAT_DEF(ACPI, 0x00000001, 0, RTE_REG_EDX, 22) + FEAT_DEF(MMX, 0x00000001, 0, RTE_REG_EDX, 23) + FEAT_DEF(FXSR, 0x00000001, 0, RTE_REG_EDX, 24) + FEAT_DEF(SSE, 0x00000001, 0, RTE_REG_EDX, 25) + FEAT_DEF(SSE2, 0x00000001, 0, RTE_REG_EDX, 26) + FEAT_DEF(SS, 0x00000001, 0, RTE_REG_EDX, 27) + FEAT_DEF(HTT, 0x00000001, 0, RTE_REG_EDX, 28) + FEAT_DEF(TM, 0x00000001, 0, RTE_REG_EDX, 29) + FEAT_DEF(PBE, 0x00000001, 0, RTE_REG_EDX, 31) + + FEAT_DEF(DIGTEMP, 0x00000006, 0, RTE_REG_EAX, 0) + FEAT_DEF(TRBOBST, 0x00000006, 0, RTE_REG_EAX, 1) + FEAT_DEF(ARAT, 0x00000006, 0, RTE_REG_EAX, 2) + FEAT_DEF(PLN, 0x00000006, 0, RTE_REG_EAX, 4) + FEAT_DEF(ECMD, 0x00000006, 0, RTE_REG_EAX, 5) + FEAT_DEF(PTM, 0x00000006, 0, RTE_REG_EAX, 6) + + FEAT_DEF(MPERF_APERF_MSR, 0x00000006, 0, RTE_REG_ECX, 0) + FEAT_DEF(ACNT2, 0x00000006, 0, RTE_REG_ECX, 1) + FEAT_DEF(ENERGY_EFF, 0x00000006, 0, RTE_REG_ECX, 3) + + FEAT_DEF(FSGSBASE, 0x00000007, 0, RTE_REG_EBX, 0) + FEAT_DEF(BMI1, 0x00000007, 0, RTE_REG_EBX, 2) + FEAT_DEF(HLE, 0x00000007, 0, RTE_REG_EBX, 4) + FEAT_DEF(AVX2, 0x00000007, 0, RTE_REG_EBX, 5) + FEAT_DEF(SMEP, 0x00000007, 0, RTE_REG_EBX, 6) + FEAT_DEF(BMI2, 0x00000007, 0, RTE_REG_EBX, 7) + FEAT_DEF(ERMS, 0x00000007, 0, RTE_REG_EBX, 8) + FEAT_DEF(INVPCID, 0x00000007, 0, RTE_REG_EBX, 10) + FEAT_DEF(RTM, 0x00000007, 0, RTE_REG_EBX, 11) + + FEAT_DEF(LAHF_SAHF, 0x80000001, 0, RTE_REG_ECX, 0) + FEAT_DEF(LZCNT, 0x80000001, 0, 
RTE_REG_ECX, 4) + + FEAT_DEF(SYSCALL, 0x80000001, 0, RTE_REG_EDX, 11) + FEAT_DEF(XD, 0x80000001, 0, RTE_REG_EDX, 20) + FEAT_DEF(1GB_PG, 0x80000001, 0, RTE_REG_EDX, 26) + FEAT_DEF(RDTSCP, 0x80000001, 0, RTE_REG_EDX, 27) + FEAT_DEF(EM64T, 0x80000001, 0, RTE_REG_EDX, 29) + + FEAT_DEF(INVTSC, 0x80000007, 0, RTE_REG_EDX, 8) +}; +/* Execute cpuid with eax = leaf and ecx = subleaf and store the four result registers in out, indexed by enum cpu_register_t. */ +static inline void +rte_cpu_get_features(uint32_t leaf, uint32_t subleaf, cpuid_registers_t out) +{ +#if defined(__i386__) && defined(__PIC__) + /* %ebx is a forbidden register if we compile with -fPIC or -fPIE */ + asm volatile("movl %%ebx,%0 ; cpuid ; xchgl %%ebx,%0" /* save ebx in a scratch register, then swap the cpuid result out and the saved ebx back in */ + : "=r" (out[RTE_REG_EBX]), + "=a" (out[RTE_REG_EAX]), + "=c" (out[RTE_REG_ECX]), + "=d" (out[RTE_REG_EDX]) + : "a" (leaf), "c" (subleaf)); +#else + + asm volatile("cpuid" + : "=a" (out[RTE_REG_EAX]), + "=b" (out[RTE_REG_EBX]), + "=c" (out[RTE_REG_ECX]), + "=d" (out[RTE_REG_EDX]) + : "a" (leaf), "c" (subleaf)); + +#endif +} +/* Report whether the CPU advertises a feature: returns 1 or 0, -ENOENT for an out-of-range flag, -EFAULT for a table entry whose leaf is zero. */ +static inline int +rte_cpu_get_flag_enabled(enum rte_cpu_flag_t feature) +{ + const struct feature_entry *feat; + cpuid_registers_t regs; + + + if (feature >= RTE_CPUFLAG_NUMFLAGS) + /* Flag does not match anything in the feature tables */ + return -ENOENT; + + feat = &cpu_feature_table[feature]; + + if (!feat->leaf) + /* This entry in the table wasn't filled out! 
*/ + return -EFAULT; +/* Query the first leaf of the range (upper 16 bits of feat->leaf): eax then holds the highest leaf the CPU supports. Report the feature as absent when eax lies in a different range or below the wanted leaf. */ + rte_cpu_get_features(feat->leaf & 0xffff0000, 0, regs); + if (((regs[RTE_REG_EAX] ^ feat->leaf) & 0xffff0000) || + regs[RTE_REG_EAX] < feat->leaf) + return 0; + + /* get the cpuid leaf containing the desired feature */ + rte_cpu_get_features(feat->leaf, feat->subleaf, regs); + + /* check if the feature is enabled */ + return (regs[feat->reg] >> feat->bit) & 1; +} + +#ifdef __cplusplus +} +#endif + +#endif /* _RTE_CPUFLAGS_X86_64_H_ */ diff --git a/src/dpdk22/lib/librte_eal/common/include/arch/x86/rte_cycles.h b/src/dpdk22/lib/librte_eal/common/include/arch/x86/rte_cycles.h new file mode 100644 index 00000000..6e3c7d89 --- /dev/null +++ b/src/dpdk22/lib/librte_eal/common/include/arch/x86/rte_cycles.h @@ -0,0 +1,121 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ +/* BSD LICENSE + * + * Copyright(c) 2013 6WIND. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of 6WIND S.A. nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef _RTE_CYCLES_X86_64_H_ +#define _RTE_CYCLES_X86_64_H_ + +#ifdef __cplusplus +extern "C" { +#endif + +#include "generic/rte_cycles.h" + +#ifdef RTE_LIBRTE_EAL_VMWARE_TSC_MAP_SUPPORT +/* Global switch to use VMWARE mapping of TSC instead of RDTSC */ +extern int rte_cycles_vmware_tsc_map; +#include +#endif + +static inline uint64_t +rte_rdtsc(void) +{ + union { + uint64_t tsc_64; + struct { + uint32_t lo_32; + uint32_t hi_32; + }; + } tsc; + +#ifdef RTE_LIBRTE_EAL_VMWARE_TSC_MAP_SUPPORT + if (unlikely(rte_cycles_vmware_tsc_map)) { + /* ecx = 0x10000 corresponds to the physical TSC for VMware */ + asm volatile("rdpmc" : + "=a" (tsc.lo_32), + "=d" (tsc.hi_32) : + "c"(0x10000)); + return tsc.tsc_64; + } +#endif + + asm volatile("rdtsc" : + "=a" (tsc.lo_32), + "=d" (tsc.hi_32)); + return tsc.tsc_64; +} + +static inline uint64_t +rte_rdtsc_precise(void) +{ + rte_mb(); + return rte_rdtsc(); +} + +static inline uint64_t +rte_get_tsc_cycles(void) { return rte_rdtsc(); } + +#ifdef __cplusplus +} +#endif + +#endif /* _RTE_CYCLES_X86_64_H_ */ diff --git a/src/dpdk22/lib/librte_eal/common/include/arch/x86/rte_memcpy.h b/src/dpdk22/lib/librte_eal/common/include/arch/x86/rte_memcpy.h new file mode 100644 index 00000000..6a574263 --- /dev/null +++ b/src/dpdk22/lib/librte_eal/common/include/arch/x86/rte_memcpy.h @@ -0,0 +1,639 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. 
+ * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef _RTE_MEMCPY_X86_64_H_ +#define _RTE_MEMCPY_X86_64_H_ + +/** + * @file + * + * Functions for SSE/AVX/AVX2 implementation of memcpy(). + */ + +#include +#include +#include +#include + +#ifdef __cplusplus +extern "C" { +#endif + +/** + * Copy bytes from one location to another. The locations must not overlap. 
+ * + * @note This is implemented as a macro, so it's address should not be taken + * and care is needed as parameter expressions may be evaluated multiple times. + * + * @param dst + * Pointer to the destination of the data. + * @param src + * Pointer to the source data. + * @param n + * Number of bytes to copy. + * @return + * Pointer to the destination data. + */ +static inline void * +rte_memcpy(void *dst, const void *src, size_t n) __attribute__((always_inline)); + +#ifdef RTE_MACHINE_CPUFLAG_AVX2 + +/** + * AVX2 implementation below + */ + +/** + * Copy 16 bytes from one location to another, + * locations should not overlap. + */ +static inline void +rte_mov16(uint8_t *dst, const uint8_t *src) +{ + __m128i xmm0; + + xmm0 = _mm_loadu_si128((const __m128i *)src); + _mm_storeu_si128((__m128i *)dst, xmm0); +} + +/** + * Copy 32 bytes from one location to another, + * locations should not overlap. + */ +static inline void +rte_mov32(uint8_t *dst, const uint8_t *src) +{ + __m256i ymm0; + + ymm0 = _mm256_loadu_si256((const __m256i *)src); + _mm256_storeu_si256((__m256i *)dst, ymm0); +} + +/** + * Copy 64 bytes from one location to another, + * locations should not overlap. + */ +static inline void +rte_mov64(uint8_t *dst, const uint8_t *src) +{ + rte_mov32((uint8_t *)dst + 0 * 32, (const uint8_t *)src + 0 * 32); + rte_mov32((uint8_t *)dst + 1 * 32, (const uint8_t *)src + 1 * 32); +} + +/** + * Copy 128 bytes from one location to another, + * locations should not overlap. + */ +static inline void +rte_mov128(uint8_t *dst, const uint8_t *src) +{ + rte_mov32((uint8_t *)dst + 0 * 32, (const uint8_t *)src + 0 * 32); + rte_mov32((uint8_t *)dst + 1 * 32, (const uint8_t *)src + 1 * 32); + rte_mov32((uint8_t *)dst + 2 * 32, (const uint8_t *)src + 2 * 32); + rte_mov32((uint8_t *)dst + 3 * 32, (const uint8_t *)src + 3 * 32); +} + +/** + * Copy 256 bytes from one location to another, + * locations should not overlap. 
+ */ +static inline void +rte_mov256(uint8_t *dst, const uint8_t *src) +{ + rte_mov32((uint8_t *)dst + 0 * 32, (const uint8_t *)src + 0 * 32); + rte_mov32((uint8_t *)dst + 1 * 32, (const uint8_t *)src + 1 * 32); + rte_mov32((uint8_t *)dst + 2 * 32, (const uint8_t *)src + 2 * 32); + rte_mov32((uint8_t *)dst + 3 * 32, (const uint8_t *)src + 3 * 32); + rte_mov32((uint8_t *)dst + 4 * 32, (const uint8_t *)src + 4 * 32); + rte_mov32((uint8_t *)dst + 5 * 32, (const uint8_t *)src + 5 * 32); + rte_mov32((uint8_t *)dst + 6 * 32, (const uint8_t *)src + 6 * 32); + rte_mov32((uint8_t *)dst + 7 * 32, (const uint8_t *)src + 7 * 32); +} + +/** + * Copy 64-byte blocks from one location to another, + * locations should not overlap. + */ +static inline void +rte_mov64blocks(uint8_t *dst, const uint8_t *src, size_t n) +{ + __m256i ymm0, ymm1; + + while (n >= 64) { + ymm0 = _mm256_loadu_si256((const __m256i *)((const uint8_t *)src + 0 * 32)); + n -= 64; + ymm1 = _mm256_loadu_si256((const __m256i *)((const uint8_t *)src + 1 * 32)); + src = (const uint8_t *)src + 64; + _mm256_storeu_si256((__m256i *)((uint8_t *)dst + 0 * 32), ymm0); + _mm256_storeu_si256((__m256i *)((uint8_t *)dst + 1 * 32), ymm1); + dst = (uint8_t *)dst + 64; + } +} + +/** + * Copy 256-byte blocks from one location to another, + * locations should not overlap. 
+ */ +static inline void +rte_mov256blocks(uint8_t *dst, const uint8_t *src, size_t n) +{ + __m256i ymm0, ymm1, ymm2, ymm3, ymm4, ymm5, ymm6, ymm7; + + while (n >= 256) { + ymm0 = _mm256_loadu_si256((const __m256i *)((const uint8_t *)src + 0 * 32)); + n -= 256; + ymm1 = _mm256_loadu_si256((const __m256i *)((const uint8_t *)src + 1 * 32)); + ymm2 = _mm256_loadu_si256((const __m256i *)((const uint8_t *)src + 2 * 32)); + ymm3 = _mm256_loadu_si256((const __m256i *)((const uint8_t *)src + 3 * 32)); + ymm4 = _mm256_loadu_si256((const __m256i *)((const uint8_t *)src + 4 * 32)); + ymm5 = _mm256_loadu_si256((const __m256i *)((const uint8_t *)src + 5 * 32)); + ymm6 = _mm256_loadu_si256((const __m256i *)((const uint8_t *)src + 6 * 32)); + ymm7 = _mm256_loadu_si256((const __m256i *)((const uint8_t *)src + 7 * 32)); + src = (const uint8_t *)src + 256; + _mm256_storeu_si256((__m256i *)((uint8_t *)dst + 0 * 32), ymm0); + _mm256_storeu_si256((__m256i *)((uint8_t *)dst + 1 * 32), ymm1); + _mm256_storeu_si256((__m256i *)((uint8_t *)dst + 2 * 32), ymm2); + _mm256_storeu_si256((__m256i *)((uint8_t *)dst + 3 * 32), ymm3); + _mm256_storeu_si256((__m256i *)((uint8_t *)dst + 4 * 32), ymm4); + _mm256_storeu_si256((__m256i *)((uint8_t *)dst + 5 * 32), ymm5); + _mm256_storeu_si256((__m256i *)((uint8_t *)dst + 6 * 32), ymm6); + _mm256_storeu_si256((__m256i *)((uint8_t *)dst + 7 * 32), ymm7); + dst = (uint8_t *)dst + 256; + } +} + +static inline void * +rte_memcpy(void *dst, const void *src, size_t n) +{ + uintptr_t dstu = (uintptr_t)dst; + uintptr_t srcu = (uintptr_t)src; + void *ret = dst; + size_t dstofss; + size_t bits; + + /** + * Copy less than 16 bytes + */ + if (n < 16) { + if (n & 0x01) { + *(uint8_t *)dstu = *(const uint8_t *)srcu; + srcu = (uintptr_t)((const uint8_t *)srcu + 1); + dstu = (uintptr_t)((uint8_t *)dstu + 1); + } + if (n & 0x02) { + *(uint16_t *)dstu = *(const uint16_t *)srcu; + srcu = (uintptr_t)((const uint16_t *)srcu + 1); + dstu = (uintptr_t)((uint16_t *)dstu + 1); + 
} + if (n & 0x04) { + *(uint32_t *)dstu = *(const uint32_t *)srcu; + srcu = (uintptr_t)((const uint32_t *)srcu + 1); + dstu = (uintptr_t)((uint32_t *)dstu + 1); + } + if (n & 0x08) { + *(uint64_t *)dstu = *(const uint64_t *)srcu; + } + return ret; + } + + /** + * Fast way when copy size doesn't exceed 512 bytes + */ + if (n <= 32) { + rte_mov16((uint8_t *)dst, (const uint8_t *)src); + rte_mov16((uint8_t *)dst - 16 + n, (const uint8_t *)src - 16 + n); + return ret; + } + if (n <= 64) { + rte_mov32((uint8_t *)dst, (const uint8_t *)src); + rte_mov32((uint8_t *)dst - 32 + n, (const uint8_t *)src - 32 + n); + return ret; + } + if (n <= 512) { + if (n >= 256) { + n -= 256; + rte_mov256((uint8_t *)dst, (const uint8_t *)src); + src = (const uint8_t *)src + 256; + dst = (uint8_t *)dst + 256; + } + if (n >= 128) { + n -= 128; + rte_mov128((uint8_t *)dst, (const uint8_t *)src); + src = (const uint8_t *)src + 128; + dst = (uint8_t *)dst + 128; + } + if (n >= 64) { + n -= 64; + rte_mov64((uint8_t *)dst, (const uint8_t *)src); + src = (const uint8_t *)src + 64; + dst = (uint8_t *)dst + 64; + } +COPY_BLOCK_64_BACK31: + if (n > 32) { + rte_mov32((uint8_t *)dst, (const uint8_t *)src); + rte_mov32((uint8_t *)dst - 32 + n, (const uint8_t *)src - 32 + n); + return ret; + } + if (n > 0) { + rte_mov32((uint8_t *)dst - 32 + n, (const uint8_t *)src - 32 + n); + } + return ret; + } + + /** + * Make store aligned when copy size exceeds 512 bytes + */ + dstofss = 32 - ((uintptr_t)dst & 0x1F); + n -= dstofss; + rte_mov32((uint8_t *)dst, (const uint8_t *)src); + src = (const uint8_t *)src + dstofss; + dst = (uint8_t *)dst + dstofss; + + /** + * Copy 256-byte blocks. + * Use copy block function for better instruction order control, + * which is important when load is unaligned. + */ + rte_mov256blocks((uint8_t *)dst, (const uint8_t *)src, n); + bits = n; + n = n & 255; + bits -= n; + src = (const uint8_t *)src + bits; + dst = (uint8_t *)dst + bits; + + /** + * Copy 64-byte blocks. 
+ * Use copy block function for better instruction order control, + * which is important when load is unaligned. + */ + if (n >= 64) { + rte_mov64blocks((uint8_t *)dst, (const uint8_t *)src, n); + bits = n; + n = n & 63; + bits -= n; + src = (const uint8_t *)src + bits; + dst = (uint8_t *)dst + bits; + } + + /** + * Copy whatever left + */ + goto COPY_BLOCK_64_BACK31; +} + +#else /* RTE_MACHINE_CPUFLAG_AVX2 */ + +/** + * SSE & AVX implementation below + */ + +/** + * Copy 16 bytes from one location to another, + * locations should not overlap. + */ +static inline void +rte_mov16(uint8_t *dst, const uint8_t *src) +{ + __m128i xmm0; + + xmm0 = _mm_loadu_si128((const __m128i *)(const __m128i *)src); + _mm_storeu_si128((__m128i *)dst, xmm0); +} + +/** + * Copy 32 bytes from one location to another, + * locations should not overlap. + */ +static inline void +rte_mov32(uint8_t *dst, const uint8_t *src) +{ + rte_mov16((uint8_t *)dst + 0 * 16, (const uint8_t *)src + 0 * 16); + rte_mov16((uint8_t *)dst + 1 * 16, (const uint8_t *)src + 1 * 16); +} + +/** + * Copy 64 bytes from one location to another, + * locations should not overlap. + */ +static inline void +rte_mov64(uint8_t *dst, const uint8_t *src) +{ + rte_mov16((uint8_t *)dst + 0 * 16, (const uint8_t *)src + 0 * 16); + rte_mov16((uint8_t *)dst + 1 * 16, (const uint8_t *)src + 1 * 16); + rte_mov16((uint8_t *)dst + 2 * 16, (const uint8_t *)src + 2 * 16); + rte_mov16((uint8_t *)dst + 3 * 16, (const uint8_t *)src + 3 * 16); +} + +/** + * Copy 128 bytes from one location to another, + * locations should not overlap. 
+ */ +static inline void +rte_mov128(uint8_t *dst, const uint8_t *src) +{ + rte_mov16((uint8_t *)dst + 0 * 16, (const uint8_t *)src + 0 * 16); + rte_mov16((uint8_t *)dst + 1 * 16, (const uint8_t *)src + 1 * 16); + rte_mov16((uint8_t *)dst + 2 * 16, (const uint8_t *)src + 2 * 16); + rte_mov16((uint8_t *)dst + 3 * 16, (const uint8_t *)src + 3 * 16); + rte_mov16((uint8_t *)dst + 4 * 16, (const uint8_t *)src + 4 * 16); + rte_mov16((uint8_t *)dst + 5 * 16, (const uint8_t *)src + 5 * 16); + rte_mov16((uint8_t *)dst + 6 * 16, (const uint8_t *)src + 6 * 16); + rte_mov16((uint8_t *)dst + 7 * 16, (const uint8_t *)src + 7 * 16); +} + +/** + * Copy 256 bytes from one location to another, + * locations should not overlap. + */ +static inline void +rte_mov256(uint8_t *dst, const uint8_t *src) +{ + rte_mov16((uint8_t *)dst + 0 * 16, (const uint8_t *)src + 0 * 16); + rte_mov16((uint8_t *)dst + 1 * 16, (const uint8_t *)src + 1 * 16); + rte_mov16((uint8_t *)dst + 2 * 16, (const uint8_t *)src + 2 * 16); + rte_mov16((uint8_t *)dst + 3 * 16, (const uint8_t *)src + 3 * 16); + rte_mov16((uint8_t *)dst + 4 * 16, (const uint8_t *)src + 4 * 16); + rte_mov16((uint8_t *)dst + 5 * 16, (const uint8_t *)src + 5 * 16); + rte_mov16((uint8_t *)dst + 6 * 16, (const uint8_t *)src + 6 * 16); + rte_mov16((uint8_t *)dst + 7 * 16, (const uint8_t *)src + 7 * 16); + rte_mov16((uint8_t *)dst + 8 * 16, (const uint8_t *)src + 8 * 16); + rte_mov16((uint8_t *)dst + 9 * 16, (const uint8_t *)src + 9 * 16); + rte_mov16((uint8_t *)dst + 10 * 16, (const uint8_t *)src + 10 * 16); + rte_mov16((uint8_t *)dst + 11 * 16, (const uint8_t *)src + 11 * 16); + rte_mov16((uint8_t *)dst + 12 * 16, (const uint8_t *)src + 12 * 16); + rte_mov16((uint8_t *)dst + 13 * 16, (const uint8_t *)src + 13 * 16); + rte_mov16((uint8_t *)dst + 14 * 16, (const uint8_t *)src + 14 * 16); + rte_mov16((uint8_t *)dst + 15 * 16, (const uint8_t *)src + 15 * 16); +} + +/** + * Macro for copying unaligned block from one location to another with constant 
load offset, + * 47 bytes leftover maximum, + * locations should not overlap. + * Requirements: + * - Store is aligned + * - Load offset is , which must be immediate value within [1, 15] + * - For , make sure bit backwards & <16 - offset> bit forwards are available for loading + * - , , must be variables + * - __m128i ~ must be pre-defined + */ +#define MOVEUNALIGNED_LEFT47_IMM(dst, src, len, offset) \ +({ \ + int tmp; \ + while (len >= 128 + 16 - offset) { \ + xmm0 = _mm_loadu_si128((const __m128i *)((const uint8_t *)src - offset + 0 * 16)); \ + len -= 128; \ + xmm1 = _mm_loadu_si128((const __m128i *)((const uint8_t *)src - offset + 1 * 16)); \ + xmm2 = _mm_loadu_si128((const __m128i *)((const uint8_t *)src - offset + 2 * 16)); \ + xmm3 = _mm_loadu_si128((const __m128i *)((const uint8_t *)src - offset + 3 * 16)); \ + xmm4 = _mm_loadu_si128((const __m128i *)((const uint8_t *)src - offset + 4 * 16)); \ + xmm5 = _mm_loadu_si128((const __m128i *)((const uint8_t *)src - offset + 5 * 16)); \ + xmm6 = _mm_loadu_si128((const __m128i *)((const uint8_t *)src - offset + 6 * 16)); \ + xmm7 = _mm_loadu_si128((const __m128i *)((const uint8_t *)src - offset + 7 * 16)); \ + xmm8 = _mm_loadu_si128((const __m128i *)((const uint8_t *)src - offset + 8 * 16)); \ + src = (const uint8_t *)src + 128; \ + _mm_storeu_si128((__m128i *)((uint8_t *)dst + 0 * 16), _mm_alignr_epi8(xmm1, xmm0, offset)); \ + _mm_storeu_si128((__m128i *)((uint8_t *)dst + 1 * 16), _mm_alignr_epi8(xmm2, xmm1, offset)); \ + _mm_storeu_si128((__m128i *)((uint8_t *)dst + 2 * 16), _mm_alignr_epi8(xmm3, xmm2, offset)); \ + _mm_storeu_si128((__m128i *)((uint8_t *)dst + 3 * 16), _mm_alignr_epi8(xmm4, xmm3, offset)); \ + _mm_storeu_si128((__m128i *)((uint8_t *)dst + 4 * 16), _mm_alignr_epi8(xmm5, xmm4, offset)); \ + _mm_storeu_si128((__m128i *)((uint8_t *)dst + 5 * 16), _mm_alignr_epi8(xmm6, xmm5, offset)); \ + _mm_storeu_si128((__m128i *)((uint8_t *)dst + 6 * 16), _mm_alignr_epi8(xmm7, xmm6, offset)); \ + 
_mm_storeu_si128((__m128i *)((uint8_t *)dst + 7 * 16), _mm_alignr_epi8(xmm8, xmm7, offset)); \ + dst = (uint8_t *)dst + 128; \ + } \ + tmp = len; \ + len = ((len - 16 + offset) & 127) + 16 - offset; \ + tmp -= len; \ + src = (const uint8_t *)src + tmp; \ + dst = (uint8_t *)dst + tmp; \ + if (len >= 32 + 16 - offset) { \ + while (len >= 32 + 16 - offset) { \ + xmm0 = _mm_loadu_si128((const __m128i *)((const uint8_t *)src - offset + 0 * 16)); \ + len -= 32; \ + xmm1 = _mm_loadu_si128((const __m128i *)((const uint8_t *)src - offset + 1 * 16)); \ + xmm2 = _mm_loadu_si128((const __m128i *)((const uint8_t *)src - offset + 2 * 16)); \ + src = (const uint8_t *)src + 32; \ + _mm_storeu_si128((__m128i *)((uint8_t *)dst + 0 * 16), _mm_alignr_epi8(xmm1, xmm0, offset)); \ + _mm_storeu_si128((__m128i *)((uint8_t *)dst + 1 * 16), _mm_alignr_epi8(xmm2, xmm1, offset)); \ + dst = (uint8_t *)dst + 32; \ + } \ + tmp = len; \ + len = ((len - 16 + offset) & 31) + 16 - offset; \ + tmp -= len; \ + src = (const uint8_t *)src + tmp; \ + dst = (uint8_t *)dst + tmp; \ + } \ +}) + +/** + * Macro for copying unaligned block from one location to another, + * 47 bytes leftover maximum, + * locations should not overlap. + * Use switch here because the aligning instruction requires immediate value for shift count. 
+ * Requirements: + * - Store is aligned + * - Load offset is , which must be within [1, 15] + * - For , make sure bit backwards & <16 - offset> bit forwards are available for loading + * - , , must be variables + * - __m128i ~ used in MOVEUNALIGNED_LEFT47_IMM must be pre-defined + */ +#define MOVEUNALIGNED_LEFT47(dst, src, len, offset) \ +({ \ + switch (offset) { \ + case 0x01: MOVEUNALIGNED_LEFT47_IMM(dst, src, n, 0x01); break; \ + case 0x02: MOVEUNALIGNED_LEFT47_IMM(dst, src, n, 0x02); break; \ + case 0x03: MOVEUNALIGNED_LEFT47_IMM(dst, src, n, 0x03); break; \ + case 0x04: MOVEUNALIGNED_LEFT47_IMM(dst, src, n, 0x04); break; \ + case 0x05: MOVEUNALIGNED_LEFT47_IMM(dst, src, n, 0x05); break; \ + case 0x06: MOVEUNALIGNED_LEFT47_IMM(dst, src, n, 0x06); break; \ + case 0x07: MOVEUNALIGNED_LEFT47_IMM(dst, src, n, 0x07); break; \ + case 0x08: MOVEUNALIGNED_LEFT47_IMM(dst, src, n, 0x08); break; \ + case 0x09: MOVEUNALIGNED_LEFT47_IMM(dst, src, n, 0x09); break; \ + case 0x0A: MOVEUNALIGNED_LEFT47_IMM(dst, src, n, 0x0A); break; \ + case 0x0B: MOVEUNALIGNED_LEFT47_IMM(dst, src, n, 0x0B); break; \ + case 0x0C: MOVEUNALIGNED_LEFT47_IMM(dst, src, n, 0x0C); break; \ + case 0x0D: MOVEUNALIGNED_LEFT47_IMM(dst, src, n, 0x0D); break; \ + case 0x0E: MOVEUNALIGNED_LEFT47_IMM(dst, src, n, 0x0E); break; \ + case 0x0F: MOVEUNALIGNED_LEFT47_IMM(dst, src, n, 0x0F); break; \ + default:; \ + } \ +}) + +static inline void * +rte_memcpy(void *dst, const void *src, size_t n) +{ + __m128i xmm0, xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8; + uintptr_t dstu = (uintptr_t)dst; + uintptr_t srcu = (uintptr_t)src; + void *ret = dst; + size_t dstofss; + size_t srcofs; + + /** + * Copy less than 16 bytes + */ + if (n < 16) { + if (n & 0x01) { + *(uint8_t *)dstu = *(const uint8_t *)srcu; + srcu = (uintptr_t)((const uint8_t *)srcu + 1); + dstu = (uintptr_t)((uint8_t *)dstu + 1); + } + if (n & 0x02) { + *(uint16_t *)dstu = *(const uint16_t *)srcu; + srcu = (uintptr_t)((const uint16_t *)srcu + 1); + 
dstu = (uintptr_t)((uint16_t *)dstu + 1); + } + if (n & 0x04) { + *(uint32_t *)dstu = *(const uint32_t *)srcu; + srcu = (uintptr_t)((const uint32_t *)srcu + 1); + dstu = (uintptr_t)((uint32_t *)dstu + 1); + } + if (n & 0x08) { + *(uint64_t *)dstu = *(const uint64_t *)srcu; + } + return ret; + } + + /** + * Fast way when copy size doesn't exceed 512 bytes + */ + if (n <= 32) { + rte_mov16((uint8_t *)dst, (const uint8_t *)src); + rte_mov16((uint8_t *)dst - 16 + n, (const uint8_t *)src - 16 + n); + return ret; + } + if (n <= 48) { + rte_mov32((uint8_t *)dst, (const uint8_t *)src); + rte_mov16((uint8_t *)dst - 16 + n, (const uint8_t *)src - 16 + n); + return ret; + } + if (n <= 64) { + rte_mov32((uint8_t *)dst, (const uint8_t *)src); + rte_mov16((uint8_t *)dst + 32, (const uint8_t *)src + 32); + rte_mov16((uint8_t *)dst - 16 + n, (const uint8_t *)src - 16 + n); + return ret; + } + if (n <= 128) { + goto COPY_BLOCK_128_BACK15; + } + if (n <= 512) { + if (n >= 256) { + n -= 256; + rte_mov128((uint8_t *)dst, (const uint8_t *)src); + rte_mov128((uint8_t *)dst + 128, (const uint8_t *)src + 128); + src = (const uint8_t *)src + 256; + dst = (uint8_t *)dst + 256; + } +COPY_BLOCK_255_BACK15: + if (n >= 128) { + n -= 128; + rte_mov128((uint8_t *)dst, (const uint8_t *)src); + src = (const uint8_t *)src + 128; + dst = (uint8_t *)dst + 128; + } +COPY_BLOCK_128_BACK15: + if (n >= 64) { + n -= 64; + rte_mov64((uint8_t *)dst, (const uint8_t *)src); + src = (const uint8_t *)src + 64; + dst = (uint8_t *)dst + 64; + } +COPY_BLOCK_64_BACK15: + if (n >= 32) { + n -= 32; + rte_mov32((uint8_t *)dst, (const uint8_t *)src); + src = (const uint8_t *)src + 32; + dst = (uint8_t *)dst + 32; + } + if (n > 16) { + rte_mov16((uint8_t *)dst, (const uint8_t *)src); + rte_mov16((uint8_t *)dst - 16 + n, (const uint8_t *)src - 16 + n); + return ret; + } + if (n > 0) { + rte_mov16((uint8_t *)dst - 16 + n, (const uint8_t *)src - 16 + n); + } + return ret; + } + + /** + * Make store aligned when copy size 
exceeds 512 bytes, + * and make sure the first 15 bytes are copied, because + * unaligned copy functions require up to 15 bytes + * backwards access. + */ + dstofss = 16 - ((uintptr_t)dst & 0x0F) + 16; + n -= dstofss; + rte_mov32((uint8_t *)dst, (const uint8_t *)src); + src = (const uint8_t *)src + dstofss; + dst = (uint8_t *)dst + dstofss; + srcofs = ((uintptr_t)src & 0x0F); + + /** + * For aligned copy + */ + if (srcofs == 0) { + /** + * Copy 256-byte blocks + */ + for (; n >= 256; n -= 256) { + rte_mov256((uint8_t *)dst, (const uint8_t *)src); + dst = (uint8_t *)dst + 256; + src = (const uint8_t *)src + 256; + } + + /** + * Copy whatever left + */ + goto COPY_BLOCK_255_BACK15; + } + + /** + * For copy with unaligned load + */ + MOVEUNALIGNED_LEFT47(dst, src, n, srcofs); + + /** + * Copy whatever left + */ + goto COPY_BLOCK_64_BACK15; +} + +#endif /* RTE_MACHINE_CPUFLAG_AVX2 */ + +#ifdef __cplusplus +} +#endif + +#endif /* _RTE_MEMCPY_X86_64_H_ */ diff --git a/src/dpdk22/lib/librte_eal/common/include/arch/x86/rte_prefetch.h b/src/dpdk22/lib/librte_eal/common/include/arch/x86/rte_prefetch.h new file mode 100644 index 00000000..8e6e02cc --- /dev/null +++ b/src/dpdk22/lib/librte_eal/common/include/arch/x86/rte_prefetch.h @@ -0,0 +1,62 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2015 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. 
+ * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#ifndef _RTE_PREFETCH_X86_64_H_ +#define _RTE_PREFETCH_X86_64_H_ + +#ifdef __cplusplus +extern "C" { +#endif + +#include "generic/rte_prefetch.h" + +static inline void rte_prefetch0(const volatile void *p) +{ + asm volatile ("prefetcht0 %[p]" : : [p] "m" (*(const volatile char *)p)); +} + +static inline void rte_prefetch1(const volatile void *p) +{ + asm volatile ("prefetcht1 %[p]" : : [p] "m" (*(const volatile char *)p)); +} + +static inline void rte_prefetch2(const volatile void *p) +{ + asm volatile ("prefetcht2 %[p]" : : [p] "m" (*(const volatile char *)p)); +} + +#ifdef __cplusplus +} +#endif + +#endif /* _RTE_PREFETCH_X86_64_H_ */ diff --git a/src/dpdk22/lib/librte_eal/common/include/arch/x86/rte_rtm.h b/src/dpdk22/lib/librte_eal/common/include/arch/x86/rte_rtm.h new file mode 100644 index 00000000..d9356419 --- /dev/null +++ b/src/dpdk22/lib/librte_eal/common/include/arch/x86/rte_rtm.h @@ -0,0 +1,73 @@ +#ifndef _RTE_RTM_H_ +#define _RTE_RTM_H_ 1 + +/* + * Copyright (c) 2012,2013 Intel Corporation + * Author: Andi Kleen + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that: (1) source code distributions + * retain the above copyright notice and this paragraph in its entirety, (2) + * distributions including binary code include the above copyright notice and + * this paragraph in its entirety in the documentation or other materials + * provided with the distribution + * + * THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR IMPLIED + * WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. + */ + +/* Official RTM intrinsics interface matching gcc/icc, but works + on older gcc compatible compilers and binutils. 
*/ + + +#ifdef __cplusplus +extern "C" { +#endif + + +#define RTE_XBEGIN_STARTED (~0u) +#define RTE_XABORT_EXPLICIT (1 << 0) +#define RTE_XABORT_RETRY (1 << 1) +#define RTE_XABORT_CONFLICT (1 << 2) +#define RTE_XABORT_CAPACITY (1 << 3) +#define RTE_XABORT_DEBUG (1 << 4) +#define RTE_XABORT_NESTED (1 << 5) +#define RTE_XABORT_CODE(x) (((x) >> 24) & 0xff) + +static __attribute__((__always_inline__)) inline +unsigned int rte_xbegin(void) +{ + unsigned int ret = RTE_XBEGIN_STARTED; + + asm volatile(".byte 0xc7,0xf8 ; .long 0" : "+a" (ret) :: "memory"); + return ret; +} + +static __attribute__((__always_inline__)) inline +void rte_xend(void) +{ + asm volatile(".byte 0x0f,0x01,0xd5" ::: "memory"); +} + +static __attribute__((__always_inline__)) inline +void rte_xabort(const unsigned int status) +{ + asm volatile(".byte 0xc6,0xf8,%P0" :: "i" (status) : "memory"); +} + +static __attribute__((__always_inline__)) inline +int rte_xtest(void) +{ + unsigned char out; + + asm volatile(".byte 0x0f,0x01,0xd6 ; setnz %0" : + "=r" (out) :: "memory"); + return out; +} + +#ifdef __cplusplus +} +#endif + +#endif /* _RTE_RTM_H_ */ diff --git a/src/dpdk22/lib/librte_eal/common/include/arch/x86/rte_rwlock.h b/src/dpdk22/lib/librte_eal/common/include/arch/x86/rte_rwlock.h new file mode 100644 index 00000000..afd1c3c2 --- /dev/null +++ b/src/dpdk22/lib/librte_eal/common/include/arch/x86/rte_rwlock.h @@ -0,0 +1,82 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2015 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. 
+ * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#ifndef _RTE_RWLOCK_X86_64_H_ +#define _RTE_RWLOCK_X86_64_H_ + +#ifdef __cplusplus +extern "C" { +#endif + +#include "generic/rte_rwlock.h" +#include "rte_spinlock.h" + +static inline void +rte_rwlock_read_lock_tm(rte_rwlock_t *rwl) +{ + if (likely(rte_try_tm(&rwl->cnt))) + return; + rte_rwlock_read_lock(rwl); +} + +static inline void +rte_rwlock_read_unlock_tm(rte_rwlock_t *rwl) +{ + if (unlikely(rwl->cnt)) + rte_rwlock_read_unlock(rwl); + else + rte_xend(); +} + +static inline void +rte_rwlock_write_lock_tm(rte_rwlock_t *rwl) +{ + if (likely(rte_try_tm(&rwl->cnt))) + return; + rte_rwlock_write_lock(rwl); +} + +static inline void +rte_rwlock_write_unlock_tm(rte_rwlock_t *rwl) +{ + if (unlikely(rwl->cnt)) + rte_rwlock_write_unlock(rwl); + else + rte_xend(); +} + +#ifdef __cplusplus +} +#endif + +#endif /* _RTE_RWLOCK_X86_64_H_ */ diff --git a/src/dpdk22/lib/librte_eal/common/include/arch/x86/rte_spinlock.h b/src/dpdk22/lib/librte_eal/common/include/arch/x86/rte_spinlock.h new file mode 100644 index 00000000..20ef0a79 --- /dev/null +++ b/src/dpdk22/lib/librte_eal/common/include/arch/x86/rte_spinlock.h @@ -0,0 +1,201 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef _RTE_SPINLOCK_X86_64_H_ +#define _RTE_SPINLOCK_X86_64_H_ + +#ifdef __cplusplus +extern "C" { +#endif + +#include "generic/rte_spinlock.h" +#include "rte_rtm.h" +#include "rte_cpuflags.h" +#include "rte_branch_prediction.h" +#include "rte_common.h" + +#define RTE_RTM_MAX_RETRIES (10) +#define RTE_XABORT_LOCK_BUSY (0xff) + /* xchg-based lock, unlock and trylock; compiled only when RTE_FORCE_INTRINSICS is not defined. */ +#ifndef RTE_FORCE_INTRINSICS +static inline void +rte_spinlock_lock(rte_spinlock_t *sl) +{ + int lock_val = 1; + asm volatile ( + "1:\n" + "xchg %[locked], %[lv]\n" + "test %[lv], %[lv]\n" + "jz 3f\n" + "2:\n" + "pause\n" + "cmpl $0, %[locked]\n" + "jnz 2b\n" + "jmp 1b\n" + "3:\n" + : [locked] "=m" (sl->locked), [lv] "=q" (lock_val) + : "[lv]" (lock_val) + : "memory"); +} + /* Release the lock by exchanging 0 into sl->locked. */ +static inline void +rte_spinlock_unlock (rte_spinlock_t *sl) +{ + int unlock_val = 0; + asm volatile ( + "xchg %[locked], %[ulv]\n" + : [locked] "=m" (sl->locked), [ulv] "=q" (unlock_val) + : "[ulv]" (unlock_val) + : "memory"); +} + /* One xchg attempt: swaps 1 into sl->locked and returns 1 if the previous value was 0, else 0. */ +static inline int +rte_spinlock_trylock (rte_spinlock_t *sl) +{ + int lockval = 1; + + asm volatile ( + "xchg %[locked], %[lockval]" + : [locked] "=m" (sl->locked), [lockval] "=q" (lockval) + : "[lockval]" (lockval) + : "memory"); + + return
(lockval == 0); +} +#endif + +static uint8_t rtm_supported; /* cache the flag to avoid the overhead + of the rte_cpu_get_flag_enabled function */ + /* Declared as a constructor: stores rte_cpu_get_flag_enabled(RTE_CPUFLAG_RTM) in rtm_supported. */ +static inline void __attribute__((constructor)) +rte_rtm_init(void) +{ + rtm_supported = rte_cpu_get_flag_enabled(RTE_CPUFLAG_RTM); +} + /* Returns the cached rtm_supported flag. */ +static inline int rte_tm_supported(void) +{ + return rtm_supported; +} + /* Makes up to RTE_RTM_MAX_RETRIES calls to rte_xbegin(); returns 1 once one reports RTE_XBEGIN_STARTED while *lock reads 0. Returns 0 if rtm_supported is 0, the retries run out, or an abort status lacks RTE_XABORT_RETRY. */ +static inline int +rte_try_tm(volatile int *lock) +{ + if (!rtm_supported) + return 0; + + int retries = RTE_RTM_MAX_RETRIES; + + while (likely(retries--)) { + + unsigned int status = rte_xbegin(); + + if (likely(RTE_XBEGIN_STARTED == status)) { + if (unlikely(*lock)) + rte_xabort(RTE_XABORT_LOCK_BUSY); + else + return 1; + } + while (*lock) + rte_pause(); + + if ((status & RTE_XABORT_EXPLICIT) && + (RTE_XABORT_CODE(status) == RTE_XABORT_LOCK_BUSY)) + continue; + + if ((status & RTE_XABORT_RETRY) == 0) /* do not retry */ + break; + } + return 0; +} + /* Lock: return at once if rte_try_tm() succeeds on sl->locked, otherwise take the lock with rte_spinlock_lock(). */ +static inline void +rte_spinlock_lock_tm(rte_spinlock_t *sl) +{ + if (likely(rte_try_tm(&sl->locked))) + return; + + rte_spinlock_lock(sl); /* fall-back */ +} + /* Trylock: returns 1 if rte_try_tm() succeeds on sl->locked, otherwise the result of rte_spinlock_trylock(). */ +static inline int +rte_spinlock_trylock_tm(rte_spinlock_t *sl) +{ + if (likely(rte_try_tm(&sl->locked))) + return 1; + + return rte_spinlock_trylock(sl); +} + /* Unlock: a non-zero sl->locked goes through rte_spinlock_unlock(), otherwise rte_xend() is called. */ +static inline void +rte_spinlock_unlock_tm(rte_spinlock_t *sl) +{ + if (unlikely(sl->locked)) + rte_spinlock_unlock(sl); + else + rte_xend(); +} + /* Recursive lock: return at once if rte_try_tm() succeeds on slr->sl.locked, otherwise call rte_spinlock_recursive_lock(). */ +static inline void +rte_spinlock_recursive_lock_tm(rte_spinlock_recursive_t *slr) +{ + if (likely(rte_try_tm(&slr->sl.locked))) + return; + + rte_spinlock_recursive_lock(slr); /* fall-back */ +} + /* Recursive unlock: a non-zero slr->sl.locked goes through rte_spinlock_recursive_unlock(), otherwise rte_xend() is called. */ +static inline void +rte_spinlock_recursive_unlock_tm(rte_spinlock_recursive_t *slr) +{ + if (unlikely(slr->sl.locked)) + rte_spinlock_recursive_unlock(slr); + else + rte_xend(); +} + /* Recursive trylock: returns 1 if rte_try_tm() succeeds on slr->sl.locked, otherwise the result of rte_spinlock_recursive_trylock(). */ +static inline int +rte_spinlock_recursive_trylock_tm(rte_spinlock_recursive_t *slr) +{ + if (likely(rte_try_tm(&slr->sl.locked))) + return 1; + + return rte_spinlock_recursive_trylock(slr); +} + + +#ifdef __cplusplus +} +#endif
+ +#endif /* _RTE_SPINLOCK_X86_64_H_ */ diff --git a/src/dpdk22/lib/librte_eal/common/include/arch/x86/rte_vect.h b/src/dpdk22/lib/librte_eal/common/include/arch/x86/rte_vect.h new file mode 100644 index 00000000..b698797c --- /dev/null +++ b/src/dpdk22/lib/librte_eal/common/include/arch/x86/rte_vect.h @@ -0,0 +1,132 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2015 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#ifndef _RTE_VECT_H_ +#define _RTE_VECT_H_ + +/** + * @file + * + * RTE SSE/AVX related header. + */ + +#if (defined(__ICC) || (__GNUC__ == 4 && __GNUC_MINOR__ < 4)) + +#ifdef __SSE__ +#include +#endif + +#ifdef __SSE2__ +#include +#endif + +#ifdef __SSE3__ +#include +#endif + +#if defined(__SSE4_2__) || defined(__SSE4_1__) +#include +#endif + +#if defined(__AVX__) +#include +#endif + +#else + +#include + +#endif + +#ifdef __cplusplus +extern "C" { +#endif + +typedef __m128i xmm_t; + +#define XMM_SIZE (sizeof(xmm_t)) +#define XMM_MASK (XMM_SIZE - 1) + +typedef union rte_xmm { + xmm_t x; + uint8_t u8[XMM_SIZE / sizeof(uint8_t)]; + uint16_t u16[XMM_SIZE / sizeof(uint16_t)]; + uint32_t u32[XMM_SIZE / sizeof(uint32_t)]; + uint64_t u64[XMM_SIZE / sizeof(uint64_t)]; + double pd[XMM_SIZE / sizeof(double)]; +} rte_xmm_t; + +#ifdef __AVX__ + +typedef __m256i ymm_t; + +#define YMM_SIZE (sizeof(ymm_t)) +#define YMM_MASK (YMM_SIZE - 1) + +typedef union rte_ymm { + ymm_t y; + xmm_t x[YMM_SIZE / sizeof(xmm_t)]; + uint8_t u8[YMM_SIZE / sizeof(uint8_t)]; + uint16_t u16[YMM_SIZE / sizeof(uint16_t)]; + uint32_t u32[YMM_SIZE / sizeof(uint32_t)]; + uint64_t u64[YMM_SIZE / sizeof(uint64_t)]; + double pd[YMM_SIZE / sizeof(double)]; +} rte_ymm_t; + +#endif /* __AVX__ */ + +#ifdef RTE_ARCH_I686 +#define _mm_cvtsi128_si64(a) ({ \ + rte_xmm_t m; \ + m.x = (a); \ + (m.u64[0]); \ +}) +#endif + +/* + * Prior to version 12.1 icc doesn't support _mm_set_epi64x. 
+ */ +#if (defined(__ICC) && __ICC < 1210) +#define _mm_set_epi64x(a, b) ({ \ + rte_xmm_t m; \ + m.u64[0] = b; \ + m.u64[1] = a; \ + (m.x); \ +}) +#endif /* (defined(__ICC) && __ICC < 1210) */ + +#ifdef __cplusplus +} +#endif + +#endif /* _RTE_VECT_H_ */ diff --git a/src/dpdk22/lib/librte_eal/common/include/generic/rte_atomic.h b/src/dpdk22/lib/librte_eal/common/include/generic/rte_atomic.h new file mode 100644 index 00000000..26d1f56d --- /dev/null +++ b/src/dpdk22/lib/librte_eal/common/include/generic/rte_atomic.h @@ -0,0 +1,945 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef _RTE_ATOMIC_H_ +#define _RTE_ATOMIC_H_ + +/** + * @file + * Atomic Operations + * + * This file defines a generic API for atomic operations. + */ + +#include + +#ifdef __DOXYGEN__ + +/** + * General memory barrier. + * + * Guarantees that the LOAD and STORE operations generated before the + * barrier occur before the LOAD and STORE operations generated after. + * This function is architecture dependent. + */ +static inline void rte_mb(void); + +/** + * Write memory barrier. + * + * Guarantees that the STORE operations generated before the barrier + * occur before the STORE operations generated after. + * This function is architecture dependent. + */ +static inline void rte_wmb(void); + +/** + * Read memory barrier. + * + * Guarantees that the LOAD operations generated before the barrier + * occur before the LOAD operations generated after. + * This function is architecture dependent. + */ +static inline void rte_rmb(void); + +/** + * General memory barrier between lcores + * + * Guarantees that the LOAD and STORE operations that precede the + * rte_smp_mb() call are globally visible across the lcores + * before the LOAD and STORE operations that follow it.
+ */ +static inline void rte_smp_mb(void); + +/** + * Write memory barrier between lcores + * + * Guarantees that the STORE operations that precede the + * rte_smp_wmb() call are globally visible across the lcores + * before the STORE operations that follow it. + */ +static inline void rte_smp_wmb(void); + +/** + * Read memory barrier between lcores + * + * Guarantees that the LOAD operations that precede the + * rte_smp_rmb() call are globally visible across the lcores + * before the LOAD operations that follow it. + */ +static inline void rte_smp_rmb(void); + +#endif /* __DOXYGEN__ */ + +/** + * Compiler barrier. + * + * Guarantees that operation reordering does not occur at compile time + * for operations directly before and after the barrier. + */ +#define rte_compiler_barrier() do { \ + asm volatile ("" : : : "memory"); \ +} while(0) + +/*------------------------- 16 bit atomic operations -------------------------*/ + +/** + * Atomic compare and set. + * + * (atomic) equivalent to: + * if (*dst == exp) + * *dst = src (all 16-bit words) + * + * @param dst + * The destination location into which the value will be written. + * @param exp + * The expected value. + * @param src + * The new value. + * @return + * Non-zero on success; 0 on failure. + */ +static inline int +rte_atomic16_cmpset(volatile uint16_t *dst, uint16_t exp, uint16_t src); + +#ifdef RTE_FORCE_INTRINSICS +static inline int +rte_atomic16_cmpset(volatile uint16_t *dst, uint16_t exp, uint16_t src) +{ + return __sync_bool_compare_and_swap(dst, exp, src); +} +#endif + +/** + * The atomic counter structure. + */ +typedef struct { + volatile int16_t cnt; /**< An internal counter value. */ +} rte_atomic16_t; + +/** + * Static initializer for an atomic counter. + */ +#define RTE_ATOMIC16_INIT(val) { (val) } + +/** + * Initialize an atomic counter. + * + * @param v + * A pointer to the atomic counter.
+ */ +static inline void +rte_atomic16_init(rte_atomic16_t *v) +{ + v->cnt = 0; +} + +/** + * Atomically read a 16-bit value from a counter. + * + * @param v + * A pointer to the atomic counter. + * @return + * The value of the counter. + */ +static inline int16_t +rte_atomic16_read(const rte_atomic16_t *v) +{ + return v->cnt; +} + +/** + * Atomically set a counter to a 16-bit value. + * + * @param v + * A pointer to the atomic counter. + * @param new_value + * The new value for the counter. + */ +static inline void +rte_atomic16_set(rte_atomic16_t *v, int16_t new_value) +{ + v->cnt = new_value; +} + +/** + * Atomically add a 16-bit value to an atomic counter. + * + * @param v + * A pointer to the atomic counter. + * @param inc + * The value to be added to the counter. + */ +static inline void +rte_atomic16_add(rte_atomic16_t *v, int16_t inc) +{ + __sync_fetch_and_add(&v->cnt, inc); +} + +/** + * Atomically subtract a 16-bit value from an atomic counter. + * + * @param v + * A pointer to the atomic counter. + * @param dec + * The value to be subtracted from the counter. + */ +static inline void +rte_atomic16_sub(rte_atomic16_t *v, int16_t dec) +{ + __sync_fetch_and_sub(&v->cnt, dec); +} + +/** + * Atomically increment a counter by one. + * + * @param v + * A pointer to the atomic counter. + */ +static inline void +rte_atomic16_inc(rte_atomic16_t *v); + +#ifdef RTE_FORCE_INTRINSICS +static inline void +rte_atomic16_inc(rte_atomic16_t *v) +{ + rte_atomic16_add(v, 1); +} +#endif + +/** + * Atomically decrement a counter by one. + * + * @param v + * A pointer to the atomic counter. + */ +static inline void +rte_atomic16_dec(rte_atomic16_t *v); + +#ifdef RTE_FORCE_INTRINSICS +static inline void +rte_atomic16_dec(rte_atomic16_t *v) +{ + rte_atomic16_sub(v, 1); +} +#endif + +/** + * Atomically add a 16-bit value to a counter and return the result. + * + * Atomically adds the 16-bit value (inc) to the atomic counter (v) and + * returns the value of v after addition.
+ * + * @param v + * A pointer to the atomic counter. + * @param inc + * The value to be added to the counter. + * @return + * The value of v after the addition. + */ +static inline int16_t +rte_atomic16_add_return(rte_atomic16_t *v, int16_t inc) +{ + return __sync_add_and_fetch(&v->cnt, inc); +} + +/** + * Atomically subtract a 16-bit value from a counter and return + * the result. + * + * Atomically subtracts the 16-bit value (dec) from the atomic counter + * (v) and returns the value of v after the subtraction. + * + * @param v + * A pointer to the atomic counter. + * @param dec + * The value to be subtracted from the counter. + * @return + * The value of v after the subtraction. + */ +static inline int16_t +rte_atomic16_sub_return(rte_atomic16_t *v, int16_t dec) +{ + return __sync_sub_and_fetch(&v->cnt, dec); +} + +/** + * Atomically increment a 16-bit counter by one and test. + * + * Atomically increments the atomic counter (v) by one and returns true if + * the result is 0, or false in all other cases. + * + * @param v + * A pointer to the atomic counter. + * @return + * True if the result after the increment operation is 0; false otherwise. + */ +static inline int rte_atomic16_inc_and_test(rte_atomic16_t *v); + +#ifdef RTE_FORCE_INTRINSICS +static inline int rte_atomic16_inc_and_test(rte_atomic16_t *v) +{ + return (__sync_add_and_fetch(&v->cnt, 1) == 0); +} +#endif + +/** + * Atomically decrement a 16-bit counter by one and test. + * + * Atomically decrements the atomic counter (v) by one and returns true if + * the result is 0, or false in all other cases. + * + * @param v + * A pointer to the atomic counter. + * @return + * True if the result after the decrement operation is 0; false otherwise.
+ */ +static inline int rte_atomic16_dec_and_test(rte_atomic16_t *v); + +#ifdef RTE_FORCE_INTRINSICS +static inline int rte_atomic16_dec_and_test(rte_atomic16_t *v) +{ + return (__sync_sub_and_fetch(&v->cnt, 1) == 0); +} +#endif + +/** + * Atomically test and set a 16-bit atomic counter. + * + * If the counter value is already set, return 0 (failed). Otherwise, set + * the counter value to 1 and return 1 (success). + * + * @param v + * A pointer to the atomic counter. + * @return + * 0 if failed; else 1, success. + */ +static inline int rte_atomic16_test_and_set(rte_atomic16_t *v); + +#ifdef RTE_FORCE_INTRINSICS +static inline int rte_atomic16_test_and_set(rte_atomic16_t *v) +{ + return rte_atomic16_cmpset((volatile uint16_t *)&v->cnt, 0, 1); +} +#endif + +/** + * Atomically set a 16-bit counter to 0. + * + * @param v + * A pointer to the atomic counter. + */ +static inline void rte_atomic16_clear(rte_atomic16_t *v) +{ + v->cnt = 0; +} + +/*------------------------- 32 bit atomic operations -------------------------*/ + +/** + * Atomic compare and set. + * + * (atomic) equivalent to: + * if (*dst == exp) + * *dst = src (all 32-bit words) + * + * @param dst + * The destination location into which the value will be written. + * @param exp + * The expected value. + * @param src + * The new value. + * @return + * Non-zero on success; 0 on failure. + */ +static inline int +rte_atomic32_cmpset(volatile uint32_t *dst, uint32_t exp, uint32_t src); + +#ifdef RTE_FORCE_INTRINSICS +static inline int +rte_atomic32_cmpset(volatile uint32_t *dst, uint32_t exp, uint32_t src) +{ + return __sync_bool_compare_and_swap(dst, exp, src); +} +#endif + +/** + * The atomic counter structure. + */ +typedef struct { + volatile int32_t cnt; /**< An internal counter value. */ +} rte_atomic32_t; + +/** + * Static initializer for an atomic counter. + */ +#define RTE_ATOMIC32_INIT(val) { (val) } + +/** + * Initialize an atomic counter. + * + * @param v + * A pointer to the atomic counter. 
+ */ +static inline void +rte_atomic32_init(rte_atomic32_t *v) +{ + v->cnt = 0; +} + +/** + * Atomically read a 32-bit value from a counter. + * + * @param v + * A pointer to the atomic counter. + * @return + * The value of the counter. + */ +static inline int32_t +rte_atomic32_read(const rte_atomic32_t *v) +{ + return v->cnt; +} + +/** + * Atomically set a counter to a 32-bit value. + * + * @param v + * A pointer to the atomic counter. + * @param new_value + * The new value for the counter. + */ +static inline void +rte_atomic32_set(rte_atomic32_t *v, int32_t new_value) +{ + v->cnt = new_value; +} + +/** + * Atomically add a 32-bit value to an atomic counter. + * + * @param v + * A pointer to the atomic counter. + * @param inc + * The value to be added to the counter. + */ +static inline void +rte_atomic32_add(rte_atomic32_t *v, int32_t inc) +{ + __sync_fetch_and_add(&v->cnt, inc); +} + +/** + * Atomically subtract a 32-bit value from an atomic counter. + * + * @param v + * A pointer to the atomic counter. + * @param dec + * The value to be subtracted from the counter. + */ +static inline void +rte_atomic32_sub(rte_atomic32_t *v, int32_t dec) +{ + __sync_fetch_and_sub(&v->cnt, dec); +} + +/** + * Atomically increment a counter by one. + * + * @param v + * A pointer to the atomic counter. + */ +static inline void +rte_atomic32_inc(rte_atomic32_t *v); + +#ifdef RTE_FORCE_INTRINSICS +static inline void +rte_atomic32_inc(rte_atomic32_t *v) +{ + rte_atomic32_add(v, 1); +} +#endif + +/** + * Atomically decrement a counter by one. + * + * @param v + * A pointer to the atomic counter. + */ +static inline void +rte_atomic32_dec(rte_atomic32_t *v); + +#ifdef RTE_FORCE_INTRINSICS +static inline void +rte_atomic32_dec(rte_atomic32_t *v) +{ + rte_atomic32_sub(v,1); +} +#endif + +/** + * Atomically add a 32-bit value to a counter and return the result. + * + * Atomically adds the 32-bit value (inc) to the atomic counter (v) and + * returns the value of v after addition.
+ * + * @param v + * A pointer to the atomic counter. + * @param inc + * The value to be added to the counter. + * @return + * The value of v after the addition. + */ +static inline int32_t +rte_atomic32_add_return(rte_atomic32_t *v, int32_t inc) +{ + return __sync_add_and_fetch(&v->cnt, inc); +} + +/** + * Atomically subtract a 32-bit value from a counter and return + * the result. + * + * Atomically subtracts the 32-bit value (dec) from the atomic counter + * (v) and returns the value of v after the subtraction. + * + * @param v + * A pointer to the atomic counter. + * @param dec + * The value to be subtracted from the counter. + * @return + * The value of v after the subtraction. + */ +static inline int32_t +rte_atomic32_sub_return(rte_atomic32_t *v, int32_t dec) +{ + return __sync_sub_and_fetch(&v->cnt, dec); +} + +/** + * Atomically increment a 32-bit counter by one and test. + * + * Atomically increments the atomic counter (v) by one and returns true if + * the result is 0, or false in all other cases. + * + * @param v + * A pointer to the atomic counter. + * @return + * True if the result after the increment operation is 0; false otherwise. + */ +static inline int rte_atomic32_inc_and_test(rte_atomic32_t *v); + +#ifdef RTE_FORCE_INTRINSICS +static inline int rte_atomic32_inc_and_test(rte_atomic32_t *v) +{ + return (__sync_add_and_fetch(&v->cnt, 1) == 0); +} +#endif + +/** + * Atomically decrement a 32-bit counter by one and test. + * + * Atomically decrements the atomic counter (v) by one and returns true if + * the result is 0, or false in all other cases. + * + * @param v + * A pointer to the atomic counter. + * @return + * True if the result after the decrement operation is 0; false otherwise.
+ */ +static inline int rte_atomic32_dec_and_test(rte_atomic32_t *v); + +#ifdef RTE_FORCE_INTRINSICS +static inline int rte_atomic32_dec_and_test(rte_atomic32_t *v) +{ + return (__sync_sub_and_fetch(&v->cnt, 1) == 0); +} +#endif + +/** + * Atomically test and set a 32-bit atomic counter. + * + * If the counter value is already set, return 0 (failed). Otherwise, set + * the counter value to 1 and return 1 (success). + * + * @param v + * A pointer to the atomic counter. + * @return + * 0 if failed; else 1, success. + */ +static inline int rte_atomic32_test_and_set(rte_atomic32_t *v); + +#ifdef RTE_FORCE_INTRINSICS +static inline int rte_atomic32_test_and_set(rte_atomic32_t *v) +{ + return rte_atomic32_cmpset((volatile uint32_t *)&v->cnt, 0, 1); +} +#endif + +/** + * Atomically set a 32-bit counter to 0. + * + * @param v + * A pointer to the atomic counter. + */ +static inline void rte_atomic32_clear(rte_atomic32_t *v) +{ + v->cnt = 0; +} + +/*------------------------- 64 bit atomic operations -------------------------*/ + +/** + * An atomic compare and set function used by the mutex functions. + * (atomic) equivalent to: + * if (*dst == exp) + * *dst = src (all 64-bit words) + * + * @param dst + * The destination into which the value will be written. + * @param exp + * The expected value. + * @param src + * The new value. + * @return + * Non-zero on success; 0 on failure. + */ +static inline int +rte_atomic64_cmpset(volatile uint64_t *dst, uint64_t exp, uint64_t src); + +#ifdef RTE_FORCE_INTRINSICS +static inline int +rte_atomic64_cmpset(volatile uint64_t *dst, uint64_t exp, uint64_t src) +{ + return __sync_bool_compare_and_swap(dst, exp, src); +} +#endif + +/** + * The atomic counter structure. + */ +typedef struct { + volatile int64_t cnt; /**< Internal counter value. */ +} rte_atomic64_t; + +/** + * Static initializer for an atomic counter. + */ +#define RTE_ATOMIC64_INIT(val) { (val) } + +/** + * Initialize the atomic counter. 
+ * + * @param v + * A pointer to the atomic counter. + */ +static inline void +rte_atomic64_init(rte_atomic64_t *v); + +#ifdef RTE_FORCE_INTRINSICS +static inline void +rte_atomic64_init(rte_atomic64_t *v) +{ +#ifdef __LP64__ + v->cnt = 0; +#else + int success = 0; + uint64_t tmp; + + while (success == 0) { + tmp = v->cnt; + success = rte_atomic64_cmpset((volatile uint64_t *)&v->cnt, + tmp, 0); + } +#endif +} +#endif + +/** + * Atomically read a 64-bit counter. + * + * @param v + * A pointer to the atomic counter. + * @return + * The value of the counter. + */ +static inline int64_t +rte_atomic64_read(rte_atomic64_t *v); + +#ifdef RTE_FORCE_INTRINSICS +static inline int64_t +rte_atomic64_read(rte_atomic64_t *v) +{ +#ifdef __LP64__ + return v->cnt; +#else + int success = 0; + uint64_t tmp; + + while (success == 0) { + tmp = v->cnt; + /* replace the value by itself */ + success = rte_atomic64_cmpset((volatile uint64_t *)&v->cnt, + tmp, tmp); + } + return tmp; +#endif +} +#endif + +/** + * Atomically set a 64-bit counter. + * + * @param v + * A pointer to the atomic counter. + * @param new_value + * The new value of the counter. + */ +static inline void +rte_atomic64_set(rte_atomic64_t *v, int64_t new_value); + +#ifdef RTE_FORCE_INTRINSICS +static inline void +rte_atomic64_set(rte_atomic64_t *v, int64_t new_value) +{ +#ifdef __LP64__ + v->cnt = new_value; +#else + int success = 0; + uint64_t tmp; + + while (success == 0) { + tmp = v->cnt; + success = rte_atomic64_cmpset((volatile uint64_t *)&v->cnt, + tmp, new_value); + } +#endif +} +#endif + +/** + * Atomically add a 64-bit value to a counter. + * + * @param v + * A pointer to the atomic counter. + * @param inc + * The value to be added to the counter. 
+ */ +static inline void +rte_atomic64_add(rte_atomic64_t *v, int64_t inc); + +#ifdef RTE_FORCE_INTRINSICS +static inline void +rte_atomic64_add(rte_atomic64_t *v, int64_t inc) +{ + __sync_fetch_and_add(&v->cnt, inc); +} +#endif + +/** + * Atomically subtract a 64-bit value from a counter. + * + * @param v + * A pointer to the atomic counter. + * @param dec + * The value to be subtracted from the counter. + */ +static inline void +rte_atomic64_sub(rte_atomic64_t *v, int64_t dec); + +#ifdef RTE_FORCE_INTRINSICS +static inline void +rte_atomic64_sub(rte_atomic64_t *v, int64_t dec) +{ + __sync_fetch_and_sub(&v->cnt, dec); +} +#endif + +/** + * Atomically increment a 64-bit counter by one. + * + * @param v + * A pointer to the atomic counter. + */ +static inline void +rte_atomic64_inc(rte_atomic64_t *v); + +#ifdef RTE_FORCE_INTRINSICS +static inline void +rte_atomic64_inc(rte_atomic64_t *v) +{ + rte_atomic64_add(v, 1); +} +#endif + +/** + * Atomically decrement a 64-bit counter by one. + * + * @param v + * A pointer to the atomic counter. + */ +static inline void +rte_atomic64_dec(rte_atomic64_t *v); + +#ifdef RTE_FORCE_INTRINSICS +static inline void +rte_atomic64_dec(rte_atomic64_t *v) +{ + rte_atomic64_sub(v, 1); +} +#endif + +/** + * Add a 64-bit value to an atomic counter and return the result. + * + * Atomically adds the 64-bit value (inc) to the atomic counter (v) and + * returns the value of v after the addition. + * + * @param v + * A pointer to the atomic counter. + * @param inc + * The value to be added to the counter. + * @return + * The value of v after the addition. + */ +static inline int64_t +rte_atomic64_add_return(rte_atomic64_t *v, int64_t inc); + +#ifdef RTE_FORCE_INTRINSICS +static inline int64_t +rte_atomic64_add_return(rte_atomic64_t *v, int64_t inc) +{ + return __sync_add_and_fetch(&v->cnt, inc); +} +#endif + +/** + * Subtract a 64-bit value from an atomic counter and return the result.
+ * + * Atomically subtracts the 64-bit value (dec) from the atomic counter (v) + * and returns the value of v after the subtraction. + * + * @param v + * A pointer to the atomic counter. + * @param dec + * The value to be subtracted from the counter. + * @return + * The value of v after the subtraction. + */ +static inline int64_t +rte_atomic64_sub_return(rte_atomic64_t *v, int64_t dec); + +#ifdef RTE_FORCE_INTRINSICS +static inline int64_t +rte_atomic64_sub_return(rte_atomic64_t *v, int64_t dec) +{ + return __sync_sub_and_fetch(&v->cnt, dec); +} +#endif + +/** + * Atomically increment a 64-bit counter by one and test. + * + * Atomically increments the atomic counter (v) by one and returns + * true if the result is 0, or false in all other cases. + * + * @param v + * A pointer to the atomic counter. + * @return + * True if the result after the addition is 0; false otherwise. + */ +static inline int rte_atomic64_inc_and_test(rte_atomic64_t *v); + +#ifdef RTE_FORCE_INTRINSICS +static inline int rte_atomic64_inc_and_test(rte_atomic64_t *v) +{ + return rte_atomic64_add_return(v, 1) == 0; +} +#endif + +/** + * Atomically decrement a 64-bit counter by one and test. + * + * Atomically decrements the atomic counter (v) by one and returns true if + * the result is 0, or false in all other cases. + * + * @param v + * A pointer to the atomic counter. + * @return + * True if the result after subtraction is 0; false otherwise. + */ +static inline int rte_atomic64_dec_and_test(rte_atomic64_t *v); + +#ifdef RTE_FORCE_INTRINSICS +static inline int rte_atomic64_dec_and_test(rte_atomic64_t *v) +{ + return rte_atomic64_sub_return(v, 1) == 0; +} +#endif + +/** + * Atomically test and set a 64-bit atomic counter. + * + * If the counter value is already set, return 0 (failed). Otherwise, set + * the counter value to 1 and return 1 (success). + * + * @param v + * A pointer to the atomic counter. + * @return + * 0 if failed; else 1, success. 
+ */ +static inline int rte_atomic64_test_and_set(rte_atomic64_t *v); + +#ifdef RTE_FORCE_INTRINSICS +static inline int rte_atomic64_test_and_set(rte_atomic64_t *v) +{ + return rte_atomic64_cmpset((volatile uint64_t *)&v->cnt, 0, 1); +} +#endif + +/** + * Atomically set a 64-bit counter to 0. + * + * @param v + * A pointer to the atomic counter. + */ +static inline void rte_atomic64_clear(rte_atomic64_t *v); + +#ifdef RTE_FORCE_INTRINSICS +static inline void rte_atomic64_clear(rte_atomic64_t *v) +{ + rte_atomic64_set(v, 0); +} +#endif + +#endif /* _RTE_ATOMIC_H_ */ diff --git a/src/dpdk22/lib/librte_eal/common/include/generic/rte_byteorder.h b/src/dpdk22/lib/librte_eal/common/include/generic/rte_byteorder.h new file mode 100644 index 00000000..c46fdcf2 --- /dev/null +++ b/src/dpdk22/lib/librte_eal/common/include/generic/rte_byteorder.h @@ -0,0 +1,217 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef _RTE_BYTEORDER_H_ +#define _RTE_BYTEORDER_H_ + +/** + * @file + * + * Byte Swap Operations + * + * This file defines a generic API for byte swap operations. Part of + * the implementation is architecture-specific. + */ + +#include +#ifdef RTE_EXEC_ENV_BSDAPP +#include +#else +#include +#endif + +/* + * Compile-time endianness detection + */ +#define RTE_BIG_ENDIAN 1 +#define RTE_LITTLE_ENDIAN 2 +#if defined __BYTE_ORDER__ +#if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ +#define RTE_BYTE_ORDER RTE_BIG_ENDIAN +#elif __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ +#define RTE_BYTE_ORDER RTE_LITTLE_ENDIAN +#endif /* __BYTE_ORDER__ */ +#elif defined __BYTE_ORDER +#if __BYTE_ORDER == __BIG_ENDIAN +#define RTE_BYTE_ORDER RTE_BIG_ENDIAN +#elif __BYTE_ORDER == __LITTLE_ENDIAN +#define RTE_BYTE_ORDER RTE_LITTLE_ENDIAN +#endif /* __BYTE_ORDER */ +#elif defined __BIG_ENDIAN__ +#define RTE_BYTE_ORDER RTE_BIG_ENDIAN +#elif defined __LITTLE_ENDIAN__ +#define RTE_BYTE_ORDER RTE_LITTLE_ENDIAN +#endif + +/* + * An internal function to swap bytes in a 16-bit value. + * + * It is used by rte_bswap16() when the value is constant. Do not use + * this function directly; rte_bswap16() is preferred. + */ +static inline uint16_t +rte_constant_bswap16(uint16_t x) +{ + return (uint16_t)(((x & 0x00ffU) << 8) | + ((x & 0xff00U) >> 8)); +} + +/* + * An internal function to swap bytes in a 32-bit value. 
+ * + * It is used by rte_bswap32() when the value is constant. Do not use + * this function directly; rte_bswap32() is preferred. + */ +static inline uint32_t +rte_constant_bswap32(uint32_t x) +{ + return ((x & 0x000000ffUL) << 24) | + ((x & 0x0000ff00UL) << 8) | + ((x & 0x00ff0000UL) >> 8) | + ((x & 0xff000000UL) >> 24); +} + +/* + * An internal function to swap bytes of a 64-bit value. + * + * It is used by rte_bswap64() when the value is constant. Do not use + * this function directly; rte_bswap64() is preferred. + */ +static inline uint64_t +rte_constant_bswap64(uint64_t x) +{ + return ((x & 0x00000000000000ffULL) << 56) | + ((x & 0x000000000000ff00ULL) << 40) | + ((x & 0x0000000000ff0000ULL) << 24) | + ((x & 0x00000000ff000000ULL) << 8) | + ((x & 0x000000ff00000000ULL) >> 8) | + ((x & 0x0000ff0000000000ULL) >> 24) | + ((x & 0x00ff000000000000ULL) >> 40) | + ((x & 0xff00000000000000ULL) >> 56); +} + + +#ifdef __DOXYGEN__ + +/** + * Swap bytes in a 16-bit value. + */ +static uint16_t rte_bswap16(uint16_t _x); + +/** + * Swap bytes in a 32-bit value. + */ +static uint32_t rte_bswap32(uint32_t x); + +/** + * Swap bytes in a 64-bit value. + */ +static uint64_t rte_bswap64(uint64_t x); + +/** + * Convert a 16-bit value from CPU order to little endian. + */ +static uint16_t rte_cpu_to_le_16(uint16_t x); + +/** + * Convert a 32-bit value from CPU order to little endian. + */ +static uint32_t rte_cpu_to_le_32(uint32_t x); + +/** + * Convert a 64-bit value from CPU order to little endian. + */ +static uint64_t rte_cpu_to_le_64(uint64_t x); + + +/** + * Convert a 16-bit value from CPU order to big endian. + */ +static uint16_t rte_cpu_to_be_16(uint16_t x); + +/** + * Convert a 32-bit value from CPU order to big endian. + */ +static uint32_t rte_cpu_to_be_32(uint32_t x); + +/** + * Convert a 64-bit value from CPU order to big endian. + */ +static uint64_t rte_cpu_to_be_64(uint64_t x); + + +/** + * Convert a 16-bit value from little endian to CPU order. 
+ */ +static uint16_t rte_le_to_cpu_16(uint16_t x); + +/** + * Convert a 32-bit value from little endian to CPU order. + */ +static uint32_t rte_le_to_cpu_32(uint32_t x); + +/** + * Convert a 64-bit value from little endian to CPU order. + */ +static uint64_t rte_le_to_cpu_64(uint64_t x); + + +/** + * Convert a 16-bit value from big endian to CPU order. + */ +static uint16_t rte_be_to_cpu_16(uint16_t x); + +/** + * Convert a 32-bit value from big endian to CPU order. + */ +static uint32_t rte_be_to_cpu_32(uint32_t x); + +/** + * Convert a 64-bit value from big endian to CPU order. + */ +static uint64_t rte_be_to_cpu_64(uint64_t x); + +#endif /* __DOXYGEN__ */ + +#ifdef RTE_FORCE_INTRINSICS +#if __GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 8) +#define rte_bswap16(x) __builtin_bswap16(x) +#endif + +#define rte_bswap32(x) __builtin_bswap32(x) + +#define rte_bswap64(x) __builtin_bswap64(x) + +#endif + +#endif /* _RTE_BYTEORDER_H_ */ diff --git a/src/dpdk22/lib/librte_eal/common/include/generic/rte_cpuflags.h b/src/dpdk22/lib/librte_eal/common/include/generic/rte_cpuflags.h new file mode 100644 index 00000000..5738a2a7 --- /dev/null +++ b/src/dpdk22/lib/librte_eal/common/include/generic/rte_cpuflags.h @@ -0,0 +1,120 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. 
+ * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef _RTE_CPUFLAGS_H_ +#define _RTE_CPUFLAGS_H_ + +/** + * @file + * Architecture specific API to determine available CPU features at runtime. 
+ */ + +#include +#include +#include +#include + +/** + * Enumeration of all CPU features supported + */ +#ifdef __DOXYGEN__ +enum rte_cpu_flag_t; +#endif + +/** + * Enumeration of CPU registers + */ +#ifdef __DOXYGEN__ +enum cpu_register_t; +#endif + +typedef uint32_t cpuid_registers_t[4]; + +#define CPU_FLAG_NAME_MAX_LEN 64 + +/** + * Struct to hold a processor feature entry + */ +struct feature_entry { + uint32_t leaf; /**< cpuid leaf */ + uint32_t subleaf; /**< cpuid subleaf */ + uint32_t reg; /**< cpuid register */ + uint32_t bit; /**< cpuid register bit */ + char name[CPU_FLAG_NAME_MAX_LEN]; /**< String for printing */ +}; + +#define FEAT_DEF(name, leaf, subleaf, reg, bit) \ + [RTE_CPUFLAG_##name] = {leaf, subleaf, reg, bit, #name }, + +/** + * An array that holds feature entries + * + * Defined in arch-specific rte_cpuflags.h. + */ +#ifdef __DOXYGEN__ +static const struct feature_entry cpu_feature_table[]; +#endif + +/** + * Execute CPUID instruction and get contents of a specific register + * + * This function, when compiled with GCC, will generate architecture-neutral + * code, as per GCC manual. + */ +static inline void +rte_cpu_get_features(uint32_t leaf, uint32_t subleaf, cpuid_registers_t out); + +/** + * Function for checking a CPU flag availability + * + * @param feature + * CPU flag to query CPU for + * @return + * 1 if flag is available + * 0 if flag is not available + * -ENOENT if flag is invalid + */ +#ifdef __DOXYGEN__ +static inline int +rte_cpu_get_flag_enabled(enum rte_cpu_flag_t feature); +#endif + +/** + * This function checks that the currently used CPU supports the CPU features + * that were specified at compile time. It is called automatically within the + * EAL, so does not need to be used by applications. 
+ */ +void +rte_cpu_check_supported(void); + +#endif /* _RTE_CPUFLAGS_H_ */ diff --git a/src/dpdk22/lib/librte_eal/common/include/generic/rte_cycles.h b/src/dpdk22/lib/librte_eal/common/include/generic/rte_cycles.h new file mode 100644 index 00000000..8cc21f20 --- /dev/null +++ b/src/dpdk22/lib/librte_eal/common/include/generic/rte_cycles.h @@ -0,0 +1,205 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ +/* BSD LICENSE + * + * Copyright(c) 2013 6WIND. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of 6WIND S.A. nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef _RTE_CYCLES_H_ +#define _RTE_CYCLES_H_ + +/** + * @file + * + * Simple Time Reference Functions (Cycles and HPET). 
+ */ + +#include +#include +#include + +#define MS_PER_S 1000 +#define US_PER_S 1000000 +#define NS_PER_S 1000000000 + +enum timer_source { + EAL_TIMER_TSC = 0, + EAL_TIMER_HPET +}; +extern enum timer_source eal_timer_source; + +/** + * Get the measured frequency of the RDTSC counter + * + * @return + * The TSC frequency for this lcore + */ +uint64_t +rte_get_tsc_hz(void); + +/** + * Return the number of TSC cycles since boot + * + * @return + * the number of cycles + */ +static inline uint64_t +rte_get_tsc_cycles(void); + +#ifdef RTE_LIBEAL_USE_HPET +/** + * Return the number of HPET cycles since boot + * + * This counter is global for all execution units. The number of + * cycles in one second can be retrieved using rte_get_hpet_hz(). + * + * @return + * the number of cycles + */ +uint64_t +rte_get_hpet_cycles(void); + +/** + * Get the number of HPET cycles in one second. + * + * @return + * The number of cycles in one second. + */ +uint64_t +rte_get_hpet_hz(void); + +/** + * Initialise the HPET for use. This must be called before the rte_get_hpet_hz + * and rte_get_hpet_cycles APIs are called. If this function does not succeed, + * then the HPET functions are unavailable and should not be called. + * + * @param make_default + * If set, the hpet timer becomes the default timer whose values are + * returned by the rte_get_timer_hz/cycles API calls + * + * @return + * 0 on success, + * -1 on error, and the make_default parameter is ignored. + */ +int rte_eal_hpet_init(int make_default); + +#endif + +/** + * Get the number of cycles since boot from the default timer. 
+ * + * @return + * The number of cycles + */ +static inline uint64_t +rte_get_timer_cycles(void) +{ + switch(eal_timer_source) { + case EAL_TIMER_TSC: + return rte_get_tsc_cycles(); + case EAL_TIMER_HPET: +#ifdef RTE_LIBEAL_USE_HPET + return rte_get_hpet_cycles(); +#endif + default: rte_panic("Invalid timer source specified\n"); + } +} + +/** + * Get the number of cycles in one second for the default timer. + * + * @return + * The number of cycles in one second. + */ +static inline uint64_t +rte_get_timer_hz(void) +{ + switch(eal_timer_source) { + case EAL_TIMER_TSC: + return rte_get_tsc_hz(); + case EAL_TIMER_HPET: +#ifdef RTE_LIBEAL_USE_HPET + return rte_get_hpet_hz(); +#endif + default: rte_panic("Invalid timer source specified\n"); + } +} + +/** + * Wait at least us microseconds. + * + * @param us + * The number of microseconds to wait. + */ +void +rte_delay_us(unsigned us); + +/** + * Wait at least ms milliseconds. + * + * @param ms + * The number of milliseconds to wait. + */ +static inline void +rte_delay_ms(unsigned ms) +{ + rte_delay_us(ms * 1000); +} + +#endif /* _RTE_CYCLES_H_ */ diff --git a/src/dpdk22/lib/librte_eal/common/include/generic/rte_memcpy.h b/src/dpdk22/lib/librte_eal/common/include/generic/rte_memcpy.h new file mode 100644 index 00000000..03e84773 --- /dev/null +++ b/src/dpdk22/lib/librte_eal/common/include/generic/rte_memcpy.h @@ -0,0 +1,144 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. 
+ * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef _RTE_MEMCPY_H_ +#define _RTE_MEMCPY_H_ + +/** + * @file + * + * Functions for vectorised implementation of memcpy(). + */ + +/** + * Copy 16 bytes from one location to another using optimised + * instructions. The locations should not overlap. + * + * @param dst + * Pointer to the destination of the data. + * @param src + * Pointer to the source data. + */ +static inline void +rte_mov16(uint8_t *dst, const uint8_t *src); + +/** + * Copy 32 bytes from one location to another using optimised + * instructions. The locations should not overlap. + * + * @param dst + * Pointer to the destination of the data. + * @param src + * Pointer to the source data. 
+ */ +static inline void +rte_mov32(uint8_t *dst, const uint8_t *src); + +/** + * Copy 48 bytes from one location to another using optimised + * instructions. The locations should not overlap. + * + * @param dst + * Pointer to the destination of the data. + * @param src + * Pointer to the source data. + */ +static inline void +rte_mov48(uint8_t *dst, const uint8_t *src); + +/** + * Copy 64 bytes from one location to another using optimised + * instructions. The locations should not overlap. + * + * @param dst + * Pointer to the destination of the data. + * @param src + * Pointer to the source data. + */ +static inline void +rte_mov64(uint8_t *dst, const uint8_t *src); + +/** + * Copy 128 bytes from one location to another using optimised + * instructions. The locations should not overlap. + * + * @param dst + * Pointer to the destination of the data. + * @param src + * Pointer to the source data. + */ +static inline void +rte_mov128(uint8_t *dst, const uint8_t *src); + +/** + * Copy 256 bytes from one location to another using optimised + * instructions. The locations should not overlap. + * + * @param dst + * Pointer to the destination of the data. + * @param src + * Pointer to the source data. + */ +static inline void +rte_mov256(uint8_t *dst, const uint8_t *src); + +#ifdef __DOXYGEN__ + +/** + * Copy bytes from one location to another. The locations must not overlap. + * + * @note This is implemented as a macro, so its address should not be taken + * and care is needed as parameter expressions may be evaluated multiple times. + * + * @param dst + * Pointer to the destination of the data. + * @param src + * Pointer to the source data. + * @param n + * Number of bytes to copy. + * @return + * Pointer to the destination data. 
+ */ +static void * +rte_memcpy(void *dst, const void *src, size_t n); + +#endif /* __DOXYGEN__ */ + +/* + * memcpy() function used by rte_memcpy macro + */ +static inline void * +rte_memcpy_func(void *dst, const void *src, size_t n) __attribute__((always_inline)); + + +#endif /* _RTE_MEMCPY_H_ */ diff --git a/src/dpdk22/lib/librte_eal/common/include/generic/rte_prefetch.h b/src/dpdk22/lib/librte_eal/common/include/generic/rte_prefetch.h new file mode 100644 index 00000000..725715ff --- /dev/null +++ b/src/dpdk22/lib/librte_eal/common/include/generic/rte_prefetch.h @@ -0,0 +1,71 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2015 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef _RTE_PREFETCH_H_ +#define _RTE_PREFETCH_H_ + +/** + * @file + * + * Prefetch operations. + * + * This file defines an API for prefetch macros / inline-functions, + * which are architecture-dependent. Prefetching occurs when a + * processor requests an instruction or data from memory to cache + * before it is actually needed, potentially speeding up the execution of the + * program. + */ + +/** + * Prefetch a cache line into all cache levels. + * @param p + * Address to prefetch + */ +static inline void rte_prefetch0(const volatile void *p); + +/** + * Prefetch a cache line into all cache levels except the 0th cache level. + * @param p + * Address to prefetch + */ +static inline void rte_prefetch1(const volatile void *p); + +/** + * Prefetch a cache line into all cache levels except the 0th and 1st cache + * levels. + * @param p + * Address to prefetch + */ +static inline void rte_prefetch2(const volatile void *p); + +#endif /* _RTE_PREFETCH_H_ */ diff --git a/src/dpdk22/lib/librte_eal/common/include/generic/rte_rwlock.h b/src/dpdk22/lib/librte_eal/common/include/generic/rte_rwlock.h new file mode 100644 index 00000000..7a0fdc55 --- /dev/null +++ b/src/dpdk22/lib/librte_eal/common/include/generic/rte_rwlock.h @@ -0,0 +1,208 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. + * All rights reserved. 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef _RTE_RWLOCK_H_ +#define _RTE_RWLOCK_H_ + +/** + * @file + * + * RTE Read-Write Locks + * + * This file defines an API for read-write locks. The lock is used to + * protect data that allows multiple readers in parallel, but only + * one writer. All readers are blocked until the writer is finished + * writing. + * + */ + +#ifdef __cplusplus +extern "C" { +#endif + +#include +#include + +/** + * The rte_rwlock_t type. 
+ * + * cnt is -1 when write lock is held, and > 0 when read locks are held. + */ +typedef struct { + volatile int32_t cnt; /**< -1 when W lock held, > 0 when R locks held. */ +} rte_rwlock_t; + +/** + * A static rwlock initializer. + */ +#define RTE_RWLOCK_INITIALIZER { 0 } + +/** + * Initialize the rwlock to an unlocked state. + * + * @param rwl + * A pointer to the rwlock structure. + */ +static inline void +rte_rwlock_init(rte_rwlock_t *rwl) +{ + rwl->cnt = 0; +} + +/** + * Take a read lock. Loop until the lock is held. + * + * @param rwl + * A pointer to a rwlock structure. + */ +static inline void +rte_rwlock_read_lock(rte_rwlock_t *rwl) +{ + int32_t x; + int success = 0; + + while (success == 0) { + x = rwl->cnt; + /* write lock is held */ + if (x < 0) { + rte_pause(); + continue; + } + success = rte_atomic32_cmpset((volatile uint32_t *)&rwl->cnt, + x, x + 1); + } +} + +/** + * Release a read lock. + * + * @param rwl + * A pointer to the rwlock structure. + */ +static inline void +rte_rwlock_read_unlock(rte_rwlock_t *rwl) +{ + rte_atomic32_dec((rte_atomic32_t *)(intptr_t)&rwl->cnt); +} + +/** + * Take a write lock. Loop until the lock is held. + * + * @param rwl + * A pointer to a rwlock structure. + */ +static inline void +rte_rwlock_write_lock(rte_rwlock_t *rwl) +{ + int32_t x; + int success = 0; + + while (success == 0) { + x = rwl->cnt; + /* a lock is held */ + if (x != 0) { + rte_pause(); + continue; + } + success = rte_atomic32_cmpset((volatile uint32_t *)&rwl->cnt, + 0, -1); + } +} + +/** + * Release a write lock. + * + * @param rwl + * A pointer to a rwlock structure. 
+ */ +static inline void +rte_rwlock_write_unlock(rte_rwlock_t *rwl) +{ + rte_atomic32_inc((rte_atomic32_t *)(intptr_t)&rwl->cnt); +} + +/** + * Try to execute critical section in a hardware memory transaction, if it + * fails or not available take a read lock + * + * NOTE: An attempt to perform a HW I/O operation inside a hardware memory + * transaction always aborts the transaction since the CPU is not able to + * roll-back should the transaction fail. Therefore, hardware transactional + * locks are not advised to be used around rte_eth_rx_burst() and + * rte_eth_tx_burst() calls. + * + * @param rwl + * A pointer to a rwlock structure. + */ +static inline void +rte_rwlock_read_lock_tm(rte_rwlock_t *rwl); + +/** + * Commit hardware memory transaction or release the read lock if the lock is used as a fall-back + * + * @param rwl + * A pointer to the rwlock structure. + */ +static inline void +rte_rwlock_read_unlock_tm(rte_rwlock_t *rwl); + +/** + * Try to execute critical section in a hardware memory transaction, if it + * fails or not available take a write lock + * + * NOTE: An attempt to perform a HW I/O operation inside a hardware memory + * transaction always aborts the transaction since the CPU is not able to + * roll-back should the transaction fail. Therefore, hardware transactional + * locks are not advised to be used around rte_eth_rx_burst() and + * rte_eth_tx_burst() calls. + * + * @param rwl + * A pointer to a rwlock structure. + */ +static inline void +rte_rwlock_write_lock_tm(rte_rwlock_t *rwl); + +/** + * Commit hardware memory transaction or release the write lock if the lock is used as a fall-back + * + * @param rwl + * A pointer to a rwlock structure. 
+ */ +static inline void +rte_rwlock_write_unlock_tm(rte_rwlock_t *rwl); + +#ifdef __cplusplus +} +#endif + +#endif /* _RTE_RWLOCK_H_ */ diff --git a/src/dpdk22/lib/librte_eal/common/include/generic/rte_spinlock.h b/src/dpdk22/lib/librte_eal/common/include/generic/rte_spinlock.h new file mode 100644 index 00000000..4e0a3c30 --- /dev/null +++ b/src/dpdk22/lib/librte_eal/common/include/generic/rte_spinlock.h @@ -0,0 +1,325 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef _RTE_SPINLOCK_H_ +#define _RTE_SPINLOCK_H_ + +/** + * @file + * + * RTE Spinlocks + * + * This file defines an API for spinlocks, which are implemented + * in an architecture-specific way. This kind of lock simply waits in + * a loop repeatedly checking until the lock becomes available. + * + * All locks must be initialised before use, and only initialised once. + * + */ + +#include +#ifdef RTE_FORCE_INTRINSICS +#include +#endif + +/** + * The rte_spinlock_t type. + */ +typedef struct { + volatile int locked; /**< lock status 0 = unlocked, 1 = locked */ +} rte_spinlock_t; + +/** + * A static spinlock initializer. + */ +#define RTE_SPINLOCK_INITIALIZER { 0 } + +/** + * Initialize the spinlock to an unlocked state. + * + * @param sl + * A pointer to the spinlock. + */ +static inline void +rte_spinlock_init(rte_spinlock_t *sl) +{ + sl->locked = 0; +} + +/** + * Take the spinlock. + * + * @param sl + * A pointer to the spinlock. + */ +static inline void +rte_spinlock_lock(rte_spinlock_t *sl); + +#ifdef RTE_FORCE_INTRINSICS +static inline void +rte_spinlock_lock(rte_spinlock_t *sl) +{ + while (__sync_lock_test_and_set(&sl->locked, 1)) + while(sl->locked) + rte_pause(); +} +#endif + +/** + * Release the spinlock. + * + * @param sl + * A pointer to the spinlock. 
+ */ +static inline void +rte_spinlock_unlock (rte_spinlock_t *sl); + +#ifdef RTE_FORCE_INTRINSICS +static inline void +rte_spinlock_unlock (rte_spinlock_t *sl) +{ + __sync_lock_release(&sl->locked); +} +#endif + +/** + * Try to take the lock. + * + * @param sl + * A pointer to the spinlock. + * @return + * 1 if the lock is successfully taken; 0 otherwise. + */ +static inline int +rte_spinlock_trylock (rte_spinlock_t *sl); + +#ifdef RTE_FORCE_INTRINSICS +static inline int +rte_spinlock_trylock (rte_spinlock_t *sl) +{ + return (__sync_lock_test_and_set(&sl->locked,1) == 0); +} +#endif + +/** + * Test if the lock is taken. + * + * @param sl + * A pointer to the spinlock. + * @return + * 1 if the lock is currently taken; 0 otherwise. + */ +static inline int rte_spinlock_is_locked (rte_spinlock_t *sl) +{ + return sl->locked; +} + +/** + * Test if hardware transactional memory (lock elision) is supported + * + * @return + * 1 if the hardware transactional memory is supported; 0 otherwise. + */ +static inline int rte_tm_supported(void); + +/** + * Try to execute critical section in a hardware memory transaction, + * if it fails or not available take the spinlock. + * + * NOTE: An attempt to perform a HW I/O operation inside a hardware memory + * transaction always aborts the transaction since the CPU is not able to + * roll-back should the transaction fail. Therefore, hardware transactional + * locks are not advised to be used around rte_eth_rx_burst() and + * rte_eth_tx_burst() calls. + * + * @param sl + * A pointer to the spinlock. + */ +static inline void +rte_spinlock_lock_tm(rte_spinlock_t *sl); + +/** + * Commit hardware memory transaction or release the spinlock if + * the spinlock is used as a fall-back + * + * @param sl + * A pointer to the spinlock. + */ +static inline void +rte_spinlock_unlock_tm(rte_spinlock_t *sl); + +/** + * Try to execute critical section in a hardware memory transaction, + * if it fails or not available try to take the lock. 
+ * + * NOTE: An attempt to perform a HW I/O operation inside a hardware memory + * transaction always aborts the transaction since the CPU is not able to + * roll-back should the transaction fail. Therefore, hardware transactional + * locks are not advised to be used around rte_eth_rx_burst() and + * rte_eth_tx_burst() calls. + * + * @param sl + * A pointer to the spinlock. + * @return + * 1 if the hardware memory transaction is successfully started + * or lock is successfully taken; 0 otherwise. + */ +static inline int +rte_spinlock_trylock_tm(rte_spinlock_t *sl); + +/** + * The rte_spinlock_recursive_t type. + */ +typedef struct { + rte_spinlock_t sl; /**< the actual spinlock */ + volatile int user; /**< rte_gettid() id of the lock holder, -1 for unused */ + volatile int count; /**< number of times the holder has taken this lock */ +} rte_spinlock_recursive_t; + +/** + * A static recursive spinlock initializer. + */ +#define RTE_SPINLOCK_RECURSIVE_INITIALIZER {RTE_SPINLOCK_INITIALIZER, -1, 0} + +/** + * Initialize the recursive spinlock to an unlocked state. + * + * @param slr + * A pointer to the recursive spinlock. + */ +static inline void rte_spinlock_recursive_init(rte_spinlock_recursive_t *slr) +{ + rte_spinlock_init(&slr->sl); + slr->user = -1; + slr->count = 0; +} + +/** + * Take the recursive spinlock. + * + * @param slr + * A pointer to the recursive spinlock. + */ +static inline void rte_spinlock_recursive_lock(rte_spinlock_recursive_t *slr) +{ + int id = rte_gettid(); + + if (slr->user != id) { + rte_spinlock_lock(&slr->sl); + slr->user = id; + } + slr->count++; +} +/** + * Release the recursive spinlock. + * + * @param slr + * A pointer to the recursive spinlock. + */ +static inline void rte_spinlock_recursive_unlock(rte_spinlock_recursive_t *slr) +{ + if (--(slr->count) == 0) { + slr->user = -1; + rte_spinlock_unlock(&slr->sl); + } + +} + +/** + * Try to take the recursive lock. + * + * @param slr + * A pointer to the recursive spinlock. 
+ * @return + * 1 if the lock is successfully taken; 0 otherwise. + */ +static inline int rte_spinlock_recursive_trylock(rte_spinlock_recursive_t *slr) +{ + int id = rte_gettid(); + + if (slr->user != id) { + if (rte_spinlock_trylock(&slr->sl) == 0) + return 0; + slr->user = id; + } + slr->count++; + return 1; +} + + +/** + * Try to execute critical section in a hardware memory transaction, + * if it fails or not available take the recursive spinlocks + * + * NOTE: An attempt to perform a HW I/O operation inside a hardware memory + * transaction always aborts the transaction since the CPU is not able to + * roll-back should the transaction fail. Therefore, hardware transactional + * locks are not advised to be used around rte_eth_rx_burst() and + * rte_eth_tx_burst() calls. + * + * @param slr + * A pointer to the recursive spinlock. + */ +static inline void rte_spinlock_recursive_lock_tm( + rte_spinlock_recursive_t *slr); + +/** + * Commit hardware memory transaction or release the recursive spinlock + * if the recursive spinlock is used as a fall-back + * + * @param slr + * A pointer to the recursive spinlock. + */ +static inline void rte_spinlock_recursive_unlock_tm( + rte_spinlock_recursive_t *slr); + +/** + * Try to execute critical section in a hardware memory transaction, + * if it fails or not available try to take the recursive lock + * + * NOTE: An attempt to perform a HW I/O operation inside a hardware memory + * transaction always aborts the transaction since the CPU is not able to + * roll-back should the transaction fail. Therefore, hardware transactional + * locks are not advised to be used around rte_eth_rx_burst() and + * rte_eth_tx_burst() calls. + * + * @param slr + * A pointer to the recursive spinlock. + * @return + * 1 if the hardware memory transaction is successfully started + * or lock is successfully taken; 0 otherwise. 
+ */ +static inline int rte_spinlock_recursive_trylock_tm( + rte_spinlock_recursive_t *slr); + +#endif /* _RTE_SPINLOCK_H_ */ diff --git a/src/dpdk22/lib/librte_eal/common/include/rte_alarm.h b/src/dpdk22/lib/librte_eal/common/include/rte_alarm.h new file mode 100644 index 00000000..4012cd67 --- /dev/null +++ b/src/dpdk22/lib/librte_eal/common/include/rte_alarm.h @@ -0,0 +1,106 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef _RTE_ALARM_H_ +#define _RTE_ALARM_H_ + +/** + * @file + * + * Alarm functions + * + * Simple alarm-clock functionality supplied by eal. + * Does not require hpet support. + */ + +#ifdef __cplusplus +extern "C" { +#endif + +#include + +/** + * Signature of callback back function called when an alarm goes off. + */ +typedef void (*rte_eal_alarm_callback)(void *arg); + +/** + * Function to set a callback to be triggered when us microseconds + * have expired. Accuracy of timing to the microsecond is not guaranteed. The + * alarm function will not be called *before* the requested time, but may + * be called a short period of time afterwards. + * The alarm handler will be called only once. There is no need to call + * "rte_eal_alarm_cancel" from within the callback function. + * + * @param us + * The time in microseconds before the callback is called + * @param cb + * The function to be called when the alarm expires + * @param cb_arg + * Pointer parameter to be passed to the callback function + * + * @return + * On success, zero. + * On failure, a negative error number + */ +int rte_eal_alarm_set(uint64_t us, rte_eal_alarm_callback cb, void *cb_arg); + +/** + * Function to cancel an alarm callback which has been registered before. If + * used outside alarm callback it wait for all callbacks to finish execution. 
+ * + * @param cb_fn + * alarm callback + * @param cb_arg + * Pointer parameter to be passed to the callback function. To remove all + * copies of a given callback function, irrespective of parameter, (void *)-1 + * can be used here. + * + * @return + * - value greater than 0 and rte_errno not changed - returned value is + * the number of canceled alarm callback functions + * - value greater or equal 0 and rte_errno set to EINPROGRESS, at least one + * alarm could not be canceled because cancellation was requested from alarm + * callback context. Returned value is the number of successfully canceled + * alarm callbacks + * - 0 and rte_errno set to ENOENT - no alarm found + * - -1 and rte_errno set to EINVAL - invalid parameter (NULL callback) + */ +int rte_eal_alarm_cancel(rte_eal_alarm_callback cb_fn, void *cb_arg); + +#ifdef __cplusplus +} +#endif + + +#endif /* _RTE_ALARM_H_ */ diff --git a/src/dpdk22/lib/librte_eal/common/include/rte_branch_prediction.h b/src/dpdk22/lib/librte_eal/common/include/rte_branch_prediction.h new file mode 100644 index 00000000..a6a56d17 --- /dev/null +++ b/src/dpdk22/lib/librte_eal/common/include/rte_branch_prediction.h @@ -0,0 +1,70 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution.
+ * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +/** + * @file + * Branch Prediction Helpers in RTE + */ + +#ifndef _RTE_BRANCH_PREDICTION_H_ +#define _RTE_BRANCH_PREDICTION_H_ + +/** + * Check if a branch is likely to be taken. + * + * This compiler builtin allows the developer to indicate if a branch is + * likely to be taken. Example: + * + * if (likely(x > 1)) + * do_stuff(); + * + */ +#ifndef likely +#define likely(x) __builtin_expect((x),1) +#endif /* likely */ + +/** + * Check if a branch is unlikely to be taken. + * + * This compiler builtin allows the developer to indicate if a branch is + * unlikely to be taken. 
Example: + * + * if (unlikely(x < 1)) + * do_stuff(); + * + */ +#ifndef unlikely +#define unlikely(x) __builtin_expect((x),0) +#endif /* unlikely */ + +#endif /* _RTE_BRANCH_PREDICTION_H_ */ diff --git a/src/dpdk22/lib/librte_eal/common/include/rte_common.h b/src/dpdk22/lib/librte_eal/common/include/rte_common.h new file mode 100644 index 00000000..b58a3841 --- /dev/null +++ b/src/dpdk22/lib/librte_eal/common/include/rte_common.h @@ -0,0 +1,401 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef _RTE_COMMON_H_ +#define _RTE_COMMON_H_ + +/** + * @file + * + * Generic, commonly-used macro and inline function definitions + * for Intel DPDK. + */ + +#ifdef __cplusplus +extern "C" { +#endif + +#include +#include +#include +#include +#include + +#ifndef typeof +#define typeof __typeof__ +#endif + +#ifndef asm +#define asm __asm__ +#endif + +#ifdef RTE_ARCH_STRICT_ALIGN +typedef uint64_t unaligned_uint64_t __attribute__ ((aligned(1))); +typedef uint32_t unaligned_uint32_t __attribute__ ((aligned(1))); +typedef uint16_t unaligned_uint16_t __attribute__ ((aligned(1))); +#else +typedef uint64_t unaligned_uint64_t; +typedef uint32_t unaligned_uint32_t; +typedef uint16_t unaligned_uint16_t; +#endif + +/** + * Force alignment + */ +#define __rte_aligned(a) __attribute__((__aligned__(a))) + +/** + * Force a structure to be packed + */ +#define __rte_packed __attribute__((__packed__)) + +/******* Macro to mark functions and fields scheduled for removal *****/ +#define __rte_deprecated __attribute__((__deprecated__)) + +/*********** Macros to eliminate unused variable warnings ********/ + +/** + * short definition to mark a function parameter unused + */ +#define __rte_unused __attribute__((__unused__)) + +/** + * definition to mark a variable or function parameter as used so + * as to avoid a compiler warning + */ +#define RTE_SET_USED(x) (void)(x) + +/*********** Macros for pointer arithmetic ********/ + +/** + * add a 
byte-value offset to a pointer + */ +#define RTE_PTR_ADD(ptr, x) ((void*)((uintptr_t)(ptr) + (x))) + +/** + * subtract a byte-value offset from a pointer + */ +#define RTE_PTR_SUB(ptr, x) ((void*)((uintptr_t)(ptr) - (x))) + +/** + * get the difference between two pointer values, i.e. how far apart + * in bytes are the locations they point to. It is assumed that + * ptr1 is greater than ptr2. + */ +#define RTE_PTR_DIFF(ptr1, ptr2) ((uintptr_t)(ptr1) - (uintptr_t)(ptr2)) + +/*********** Macros/static functions for doing alignment ********/ + + +/** + * Macro to align a pointer to a given power-of-two. The resultant + * pointer will be a pointer of the same type as the first parameter, and + * point to an address no higher than the first parameter. Second parameter + * must be a power-of-two value. + */ +#define RTE_PTR_ALIGN_FLOOR(ptr, align) \ + ((typeof(ptr))RTE_ALIGN_FLOOR((uintptr_t)(ptr), align)) + +/** + * Macro to align a value to a given power-of-two. The resultant value + * will be of the same type as the first parameter, and will be no + * bigger than the first parameter. Second parameter must be a + * power-of-two value. + */ +#define RTE_ALIGN_FLOOR(val, align) \ + (typeof(val))((val) & (~((typeof(val))((align) - 1)))) + +/** + * Macro to align a pointer to a given power-of-two. The resultant + * pointer will be a pointer of the same type as the first parameter, and + * point to an address no lower than the first parameter. Second parameter + * must be a power-of-two value. + */ +#define RTE_PTR_ALIGN_CEIL(ptr, align) \ + RTE_PTR_ALIGN_FLOOR((typeof(ptr))RTE_PTR_ADD(ptr, (align) - 1), align) + +/** + * Macro to align a value to a given power-of-two. The resultant value + * will be of the same type as the first parameter, and will be no lower + * than the first parameter. Second parameter must be a power-of-two + * value.
+ */ +#define RTE_ALIGN_CEIL(val, align) \ + RTE_ALIGN_FLOOR(((val) + ((typeof(val)) (align) - 1)), align) + +/** + * Macro to align a pointer to a given power-of-two. The resultant + * pointer will be a pointer of the same type as the first parameter, and + * point to an address no lower than the first parameter. Second parameter + * must be a power-of-two value. + * This function is the same as RTE_PTR_ALIGN_CEIL + */ +#define RTE_PTR_ALIGN(ptr, align) RTE_PTR_ALIGN_CEIL(ptr, align) + +/** + * Macro to align a value to a given power-of-two. The resultant + * value will be of the same type as the first parameter, and + * will be no lower than the first parameter. Second parameter + * must be a power-of-two value. + * This function is the same as RTE_ALIGN_CEIL + */ +#define RTE_ALIGN(val, align) RTE_ALIGN_CEIL(val, align) + +/** + * Checks if a pointer is aligned to a given power-of-two value + * + * @param ptr + * The pointer whose alignment is to be checked + * @param align + * The power-of-two value to which the ptr should be aligned + * + * @return + * True(1) where the pointer is correctly aligned, false(0) otherwise + */ +static inline int +rte_is_aligned(void *ptr, unsigned align) +{ + return RTE_PTR_ALIGN(ptr, align) == ptr; +} + +/*********** Macros for compile type checks ********/ + +/** + * Triggers an error at compilation time if the condition is true. 
+ */ +#ifndef __OPTIMIZE__ +#define RTE_BUILD_BUG_ON(condition) ((void)sizeof(char[1 - 2*!!(condition)])) +#else +extern int RTE_BUILD_BUG_ON_detected_error; +#define RTE_BUILD_BUG_ON(condition) do { \ + ((void)sizeof(char[1 - 2*!!(condition)])); \ + if (condition) \ + RTE_BUILD_BUG_ON_detected_error = 1; \ +} while(0) +#endif + +/*********** Macros to work with powers of 2 ********/ + +/** + * Returns true if n is a power of 2 + * @param n + * Number to check + * @return 1 if true, 0 otherwise + */ +static inline int +rte_is_power_of_2(uint32_t n) +{ + return n && !(n & (n - 1)); +} + +/** + * Aligns input parameter to the next power of 2 + * + * @param x + * The integer value to align + * + * @return + * Input parameter aligned to the next power of 2 + */ +static inline uint32_t +rte_align32pow2(uint32_t x) +{ + x--; + x |= x >> 1; + x |= x >> 2; + x |= x >> 4; + x |= x >> 8; + x |= x >> 16; + + return x + 1; +} + +/** + * Aligns 64b input parameter to the next power of 2 + * + * @param v + * The 64b value to align + * + * @return + * Input parameter aligned to the next power of 2 + */ +static inline uint64_t +rte_align64pow2(uint64_t v) +{ + v--; + v |= v >> 1; + v |= v >> 2; + v |= v >> 4; + v |= v >> 8; + v |= v >> 16; + v |= v >> 32; + + return v + 1; +} + +/*********** Macros for calculating min and max **********/ + +/** + * Macro to return the minimum of two numbers + */ +#define RTE_MIN(a, b) ({ \ + typeof (a) _a = (a); \ + typeof (b) _b = (b); \ + _a < _b ? _a : _b; \ + }) + +/** + * Macro to return the maximum of two numbers + */ +#define RTE_MAX(a, b) ({ \ + typeof (a) _a = (a); \ + typeof (b) _b = (b); \ + _a > _b ?
_a : _b; \ + }) + +/*********** Other general functions / macros ********/ + +#ifdef __SSE2__ +#include +/** + * PAUSE instruction for tight loops (avoid busy waiting) + */ +static inline void +rte_pause (void) +{ + _mm_pause(); +} +#else +static inline void +rte_pause(void) {} +#endif + +/** + * Searches the input parameter for the least significant set bit + * (starting from zero). + * If a least significant 1 bit is found, its bit index is returned. + * If the content of the input parameter is zero, then the content of the return + * value is undefined. + * @param v + * input parameter, should not be zero. + * @return + * least significant set bit in the input parameter. + */ +static inline uint32_t +rte_bsf32(uint32_t v) +{ + return __builtin_ctz(v); +} + +#ifndef offsetof +/** Return the offset of a field in a structure. */ +#define offsetof(TYPE, MEMBER) __builtin_offsetof (TYPE, MEMBER) +#endif + +#define _RTE_STR(x) #x +/** Take a macro value and get a string version of it */ +#define RTE_STR(x) _RTE_STR(x) + +/** Mask value of type "tp" for the first "ln" bit set. */ +#define RTE_LEN2MASK(ln, tp) \ + ((tp)((uint64_t)-1 >> (sizeof(uint64_t) * CHAR_BIT - (ln)))) + +/** Number of elements in the array. */ +#define RTE_DIM(a) (sizeof (a) / sizeof ((a)[0])) + +/** + * Converts a numeric string to the equivalent uint64_t value. + * As well as straight number conversion, also recognises the suffixes + * k, m and g for kilobytes, megabytes and gigabytes respectively. + * + * If a negative number is passed in i.e. a string with the first non-black + * character being "-", zero is returned. Zero is also returned in the case of + * an error with the strtoull call in the function. + * + * @param str + * String containing number to convert. + * @return + * Number. 
+ */ +static inline uint64_t +rte_str_to_size(const char *str) +{ + char *endptr; + unsigned long long size; + + while (isspace((unsigned char)*str)) /* ctype needs an unsigned char value; a negative char is UB */ + str++; + if (*str == '-') + return 0; + + errno = 0; + size = strtoull(str, &endptr, 0); + if (errno) + return 0; + + if (*endptr == ' ') + endptr++; /* allow 1 space gap */ + + switch (*endptr){ + case 'G': case 'g': size *= 1024; /* fall-through */ + case 'M': case 'm': size *= 1024; /* fall-through */ + case 'K': case 'k': size *= 1024; /* fall-through */ + default: + break; + } + return size; +} + +/** + * Function to terminate the application immediately, printing an error + * message and returning the exit_code back to the shell. + * + * This function never returns + * + * @param exit_code + * The exit code to be returned by the application + * @param format + * The format string to be used for printing the message. This can include + * printf format characters which will be expanded using any further parameters + * to the function. + */ +void +rte_exit(int exit_code, const char *format, ...) + __attribute__((noreturn)) + __attribute__((format(printf, 2, 3))); + +#ifdef __cplusplus +} +#endif + +#endif diff --git a/src/dpdk22/lib/librte_eal/common/include/rte_debug.h b/src/dpdk22/lib/librte_eal/common/include/rte_debug.h new file mode 100644 index 00000000..94129fab --- /dev/null +++ b/src/dpdk22/lib/librte_eal/common/include/rte_debug.h @@ -0,0 +1,103 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef _RTE_DEBUG_H_ +#define _RTE_DEBUG_H_ + +/** + * @file + * + * Debug Functions in RTE + * + * This file defines a generic API for debug operations. Part of + * the implementation is architecture-specific. + */ + +#ifdef __cplusplus +extern "C" { +#endif + +/** + * Dump the stack of the calling core to the console. + */ +void rte_dump_stack(void); + +/** + * Dump the registers of the calling core to the console. + * + * Note: Not implemented in a userapp environment; use gdb instead. + */ +void rte_dump_registers(void); + +/** + * Provide notification of a critical non-recoverable error and terminate + * execution abnormally. + * + * Display the format string and its expanded arguments (printf-like). 
+ * + * In a linuxapp environment, this function dumps the stack and calls + * abort() resulting in a core dump if enabled. + * + * The function never returns. + * + * @param ... + * The format string, followed by the variable list of arguments. + */ +#define rte_panic(...) rte_panic_(__func__, __VA_ARGS__, "dummy") +#define rte_panic_(func, format, ...) __rte_panic(func, format "%.0s", __VA_ARGS__) + +#define RTE_VERIFY(exp) do { \ + if (!(exp)) \ + rte_panic("line %d\tassert \"" #exp "\" failed\n", __LINE__); \ +} while (0) + +/* + * Provide notification of a critical non-recoverable error and stop. + * + * This function should not be called directly. Refer to rte_panic() macro + * documentation. + */ +void __rte_panic(const char *funcname , const char *format, ...) +#ifdef __GNUC__ +#if (__GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ > 2)) + __attribute__((cold)) +#endif +#endif + __attribute__((noreturn)) + __attribute__((format(printf, 2, 3))); + +#ifdef __cplusplus +} +#endif + +#endif /* _RTE_DEBUG_H_ */ diff --git a/src/dpdk22/lib/librte_eal/common/include/rte_dev.h b/src/dpdk22/lib/librte_eal/common/include/rte_dev.h new file mode 100644 index 00000000..f1b55079 --- /dev/null +++ b/src/dpdk22/lib/librte_eal/common/include/rte_dev.h @@ -0,0 +1,192 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2014 6WIND S.A. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of 6WIND S.A. 
nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef _RTE_DEV_H_ +#define _RTE_DEV_H_ + +/** + * @file + * + * RTE PMD Driver Registration Interface + * + * This file manages the list of device drivers. + */ + +#ifdef __cplusplus +extern "C" { +#endif + +#include +#include + +#include + +__attribute__((format(printf, 2, 0))) +static inline void +rte_pmd_debug_trace(const char *func_name, const char *fmt, ...) 
+{ + va_list ap; + + va_start(ap, fmt); + + char buffer[vsnprintf(NULL, 0, fmt, ap) + 1]; + + va_end(ap); + + va_start(ap, fmt); + vsnprintf(buffer, sizeof(buffer), fmt, ap); + va_end(ap); + + rte_log(RTE_LOG_ERR, RTE_LOGTYPE_PMD, "%s: %s", func_name, buffer); +} + +/* Macros for checking for restricting functions to primary instance only */ +#define RTE_PROC_PRIMARY_OR_ERR_RET(retval) do { \ + if (rte_eal_process_type() != RTE_PROC_PRIMARY) { \ + RTE_PMD_DEBUG_TRACE("Cannot run in secondary processes\n"); \ + return retval; \ + } \ +} while (0) + +#define RTE_PROC_PRIMARY_OR_RET() do { \ + if (rte_eal_process_type() != RTE_PROC_PRIMARY) { \ + RTE_PMD_DEBUG_TRACE("Cannot run in secondary processes\n"); \ + return; \ + } \ +} while (0) + +/* Macros to check for invalid function pointers */ +#define RTE_FUNC_PTR_OR_ERR_RET(func, retval) do { \ + if ((func) == NULL) { \ + RTE_PMD_DEBUG_TRACE("Function not supported\n"); \ + return retval; \ + } \ +} while (0) + +#define RTE_FUNC_PTR_OR_RET(func) do { \ + if ((func) == NULL) { \ + RTE_PMD_DEBUG_TRACE("Function not supported\n"); \ + return; \ + } \ +} while (0) + + +/** Double linked list of device drivers. */ +TAILQ_HEAD(rte_driver_list, rte_driver); + +/** + * Initialization function called for each device driver once. + */ +typedef int (rte_dev_init_t)(const char *name, const char *args); + +/** + * Uninitilization function called for each device driver once. + */ +typedef int (rte_dev_uninit_t)(const char *name); + +/** + * Driver type enumeration + */ +enum pmd_type { + PMD_VDEV = 0, + PMD_PDEV = 1, +}; + +/** + * A structure describing a device driver. + */ +struct rte_driver { + TAILQ_ENTRY(rte_driver) next; /**< Next in list. */ + enum pmd_type type; /**< PMD Driver type */ + const char *name; /**< Driver name. */ + rte_dev_init_t *init; /**< Device init. function. */ + rte_dev_uninit_t *uninit; /**< Device uninit. function. */ +}; + +/** + * Register a device driver. 
+ * + * @param driver + * A pointer to a rte_driver structure describing the driver + * to be registered. + */ +void rte_eal_driver_register(struct rte_driver *driver); + +/** + * Unregister a device driver. + * + * @param driver + * A pointer to a rte_driver structure describing the driver + * to be unregistered. + */ +void rte_eal_driver_unregister(struct rte_driver *driver); + +/** + * Initialize all the registered drivers in this process + */ +int rte_eal_dev_init(void); + +/** + * Initialize a driver specified by name. + * + * @param name + * The pointer to a driver name to be initialized. + * @param args + * The pointer to arguments used by driver initialization. + * @return + * 0 on success, negative on error + */ +int rte_eal_vdev_init(const char *name, const char *args); + +/** + * Uninitialize a driver specified by name. + * + * @param name + * The pointer to a driver name to be uninitialized. + * @return + * 0 on success, negative on error + */ +int rte_eal_vdev_uninit(const char *name); + +#define PMD_REGISTER_DRIVER(d)\ +void devinitfn_ ##d(void);\ +void __attribute__((constructor, used)) devinitfn_ ##d(void)\ +{\ + rte_eal_driver_register(&d);\ +} + +#ifdef __cplusplus +} +#endif + +#endif /* _RTE_DEV_H_ */ diff --git a/src/dpdk22/lib/librte_eal/common/include/rte_devargs.h b/src/dpdk22/lib/librte_eal/common/include/rte_devargs.h new file mode 100644 index 00000000..53c59f56 --- /dev/null +++ b/src/dpdk22/lib/librte_eal/common/include/rte_devargs.h @@ -0,0 +1,177 @@ +/*- + * BSD LICENSE + * + * Copyright 2014 6WIND S.A. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of 6WIND S.A nor the names of its contributors + * may be used to endorse or promote products derived from this + * software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef _RTE_DEVARGS_H_ +#define _RTE_DEVARGS_H_ + +/** + * @file + * + * RTE devargs: list of devices and their user arguments + * + * This file stores a list of devices and their arguments given by + * the user when a DPDK application is started. These devices can be PCI + * devices or virtual devices. These devices are stored at startup in a + * list of rte_devargs structures. 
+ */ + +#ifdef __cplusplus +extern "C" { +#endif + +#include +#include +#include + +/** + * Type of generic device + */ +enum rte_devtype { + RTE_DEVTYPE_WHITELISTED_PCI, + RTE_DEVTYPE_BLACKLISTED_PCI, + RTE_DEVTYPE_VIRTUAL, +}; + +/** + * Structure that stores a device given by the user with its arguments + * + * A user device is a physical or a virtual device given by the user to + * the DPDK application at startup through command line arguments. + * + * The structure stores the configuration of the device, its PCI + * identifier if it's a PCI device or the driver name if it's a virtual + * device. + */ +struct rte_devargs { + /** Next in list. */ + TAILQ_ENTRY(rte_devargs) next; + /** Type of device. */ + enum rte_devtype type; + union { + /** Used if type is RTE_DEVTYPE_*_PCI. */ + struct { + /** PCI location. */ + struct rte_pci_addr addr; + } pci; + /** Used if type is RTE_DEVTYPE_VIRTUAL. */ + struct { + /** Driver name. */ + char drv_name[32]; + } virt; + }; + /** Arguments string as given by user or "" for no argument. */ + char *args; +}; + +/** user device double-linked queue type definition */ +TAILQ_HEAD(rte_devargs_list, rte_devargs); + +/** Global list of user devices */ +extern struct rte_devargs_list devargs_list; + +/** + * Parse a devargs string. + * + * For PCI devices, the format of arguments string is "PCI_ADDR" or + * "PCI_ADDR,key=val,key2=val2,...". Examples: "08:00.1", "0000:5:00.0", + * "04:00.0,arg=val". + * + * For virtual devices, the format of arguments string is "DRIVER_NAME*" + * or "DRIVER_NAME*,key=val,key2=val2,...". Examples: "eth_ring", + * "eth_ring0", "eth_pmdAnything,arg=0:arg2=1". + * + * The function parses the arguments string to get driver name and driver + * arguments. + * + * @param devargs_str + * The arguments as given by the user. + * @param drvname + * The pointer to the string to store parsed driver name. + * @param drvargs + * The pointer to the string to store parsed driver arguments. 
+ * + * @return + * - 0 on success + * - A negative value on error + */ +int rte_eal_parse_devargs_str(const char *devargs_str, + char **drvname, char **drvargs); + +/** + * Add a device to the user device list + * + * For PCI devices, the format of arguments string is "PCI_ADDR" or + * "PCI_ADDR,key=val,key2=val2,...". Examples: "08:00.1", "0000:5:00.0", + * "04:00.0,arg=val". + * + * For virtual devices, the format of arguments string is "DRIVER_NAME*" + * or "DRIVER_NAME*,key=val,key2=val2,...". Examples: "eth_ring", + * "eth_ring0", "eth_pmdAnything,arg=0:arg2=1". The validity of the + * driver name is not checked by this function, it is done when probing + * the drivers. + * + * @param devtype + * The type of the device. + * @param devargs_str + * The arguments as given by the user. + * + * @return + * - 0 on success + * - A negative value on error + */ +int rte_eal_devargs_add(enum rte_devtype devtype, const char *devargs_str); + +/** + * Count the number of user devices of a specified type + * + * @param devtype + * The type of the devices to be counted. + * + * @return + * The number of devices. + */ +unsigned int +rte_eal_devargs_type_count(enum rte_devtype devtype); + +/** + * This function dumps the list of user devices and their arguments. + * + * @param f + * A pointer to a file for output + */ +void rte_eal_devargs_dump(FILE *f); + +#ifdef __cplusplus +} +#endif + +#endif /* _RTE_DEVARGS_H_ */ diff --git a/src/dpdk22/lib/librte_eal/common/include/rte_eal.h b/src/dpdk22/lib/librte_eal/common/include/rte_eal.h new file mode 100644 index 00000000..d2816a84 --- /dev/null +++ b/src/dpdk22/lib/librte_eal/common/include/rte_eal.h @@ -0,0 +1,241 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. + * All rights reserved.
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef _RTE_EAL_H_ +#define _RTE_EAL_H_ + +/** + * @file + * + * EAL Configuration API + */ + +#include +#include + +#include + +#ifdef __cplusplus +extern "C" { +#endif + +#define RTE_MAGIC 19820526 /**< Magic number written by the main partition when ready. */ + +/* Maximum thread_name length. */ +#define RTE_MAX_THREAD_NAME_LEN 16 + +/** + * The lcore role (used in RTE or not). 
+ */ +enum rte_lcore_role_t { + ROLE_RTE, + ROLE_OFF, +}; + +/** + * The type of process in a linuxapp, multi-process setup + */ +enum rte_proc_type_t { + RTE_PROC_AUTO = -1, /* allow auto-detection of primary/secondary */ + RTE_PROC_PRIMARY = 0, /* set to zero, so primary is the default */ + RTE_PROC_SECONDARY, + + RTE_PROC_INVALID +}; + +/** + * The global RTE configuration structure. + */ +struct rte_config { + uint32_t master_lcore; /**< Id of the master lcore */ + uint32_t lcore_count; /**< Number of available logical cores. */ + enum rte_lcore_role_t lcore_role[RTE_MAX_LCORE]; /**< State of cores. */ + + /** Primary or secondary configuration */ + enum rte_proc_type_t process_type; + + /** + * Pointer to memory configuration, which may be shared across multiple + * Intel DPDK instances + */ + struct rte_mem_config *mem_config; +} __attribute__((__packed__)); + +/** + * Get the global configuration structure. + * + * @return + * A pointer to the global configuration structure. + */ +struct rte_config *rte_eal_get_configuration(void); + +/** + * Get a lcore's role. + * + * @param lcore_id + * The identifier of the lcore. + * @return + * The role of the lcore. + */ +enum rte_lcore_role_t rte_eal_lcore_role(unsigned lcore_id); + + +/** + * Get the process type in a multi-process setup + * + * @return + * The process type + */ +enum rte_proc_type_t rte_eal_process_type(void); + +/** + * Request iopl privilege for all RPL. + * + * This function should be called by pmds which need access to ioports. + + * @return + * - On success, returns 0. + * - On failure, returns -1. + */ +int rte_eal_iopl_init(void); + +/** + * Initialize the Environment Abstraction Layer (EAL). + * + * This function is to be executed on the MASTER lcore only, as soon + * as possible in the application's main() function. + * + * The function finishes the initialization process before main() is called. + * It puts the SLAVE lcores in the WAIT state. 
+ * + * When the multi-partition feature is supported, depending on the + * configuration (if CONFIG_RTE_EAL_MAIN_PARTITION is disabled), this + * function waits to ensure that the magic number is set before + * returning. See also the rte_eal_get_configuration() function. Note: + * This behavior may change in the future. + * + * @param argc + * The argc argument that was given to the main() function. + * @param argv + * The argv argument that was given to the main() function. + * @return + * - On success, the number of parsed arguments, which is greater or + * equal to zero. After the call to rte_eal_init(), + * all arguments argv[x] with x < ret may be modified and should + * not be accessed by the application. + * - On failure, a negative error value. + */ +int rte_eal_init(int argc, char **argv); +/** + * Usage function typedef used by the application usage function. + * + * Use this function typedef to define and call rte_set_application_usage_hook() + * routine. + */ +typedef void (*rte_usage_hook_t)(const char * prgname); + +/** + * Add application usage routine callout from the eal_usage() routine. + * + * This function allows the application to include its usage message + * in the EAL system usage message. The routine rte_set_application_usage_hook() + * needs to be called before the rte_eal_init() routine in the application. + * + * This routine is optional for the application and will behave as if the set + * routine was never called as the default behavior. + * + * @param usage_func + * The func argument is a function pointer to the application usage routine. + * Called function is defined using rte_usage_hook_t typedef, which is of + * the form void rte_usage_func(const char * prgname). + * + * Calling this routine with a NULL value will reset the usage hook routine and + * return the current value, which could be NULL.
+ * @return + * - Returns the current value of the rte_application_usage pointer to allow + * the caller to daisy chain the usage routines if needing more than one. + */ +rte_usage_hook_t +rte_set_application_usage_hook(rte_usage_hook_t usage_func); + +/** + * macro to get the lock of tailq in mem_config + */ +#define RTE_EAL_TAILQ_RWLOCK (&rte_eal_get_configuration()->mem_config->qlock) + +/** + * macro to get the lock of mempool shared by multiple instances + */ +#define RTE_EAL_MEMPOOL_RWLOCK (&rte_eal_get_configuration()->mem_config->mplock) + +/** + * Whether EAL is using huge pages (disabled by --no-huge option). + * The no-huge mode cannot be used with UIO poll-mode drivers like igb/ixgbe. + * It is useful for NIC drivers (e.g. librte_pmd_mlx4, librte_pmd_vmxnet3) or + * crypto drivers (e.g. librte_crypto_nitrox) provided by third-parties such + * as 6WIND. + * + * @return + * Nonzero if hugepages are enabled. + */ +int rte_eal_has_hugepages(void); + +/** + * A wrap API for syscall gettid. + * + * @return + * On success, returns the thread ID of calling process. + * It is always successful. + */ +int rte_sys_gettid(void); + +/** + * Get system unique thread id (fetched once via rte_sys_gettid(), then cached). + * + * @return + * The ID reported by rte_sys_gettid(), cached in a per-lcore variable while it is still -1. + * It is always successful. + */ +static inline int rte_gettid(void) +{ + static RTE_DEFINE_PER_LCORE(int, _thread_id) = -1; + if (RTE_PER_LCORE(_thread_id) == -1) + RTE_PER_LCORE(_thread_id) = rte_sys_gettid(); + return RTE_PER_LCORE(_thread_id); +} + +#ifdef __cplusplus +} +#endif + +#endif /* _RTE_EAL_H_ */ diff --git a/src/dpdk22/lib/librte_eal/common/include/rte_eal_memconfig.h b/src/dpdk22/lib/librte_eal/common/include/rte_eal_memconfig.h new file mode 100644 index 00000000..2b5e0b17 --- /dev/null +++ b/src/dpdk22/lib/librte_eal/common/include/rte_eal_memconfig.h @@ -0,0 +1,100 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. + * All rights reserved.
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef _RTE_EAL_MEMCONFIG_H_ +#define _RTE_EAL_MEMCONFIG_H_ + +#include +#include +#include +#include +#include + +#ifdef __cplusplus +extern "C" { +#endif + +/** + * the structure for the memory configuration for the RTE. + * Used by the rte_config structure. It is separated out, as for multi-process + * support, the memory details should be shared across instances + */ +struct rte_mem_config { + volatile uint32_t magic; /**< Magic number - Sanity check. 
*/ + + /* memory topology */ + uint32_t nchannel; /**< Number of channels (0 if unknown). */ + uint32_t nrank; /**< Number of ranks (0 if unknown). */ + + /** + * current lock nest order + * - qlock->mlock (ring/hash/lpm) + * - mplock->qlock->mlock (mempool) + * Notice: + * *ALWAYS* obtain qlock first if having to obtain both qlock and mlock + */ + rte_rwlock_t mlock; /**< only used by memzone LIB for thread-safe. */ + rte_rwlock_t qlock; /**< used for tailq operation for thread safe. */ + rte_rwlock_t mplock; /**< only used by mempool LIB for thread-safe. */ + + uint32_t memzone_cnt; /**< Number of allocated memzones */ + + /* memory segments and zones */ + struct rte_memseg memseg[RTE_MAX_MEMSEG]; /**< Physmem descriptors. */ + struct rte_memzone memzone[RTE_MAX_MEMZONE]; /**< Memzone descriptors. */ + + struct rte_tailq_head tailq_head[RTE_MAX_TAILQ]; /**< Tailqs for objects */ + + /* Heaps of Malloc per socket */ + struct malloc_heap malloc_heaps[RTE_MAX_NUMA_NODES]; + + /* address of mem_config in primary process. used to map shared config into + * exact same address the primary process maps it. + */ + uint64_t mem_cfg_addr; +} __attribute__((__packed__)); + + +inline static void +rte_eal_mcfg_wait_complete(struct rte_mem_config* mcfg) +{ + /* spin, calling rte_pause() between polls, until mcfg->magic equals RTE_MAGIC (shared mem_config finished initialising) */ + while(mcfg->magic != RTE_MAGIC) + rte_pause(); +} + +#ifdef __cplusplus +} +#endif + +#endif /* _RTE_EAL_MEMCONFIG_H_ */ diff --git a/src/dpdk22/lib/librte_eal/common/include/rte_errno.h b/src/dpdk22/lib/librte_eal/common/include/rte_errno.h new file mode 100644 index 00000000..2e5cc454 --- /dev/null +++ b/src/dpdk22/lib/librte_eal/common/include/rte_errno.h @@ -0,0 +1,95 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. + * All rights reserved.
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +/** + * @file + * + * API for error cause tracking + */ + +#ifndef _RTE_ERRNO_H_ +#define _RTE_ERRNO_H_ + +#ifdef __cplusplus +extern "C" { +#endif + +#include + +RTE_DECLARE_PER_LCORE(int, _rte_errno); /**< Per core error number. */ + +/** + * Error number value, stored per-thread, which can be queried after + * calls to certain functions to determine why those functions failed. 
+ * + * Uses standard values from errno.h wherever possible, with a small number + * of additional possible values for RTE-specific conditions. + */ +#define rte_errno RTE_PER_LCORE(_rte_errno) + +/** + * Function which returns a printable string describing a particular + * error code. For non-RTE-specific error codes, this function returns + * the value from the libc strerror function. + * + * @param errnum + * The error number to be looked up - generally the value of rte_errno + * @return + * A pointer to a thread-local string containing the text describing + * the error. + */ +const char *rte_strerror(int errnum); + +#ifndef __ELASTERROR +/** + * Check if we have a defined value for the max system-defined errno values. + * if no max defined, start from 1000 to prevent overlap with standard values + */ +#define __ELASTERROR 1000 +#endif + +/** Error types */ +enum { + RTE_MIN_ERRNO = __ELASTERROR, /**< Start numbering above std errno vals */ + + E_RTE_SECONDARY, /**< Operation not allowed in secondary processes */ + E_RTE_NO_CONFIG, /**< Missing rte_config */ + + RTE_MAX_ERRNO /**< Max RTE error number */ +}; + +#ifdef __cplusplus +} +#endif + +#endif /* _RTE_ERRNO_H_ */ diff --git a/src/dpdk22/lib/librte_eal/common/include/rte_hexdump.h b/src/dpdk22/lib/librte_eal/common/include/rte_hexdump.h new file mode 100644 index 00000000..5c18a50b --- /dev/null +++ b/src/dpdk22/lib/librte_eal/common/include/rte_hexdump.h @@ -0,0 +1,89 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. 
+ * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef _RTE_HEXDUMP_H_ +#define _RTE_HEXDUMP_H_ + +/** + * @file + * Simple API to dump out memory in a special hex format. + */ + +#include + +#ifdef __cplusplus +extern "C" { +#endif + +/** +* Dump out memory in a special hex dump format. +* +* @param f +* A pointer to a file for output +* @param title +* If not NULL this string is printed as a header to the output. +* @param buf +* This is the buffer address to print out. +* @param len +* The number of bytes to dump out +* @return +* None. +*/ + +extern void +rte_hexdump(FILE *f, const char * title, const void * buf, unsigned int len); + +/** +* Dump out memory in a hex format with colons between bytes. 
+* +* @param f +* A pointer to a file for output +* @param title +* If not NULL this string is printed as a header to the output. +* @param buf +* This is the buffer address to print out. +* @param len +* The number of bytes to dump out +* @return +* None. +*/ + +void +rte_memdump(FILE *f, const char * title, const void * buf, unsigned int len); + + +#ifdef __cplusplus +} +#endif + +#endif /* _RTE_HEXDUMP_H_ */ diff --git a/src/dpdk22/lib/librte_eal/common/include/rte_interrupts.h b/src/dpdk22/lib/librte_eal/common/include/rte_interrupts.h new file mode 100644 index 00000000..ff11ef3a --- /dev/null +++ b/src/dpdk22/lib/librte_eal/common/include/rte_interrupts.h @@ -0,0 +1,120 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef _RTE_INTERRUPTS_H_ +#define _RTE_INTERRUPTS_H_ + +/** + * @file + * + * The RTE interrupt interface provides functions to register/unregister + * callbacks for a specific interrupt. + */ + +#ifdef __cplusplus +extern "C" { +#endif + +/** Interrupt handle */ +struct rte_intr_handle; + +/** Function to be registered for the specific interrupt */ +typedef void (*rte_intr_callback_fn)(struct rte_intr_handle *intr_handle, + void *cb_arg); + +#include + +/** + * It registers the callback for the specific interrupt. Multiple + * callbacks can be registered at the same time. + * @param intr_handle + * Pointer to the interrupt handle. + * @param cb + * callback address. + * @param cb_arg + * address of parameter for callback. + * + * @return + * - On success, zero. + * - On failure, a negative value. + */ +int rte_intr_callback_register(struct rte_intr_handle *intr_handle, + rte_intr_callback_fn cb, void *cb_arg); + +/** + * It unregisters the callback according to the specified interrupt handle. + * + * @param intr_handle + * pointer to the interrupt handle. + * @param cb + * callback address. + * @param cb_arg + * address of parameter for callback, (void *)-1 means to remove all + * registered which has the same callback address. + * + * @return + * - On success, return the number of callback entities removed. + * - On failure, a negative value.
+ */ +int rte_intr_callback_unregister(struct rte_intr_handle *intr_handle, + rte_intr_callback_fn cb, void *cb_arg); + +/** + * It enables the interrupt for the specified handle. + * + * @param intr_handle + * pointer to the interrupt handle. + * + * @return + * - On success, zero. + * - On failure, a negative value. + */ +int rte_intr_enable(struct rte_intr_handle *intr_handle); + +/** + * It disables the interrupt for the specified handle. + * + * @param intr_handle + * pointer to the interrupt handle. + * + * @return + * - On success, zero. + * - On failure, a negative value. + */ +int rte_intr_disable(struct rte_intr_handle *intr_handle); + +#ifdef __cplusplus +} +#endif + +#endif diff --git a/src/dpdk22/lib/librte_eal/common/include/rte_keepalive.h b/src/dpdk22/lib/librte_eal/common/include/rte_keepalive.h new file mode 100644 index 00000000..02472c02 --- /dev/null +++ b/src/dpdk22/lib/librte_eal/common/include/rte_keepalive.h @@ -0,0 +1,146 @@ +/*- + * BSD LICENSE + * + * Copyright 2015 Intel Shannon Ltd. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +/** + * @file rte_keepalive.h + * DPDK RTE LCore Keepalive Monitor. + * + **/ + +#ifndef _KEEPALIVE_H_ +#define _KEEPALIVE_H_ + +#include + +#ifndef RTE_KEEPALIVE_MAXCORES +/** + * Number of cores to track. + * @note Must be larger than the highest core id. */ +#define RTE_KEEPALIVE_MAXCORES RTE_MAX_LCORE +#endif + + +/** + * Keepalive failure callback. + * + * Receives a data pointer passed to rte_keepalive_create() and the id of the + * failed core. + */ +typedef void (*rte_keepalive_failure_callback_t)( + void *data, + const int id_core); + + +/** + * Keepalive state structure. + * @internal + */ +struct rte_keepalive { + /** Core Liveness. */ + enum { + ALIVE = 1, + MISSING = 0, + DEAD = 2, + GONE = 3 + } __rte_cache_aligned state_flags[RTE_KEEPALIVE_MAXCORES]; + + /** Last-seen-alive timestamps */ + uint64_t last_alive[RTE_KEEPALIVE_MAXCORES]; + + /** + * Cores to check. + * Indexed by core id, non-zero if the core should be checked. + */ + uint8_t active_cores[RTE_KEEPALIVE_MAXCORES]; + + /** Dead core handler. */ + rte_keepalive_failure_callback_t callback; + + /** + * Dead core handler app data. + * Pointer is passed to dead core handler. 
+ */ + void *callback_data; + uint64_t tsc_initial; + uint64_t tsc_mhz; /* NOTE(review): tsc_initial and tsc_mhz are undocumented here; presumably TSC bookkeeping - confirm meaning and units in the implementation. */ +}; + + +/** + * Initialise keepalive sub-system. + * @param callback + * Function called upon detection of a dead core. + * @param data + * Data pointer to be passed to function callback. + * @return + * Keepalive structure on success, NULL on failure. + */ +struct rte_keepalive *rte_keepalive_create( + rte_keepalive_failure_callback_t callback, + void *data); + + +/** + * Checks & handles keepalive state of monitored cores. + * @param *ptr_timer Triggering timer (unused) + * @param *ptr_data Data pointer (keepalive structure) + */ +void rte_keepalive_dispatch_pings(void *ptr_timer, void *ptr_data); + + +/** + * Registers a core for keepalive checks. + * @param *keepcfg + * Keepalive structure pointer + * @param id_core + * ID number of core to register. + */ +void rte_keepalive_register_core(struct rte_keepalive *keepcfg, + const int id_core); + + +/** + * Per-core keepalive check: sets state_flags[rte_lcore_id()] to ALIVE. + * @param *keepcfg + * Keepalive structure pointer + * + * This function needs to be called from within the main process loop of + * the LCore to be checked. The index is not range-checked against RTE_KEEPALIVE_MAXCORES. + */ +static inline void +rte_keepalive_mark_alive(struct rte_keepalive *keepcfg) +{ + keepcfg->state_flags[rte_lcore_id()] = ALIVE; +} + + +#endif /* _KEEPALIVE_H_ */ diff --git a/src/dpdk22/lib/librte_eal/common/include/rte_launch.h b/src/dpdk22/lib/librte_eal/common/include/rte_launch.h new file mode 100644 index 00000000..dd1946da --- /dev/null +++ b/src/dpdk22/lib/librte_eal/common/include/rte_launch.h @@ -0,0 +1,177 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef _RTE_LAUNCH_H_ +#define _RTE_LAUNCH_H_ + +/** + * @file + * + * Launch tasks on other lcores + */ + +#ifdef __cplusplus +extern "C" { +#endif + +/** + * State of an lcore. + */ +enum rte_lcore_state_t { + WAIT, /**< waiting a new command */ + RUNNING, /**< executing command */ + FINISHED, /**< command executed */ +}; + +/** + * Definition of a remote launch function. + */ +typedef int (lcore_function_t)(void *); + +/** + * Launch a function on another lcore. + * + * To be executed on the MASTER lcore only. + * + * Sends a message to a slave lcore (identified by the slave_id) that + * is in the WAIT state (this is true after the first call to + * rte_eal_init()). This can be checked by first calling + * rte_eal_wait_lcore(slave_id). 
+ * + * When the remote lcore receives the message, it switches to + * the RUNNING state, then calls the function f with argument arg. Once the + * execution is done, the remote lcore switches to a FINISHED state and + * the return value of f is stored in a local variable to be read using + * rte_eal_wait_lcore(). + * + * The MASTER lcore returns as soon as the message is sent and knows + * nothing about the completion of f. + * + * Note: This function is not designed to offer optimum + * performance. It is just a practical way to launch a function on + * another lcore at initialization time. + * + * @param f + * The function to be called. + * @param arg + * The argument for the function. + * @param slave_id + * The identifier of the lcore on which the function should be executed. + * @return + * - 0: Success. Execution of function f started on the remote lcore. + * - (-EBUSY): The remote lcore is not in a WAIT state. + */ +int rte_eal_remote_launch(lcore_function_t *f, void *arg, unsigned slave_id); + +/** + * This enum indicates whether the master core must execute the handler + * launched on all logical cores. + */ +enum rte_rmt_call_master_t { + SKIP_MASTER = 0, /**< lcore handler not executed by master core. */ + CALL_MASTER, /**< lcore handler executed by master core. */ +}; + +/** + * Launch a function on all lcores. + * + * Check that each SLAVE lcore is in a WAIT state, then call + * rte_eal_remote_launch() for each lcore. + * + * @param f + * The function to be called. + * @param arg + * The argument for the function. + * @param call_master + * If call_master set to SKIP_MASTER, the MASTER lcore does not call + * the function. If call_master is set to CALL_MASTER, the function + * is also called on master before returning. In any case, the master + * lcore returns as soon as it finished its job and knows nothing + * about the completion of f on the other lcores. + * @return + * - 0: Success. Execution of function f started on all remote lcores. 
+ * - (-EBUSY): At least one remote lcore is not in a WAIT state. In this + * case, no message is sent to any of the lcores. + */ +int rte_eal_mp_remote_launch(lcore_function_t *f, void *arg, + enum rte_rmt_call_master_t call_master); + +/** + * Get the state of the lcore identified by slave_id. + * + * To be executed on the MASTER lcore only. + * + * @param slave_id + * The identifier of the lcore. + * @return + * The state of the lcore. + */ +enum rte_lcore_state_t rte_eal_get_lcore_state(unsigned slave_id); + +/** + * Wait until an lcore finishes its job. + * + * To be executed on the MASTER lcore only. + * + * If the slave lcore identified by the slave_id is in a FINISHED state, + * switch to the WAIT state. If the lcore is in RUNNING state, wait until + * the lcore finishes its job and moves to the FINISHED state. + * + * @param slave_id + * The identifier of the lcore. + * @return + * - 0: If the lcore identified by the slave_id is in a WAIT state. + * - The value that was returned by the previous remote launch + * function call if the lcore identified by the slave_id was in a + * FINISHED or RUNNING state. In this case, it changes the state + * of the lcore to WAIT. + */ +int rte_eal_wait_lcore(unsigned slave_id); + +/** + * Wait until all lcores finish their jobs. + * + * To be executed on the MASTER lcore only. Issue an + * rte_eal_wait_lcore() for every lcore. The return values are + * ignored. + * + * After a call to rte_eal_mp_wait_lcore(), the caller can assume + * that all slave lcores are in a WAIT state. + */ +void rte_eal_mp_wait_lcore(void); + +#ifdef __cplusplus +} +#endif + +#endif /* _RTE_LAUNCH_H_ */ diff --git a/src/dpdk22/lib/librte_eal/common/include/rte_lcore.h b/src/dpdk22/lib/librte_eal/common/include/rte_lcore.h new file mode 100644 index 00000000..25460b92 --- /dev/null +++ b/src/dpdk22/lib/librte_eal/common/include/rte_lcore.h @@ -0,0 +1,276 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. 
+ * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef _RTE_LCORE_H_ +#define _RTE_LCORE_H_ + +/** + * @file + * + * API for lcore and socket manipulation + * + */ +#include +#include +#include + +#ifdef __cplusplus +extern "C" { +#endif + +#define LCORE_ID_ANY UINT32_MAX /**< Any lcore. 
*/ + +#if defined(__linux__) + typedef cpu_set_t rte_cpuset_t; +#elif defined(__FreeBSD__) +#include + typedef cpuset_t rte_cpuset_t; +#endif + +/** + * Structure storing internal configuration (per-lcore) + */ +struct lcore_config { + unsigned detected; /**< true if lcore was detected */ + pthread_t thread_id; /**< pthread identifier */ + int pipe_master2slave[2]; /**< communication pipe with master */ + int pipe_slave2master[2]; /**< communication pipe with master */ + lcore_function_t * volatile f; /**< function to call */ + void * volatile arg; /**< argument of function */ + volatile int ret; /**< return value of function */ + volatile enum rte_lcore_state_t state; /**< lcore state */ + unsigned socket_id; /**< physical socket id for this lcore */ + unsigned core_id; /**< core number on socket for this lcore */ + int core_index; /**< relative index, starting from 0 */ + rte_cpuset_t cpuset; /**< cpu set which the lcore affinity to */ +}; + +/** + * Internal configuration (per-lcore) + */ +extern struct lcore_config lcore_config[RTE_MAX_LCORE]; + +RTE_DECLARE_PER_LCORE(unsigned, _lcore_id); /**< Per thread "lcore id". */ +RTE_DECLARE_PER_LCORE(rte_cpuset_t, _cpuset); /**< Per thread "cpuset". */ + +/** + * Return the ID of the execution unit we are running on. + * @return + * Logical core ID (in EAL thread) or LCORE_ID_ANY (in non-EAL thread) + */ +static inline unsigned +rte_lcore_id(void) +{ + return RTE_PER_LCORE(_lcore_id); +} + +/** + * Get the id of the master lcore + * + * @return + * the id of the master lcore + */ +static inline unsigned +rte_get_master_lcore(void) +{ + return rte_eal_get_configuration()->master_lcore; +} + +/** + * Return the number of execution units (lcores) on the system. + * + * @return + * the number of execution units (lcores) on the system. 
+ */ +static inline unsigned +rte_lcore_count(void) +{ + const struct rte_config *cfg = rte_eal_get_configuration(); + return cfg->lcore_count; +} + +/** + * Return the index of the lcore starting from zero. + * The order is physical or given by command line (-l option). + * + * @param lcore_id + * The targeted lcore, or -1 for the current one. + * @return + * The relative index, or -1 if not enabled. + */ +static inline int +rte_lcore_index(int lcore_id) +{ + if (lcore_id >= RTE_MAX_LCORE) + return -1; + if (lcore_id < 0) + lcore_id = rte_lcore_id(); + return lcore_config[lcore_id].core_index; +} + +/** + * Return the ID of the physical socket of the logical core we are + * running on. + * @return + * the ID of current lcoreid's physical socket + */ +unsigned rte_socket_id(void); + +/** + * Get the ID of the physical socket of the specified lcore + * + * @param lcore_id + * the targeted lcore, which MUST be between 0 and RTE_MAX_LCORE-1. + * @return + * the ID of lcoreid's physical socket + */ +static inline unsigned +rte_lcore_to_socket_id(unsigned lcore_id) +{ + return lcore_config[lcore_id].socket_id; +} + +/** + * Test if an lcore is enabled. + * + * @param lcore_id + * The identifier of the lcore, which MUST be between 0 and + * RTE_MAX_LCORE-1. + * @return + * True if the given lcore is enabled; false otherwise. + */ +static inline int +rte_lcore_is_enabled(unsigned lcore_id) +{ + struct rte_config *cfg = rte_eal_get_configuration(); + if (lcore_id >= RTE_MAX_LCORE) + return 0; + return (cfg->lcore_role[lcore_id] != ROLE_OFF); +} + +/** + * Get the next enabled lcore ID. + * + * @param i + * The current lcore (reference). + * @param skip_master + * If true, do not return the ID of the master lcore. + * @param wrap + * If true, go back to 0 when RTE_MAX_LCORE is reached; otherwise, + * return RTE_MAX_LCORE. + * @return + * The next lcore_id or RTE_MAX_LCORE if not found. 
+ */ +static inline unsigned +rte_get_next_lcore(unsigned i, int skip_master, int wrap) +{ + i++; + if (wrap) + i %= RTE_MAX_LCORE; + + while (i < RTE_MAX_LCORE) { + if (!rte_lcore_is_enabled(i) || + (skip_master && (i == rte_get_master_lcore()))) { + i++; + if (wrap) + i %= RTE_MAX_LCORE; + continue; + } + break; + } + return i; +} +/** + * Macro to browse all running lcores. + */ +#define RTE_LCORE_FOREACH(i) \ + for (i = rte_get_next_lcore(-1, 0, 0); \ + i= 2.12 supports this feature. + * + * This macro only used for Linux, BSD does direct libc call. + * BSD libc version of function is `pthread_set_name_np()`. + */ +#if defined(__DOXYGEN__) +#define rte_thread_setname(...) pthread_setname_np(__VA_ARGS__) +#endif + +#if defined(__GLIBC__) && defined(__GLIBC_PREREQ) +#if __GLIBC_PREREQ(2, 12) +#define rte_thread_setname(...) pthread_setname_np(__VA_ARGS__) +#else +#define rte_thread_setname(...) 0 +#endif +#endif + +#ifdef __cplusplus +} +#endif + + +#endif /* _RTE_LCORE_H_ */ diff --git a/src/dpdk22/lib/librte_eal/common/include/rte_log.h b/src/dpdk22/lib/librte_eal/common/include/rte_log.h new file mode 100644 index 00000000..2e47e7f6 --- /dev/null +++ b/src/dpdk22/lib/librte_eal/common/include/rte_log.h @@ -0,0 +1,311 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. 
+ * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef _RTE_LOG_H_ +#define _RTE_LOG_H_ + +/** + * @file + * + * RTE Logs API + * + * This file provides a log API to RTE applications. + */ + +#ifdef __cplusplus +extern "C" { +#endif + +#include +#include +#include + +/** The rte_log structure. */ +struct rte_logs { + uint32_t type; /**< Bitfield with enabled logs. */ + uint32_t level; /**< Log level. */ + FILE *file; /**< Pointer to current FILE* for logs. */ +}; + +/** Global log informations */ +extern struct rte_logs rte_logs; + +/* SDK log type */ +#define RTE_LOGTYPE_EAL 0x00000001 /**< Log related to eal. */ +#define RTE_LOGTYPE_MALLOC 0x00000002 /**< Log related to malloc. */ +#define RTE_LOGTYPE_RING 0x00000004 /**< Log related to ring. */ +#define RTE_LOGTYPE_MEMPOOL 0x00000008 /**< Log related to mempool. */ +#define RTE_LOGTYPE_TIMER 0x00000010 /**< Log related to timers. */ +#define RTE_LOGTYPE_PMD 0x00000020 /**< Log related to poll mode driver. 
*/ +#define RTE_LOGTYPE_HASH 0x00000040 /**< Log related to hash table. */ +#define RTE_LOGTYPE_LPM 0x00000080 /**< Log related to LPM. */ +#define RTE_LOGTYPE_KNI 0x00000100 /**< Log related to KNI. */ +#define RTE_LOGTYPE_ACL 0x00000200 /**< Log related to ACL. */ +#define RTE_LOGTYPE_POWER 0x00000400 /**< Log related to power. */ +#define RTE_LOGTYPE_METER 0x00000800 /**< Log related to QoS meter. */ +#define RTE_LOGTYPE_SCHED 0x00001000 /**< Log related to QoS port scheduler. */ +#define RTE_LOGTYPE_PORT 0x00002000 /**< Log related to port. */ +#define RTE_LOGTYPE_TABLE 0x00004000 /**< Log related to table. */ +#define RTE_LOGTYPE_PIPELINE 0x00008000 /**< Log related to pipeline. */ +#define RTE_LOGTYPE_MBUF 0x00010000 /**< Log related to mbuf. */ +#define RTE_LOGTYPE_CRYPTODEV 0x00020000 /**< Log related to cryptodev. */ + +/* these log types can be used in an application */ +#define RTE_LOGTYPE_USER1 0x01000000 /**< User-defined log type 1. */ +#define RTE_LOGTYPE_USER2 0x02000000 /**< User-defined log type 2. */ +#define RTE_LOGTYPE_USER3 0x04000000 /**< User-defined log type 3. */ +#define RTE_LOGTYPE_USER4 0x08000000 /**< User-defined log type 4. */ +#define RTE_LOGTYPE_USER5 0x10000000 /**< User-defined log type 5. */ +#define RTE_LOGTYPE_USER6 0x20000000 /**< User-defined log type 6. */ +#define RTE_LOGTYPE_USER7 0x40000000 /**< User-defined log type 7. */ +#define RTE_LOGTYPE_USER8 0x80000000 /**< User-defined log type 8. */ + +/* Can't use 0, as it gives compiler warnings */ +#define RTE_LOG_EMERG 1U /**< System is unusable. */ +#define RTE_LOG_ALERT 2U /**< Action must be taken immediately. */ +#define RTE_LOG_CRIT 3U /**< Critical conditions. */ +#define RTE_LOG_ERR 4U /**< Error conditions. */ +#define RTE_LOG_WARNING 5U /**< Warning conditions. */ +#define RTE_LOG_NOTICE 6U /**< Normal but significant condition. */ +#define RTE_LOG_INFO 7U /**< Informational. */ +#define RTE_LOG_DEBUG 8U /**< Debug-level messages. */ + +/** The default log stream. 
*/ +extern FILE *eal_default_log_stream; + +/** + * Change the stream that will be used by the logging system. + * + * This can be done at any time. The f argument represents the stream + * to be used to send the logs. If f is NULL, the default output is + * used (stderr). + * + * @param f + * Pointer to the stream. + * @return + * - 0 on success. + * - Negative on error. + */ +int rte_openlog_stream(FILE *f); + +/** + * Set the global log level. + * + * After this call, all logs that are lower or equal than level and + * lower or equal than the RTE_LOG_LEVEL configuration option will be + * displayed. + * + * @param level + * Log level. A value between RTE_LOG_EMERG (1) and RTE_LOG_DEBUG (8). + */ +void rte_set_log_level(uint32_t level); + +/** + * Get the global log level. + */ +uint32_t rte_get_log_level(void); + +/** + * Enable or disable the log type. + * + * @param type + * Log type, for example, RTE_LOGTYPE_EAL. + * @param enable + * True for enable; false for disable. + */ +void rte_set_log_type(uint32_t type, int enable); + +/** + * Get the global log type. + */ +uint32_t rte_get_log_type(void); + +/** + * Get the current loglevel for the message being processed. + * + * Before calling the user-defined stream for logging, the log + * subsystem sets a per-lcore variable containing the loglevel and the + * logtype of the message being processed. This information can be + * accessed by the user-defined log output function through this + * function. + * + * @return + * The loglevel of the message being processed. + */ +int rte_log_cur_msg_loglevel(void); + +/** + * Get the current logtype for the message being processed. + * + * Before calling the user-defined stream for logging, the log + * subsystem sets a per-lcore variable containing the loglevel and the + * logtype of the message being processed. This information can be + * accessed by the user-defined log output function through this + * function. 
+ * + * @return + * The logtype of the message being processed. + */ +int rte_log_cur_msg_logtype(void); + +/** + * Enable or disable the history (enabled by default) + * + * @param enable + * true to enable, or 0 to disable history. + */ +void rte_log_set_history(int enable); + +/** + * Dump the log history to a file + * + * @param f + * A pointer to a file for output + */ +void rte_log_dump_history(FILE *f); + +/** + * Add a log message to the history. + * + * This function can be called from a user-defined log stream. It adds + * the given message in the history that can be dumped using + * rte_log_dump_history(). + * + * @param buf + * A data buffer containing the message to be saved in the history. + * @param size + * The length of the data buffer. + * @return + * - 0: Success. + * - (-ENOBUFS) if there is no room to store the message. + */ +int rte_log_add_in_history(const char *buf, size_t size); + +/** + * Generates a log message. + * + * The message will be sent in the stream defined by the previous call + * to rte_openlog_stream(). + * + * The level argument determines if the log should be displayed or + * not, depending on the global rte_logs variable. + * + * The preferred alternative is the RTE_LOG() function because debug logs may + * be removed at compilation time if optimization is enabled. Moreover, + * logs are automatically prefixed by type when using the macro. + * + * @param level + * Log level. A value between RTE_LOG_EMERG (1) and RTE_LOG_DEBUG (8). + * @param logtype + * The log type, for example, RTE_LOGTYPE_EAL. + * @param format + * The format string, as in printf(3), followed by the variable arguments + * required by the format. + * @return + * - 0: Success. + * - Negative on error. + */ +int rte_log(uint32_t level, uint32_t logtype, const char *format, ...) 
+#ifdef __GNUC__ +#if (__GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ > 2)) + __attribute__((cold)) +#endif +#endif + __attribute__((format(printf, 3, 4))); + +/** + * Generates a log message. + * + * The message will be sent in the stream defined by the previous call + * to rte_openlog_stream(). + * + * The level argument determines if the log should be displayed or + * not, depending on the global rte_logs variable. A trailing + * newline may be added if needed. + * + * The preferred alternative is the RTE_LOG() because debug logs may be + * removed at compilation time. + * + * @param level + * Log level. A value between RTE_LOG_EMERG (1) and RTE_LOG_DEBUG (8). + * @param logtype + * The log type, for example, RTE_LOGTYPE_EAL. + * @param format + * The format string, as in printf(3), followed by the variable arguments + * required by the format. + * @param ap + * The va_list of the variable arguments required by the format. + * @return + * - 0: Success. + * - Negative on error. + */ +int rte_vlog(uint32_t level, uint32_t logtype, const char *format, va_list ap) + __attribute__((format(printf,3,0))); + +/** + * Generates a log message. + * + * The RTE_LOG() is equivalent to rte_log() with two differences: + + * - RTE_LOG() can be used to remove debug logs at compilation time, + * depending on RTE_LOG_LEVEL configuration option, and compilation + * optimization level. If optimization is enabled, the tests + * involving constants only are pre-computed. If compilation is done + * with -O0, these tests will be done at run time. + * - The log level and log type names are smaller, for example: + * RTE_LOG(INFO, EAL, "this is a %s", "log"); + * + * @param l + * Log level. A value between EMERG (1) and DEBUG (8). The short name is + * expanded by the macro, so it cannot be an integer value. + * @param t + * The log type, for example, EAL. The short name is expanded by the + * macro, so it cannot be an integer value. + * @param ... 
+ * The fmt string, as in printf(3), followed by the variable arguments + * required by the format. + * @return + * None. The expression is cast to (void), so the value returned + * by rte_log() is discarded. + */ +#define RTE_LOG(l, t, ...) \ + (void)((RTE_LOG_ ## l <= RTE_LOG_LEVEL) ? \ + rte_log(RTE_LOG_ ## l, \ + RTE_LOGTYPE_ ## t, # t ": " __VA_ARGS__) : \ + 0) + +#ifdef __cplusplus +} +#endif + +#endif /* _RTE_LOG_H_ */ diff --git a/src/dpdk22/lib/librte_eal/common/include/rte_malloc.h b/src/dpdk22/lib/librte_eal/common/include/rte_malloc.h new file mode 100644 index 00000000..74bb78c7 --- /dev/null +++ b/src/dpdk22/lib/librte_eal/common/include/rte_malloc.h @@ -0,0 +1,342 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef _RTE_MALLOC_H_ +#define _RTE_MALLOC_H_ + +/** + * @file + * RTE Malloc. This library provides methods for dynamically allocating memory + * from hugepages. + */ + +#include +#include +#include + +#ifdef __cplusplus +extern "C" { +#endif + +/** + * Structure to hold heap statistics obtained from rte_malloc_get_socket_stats function. + */ +struct rte_malloc_socket_stats { + size_t heap_totalsz_bytes; /**< Total bytes on heap */ + size_t heap_freesz_bytes; /**< Total free bytes on heap */ + size_t greatest_free_size; /**< Size in bytes of largest free block */ + unsigned free_count; /**< Number of free elements on heap */ + unsigned alloc_count; /**< Number of allocated elements on heap */ + size_t heap_allocsz_bytes; /**< Total allocated bytes on heap */ +}; + +/** + * This function allocates memory from the huge-page area of memory. The memory + * is not cleared. In NUMA systems, the memory allocated resides on the same + * NUMA socket as the core that calls this function. + * + * @param type + * A string identifying the type of allocated objects (useful for debug + * purposes, such as identifying the cause of a memory leak). Can be NULL. + * @param size + * Size (in bytes) to be allocated. + * @param align + * If 0, the return is a pointer that is suitably aligned for any kind of + * variable (in the same manner as malloc()). + * Otherwise, the return is a pointer that is a multiple of *align*. 
In + * this case, it must be a power of two. (Minimum alignment is the + * cacheline size, i.e. 64-bytes) + * @return + * - NULL on error. Not enough memory, or invalid arguments (size is 0, + * align is not a power of two). + * - Otherwise, the pointer to the allocated object. + */ +void * +rte_malloc(const char *type, size_t size, unsigned align); + +/** + * Allocate zero'ed memory from the heap. + * + * Equivalent to rte_malloc() except that the memory zone is + * initialised with zeros. In NUMA systems, the memory allocated resides on the + * same NUMA socket as the core that calls this function. + * + * @param type + * A string identifying the type of allocated objects (useful for debug + * purposes, such as identifying the cause of a memory leak). Can be NULL. + * @param size + * Size (in bytes) to be allocated. + * @param align + * If 0, the return is a pointer that is suitably aligned for any kind of + * variable (in the same manner as malloc()). + * Otherwise, the return is a pointer that is a multiple of *align*. In + * this case, it must obviously be a power of two. (Minimum alignment is the + * cacheline size, i.e. 64-bytes) + * @return + * - NULL on error. Not enough memory, or invalid arguments (size is 0, + * align is not a power of two). + * - Otherwise, the pointer to the allocated object. + */ +void * +rte_zmalloc(const char *type, size_t size, unsigned align); + +/** + * Replacement function for calloc(), using huge-page memory. Memory area is + * initialised with zeros. In NUMA systems, the memory allocated resides on the + * same NUMA socket as the core that calls this function. + * + * @param type + * A string identifying the type of allocated objects (useful for debug + * purposes, such as identifying the cause of a memory leak). Can be NULL. + * @param num + * Number of elements to be allocated. + * @param size + * Size (in bytes) of a single element. 
+ * @param align + * If 0, the return is a pointer that is suitably aligned for any kind of + * variable (in the same manner as malloc()). + * Otherwise, the return is a pointer that is a multiple of *align*. In + * this case, it must obviously be a power of two. (Minimum alignment is the + * cacheline size, i.e. 64-bytes) + * @return + * - NULL on error. Not enough memory, or invalid arguments (size is 0, + * align is not a power of two). + * - Otherwise, the pointer to the allocated object. + */ +void * +rte_calloc(const char *type, size_t num, size_t size, unsigned align); + +/** + * Replacement function for realloc(), using huge-page memory. Reserved area + * memory is resized, preserving contents. In NUMA systems, the new area + * resides on the same NUMA socket as the old area. + * + * @param ptr + * Pointer to already allocated memory + * @param size + * Size (in bytes) of new area. If this is 0, memory is freed. + * @param align + * If 0, the return is a pointer that is suitably aligned for any kind of + * variable (in the same manner as malloc()). + * Otherwise, the return is a pointer that is a multiple of *align*. In + * this case, it must obviously be a power of two. (Minimum alignment is the + * cacheline size, i.e. 64-bytes) + * @return + * - NULL on error. Not enough memory, or invalid arguments (size is 0, + * align is not a power of two). + * - Otherwise, the pointer to the reallocated memory. + */ +void * +rte_realloc(void *ptr, size_t size, unsigned align); + +/** + * This function allocates memory from the huge-page area of memory. The memory + * is not cleared. + * + * @param type + * A string identifying the type of allocated objects (useful for debug + * purposes, such as identifying the cause of a memory leak). Can be NULL. + * @param size + * Size (in bytes) to be allocated. + * @param align + * If 0, the return is a pointer that is suitably aligned for any kind of + * variable (in the same manner as malloc()). 
+ * Otherwise, the return is a pointer that is a multiple of *align*. In + * this case, it must be a power of two. (Minimum alignment is the + * cacheline size, i.e. 64-bytes) + * @param socket + * NUMA socket to allocate memory on. If SOCKET_ID_ANY is used, this function + * will behave the same as rte_malloc(). + * @return + * - NULL on error. Not enough memory, or invalid arguments (size is 0, + * align is not a power of two). + * - Otherwise, the pointer to the allocated object. + */ +void * +rte_malloc_socket(const char *type, size_t size, unsigned align, int socket); + +/** + * Allocate zero'ed memory from the heap. + * + * Equivalent to rte_malloc() except that the memory zone is + * initialised with zeros. + * + * @param type + * A string identifying the type of allocated objects (useful for debug + * purposes, such as identifying the cause of a memory leak). Can be NULL. + * @param size + * Size (in bytes) to be allocated. + * @param align + * If 0, the return is a pointer that is suitably aligned for any kind of + * variable (in the same manner as malloc()). + * Otherwise, the return is a pointer that is a multiple of *align*. In + * this case, it must obviously be a power of two. (Minimum alignment is the + * cacheline size, i.e. 64-bytes) + * @param socket + * NUMA socket to allocate memory on. If SOCKET_ID_ANY is used, this function + * will behave the same as rte_zmalloc(). + * @return + * - NULL on error. Not enough memory, or invalid arguments (size is 0, + * align is not a power of two). + * - Otherwise, the pointer to the allocated object. + */ +void * +rte_zmalloc_socket(const char *type, size_t size, unsigned align, int socket); + +/** + * Replacement function for calloc(), using huge-page memory. Memory area is + * initialised with zeros. + * + * @param type + * A string identifying the type of allocated objects (useful for debug + * purposes, such as identifying the cause of a memory leak). Can be NULL. 
+ * @param num + * Number of elements to be allocated. + * @param size + * Size (in bytes) of a single element. + * @param align + * If 0, the return is a pointer that is suitably aligned for any kind of + * variable (in the same manner as malloc()). + * Otherwise, the return is a pointer that is a multiple of *align*. In + * this case, it must obviously be a power of two. (Minimum alignment is the + * cacheline size, i.e. 64-bytes) + * @param socket + * NUMA socket to allocate memory on. If SOCKET_ID_ANY is used, this function + * will behave the same as rte_calloc(). + * @return + * - NULL on error. Not enough memory, or invalid arguments (size is 0, + * align is not a power of two). + * - Otherwise, the pointer to the allocated object. + */ +void * +rte_calloc_socket(const char *type, size_t num, size_t size, unsigned align, int socket); + +/** + * Frees the memory space pointed to by the provided pointer. + * + * This pointer must have been returned by a previous call to + * rte_malloc(), rte_zmalloc(), rte_calloc() or rte_realloc(). The behaviour of + * rte_free() is undefined if the pointer does not match this requirement. + * + * If the pointer is NULL, the function does nothing. + * + * @param ptr + * The pointer to memory to be freed. + */ +void +rte_free(void *ptr); + +/** + * If malloc debug is enabled, check a memory block for header + * and trailer markers to indicate that all is well with the block. + * If size is non-null, also return the size of the block. 
+ * + * @param ptr + * pointer to the start of a data block, must have been returned + * by a previous call to rte_malloc(), rte_zmalloc(), rte_calloc() + * or rte_realloc() + * @param size + * if non-null, and memory block pointer is valid, returns the size + * of the memory block + * @return + * -1 on error, invalid pointer passed or header and trailer markers + * are missing or corrupted + * 0 on success + */ +int +rte_malloc_validate(const void *ptr, size_t *size); + +/** + * Get heap statistics for the specified heap. + * + * @param socket + * An integer specifying the socket to get heap statistics for + * @param socket_stats + * A structure which provides memory to store statistics + * @return + * - 0 on success (statistics are written to socket_stats). + * - Negative on error. + */ +int +rte_malloc_get_socket_stats(int socket, + struct rte_malloc_socket_stats *socket_stats); + +/** + * Dump statistics. + * + * Dump for the specified type to the console. If the type argument is + * NULL, all memory types will be dumped. + * + * @param f + * A pointer to a file for output + * @param type + * A string identifying the type of objects to dump, or NULL + * to dump all objects. + */ +void +rte_malloc_dump_stats(FILE *f, const char *type); + +/** + * Set the maximum amount of allocated memory for this type. + * + * This is not yet implemented + * + * @param type + * A string identifying the type of allocated objects. + * @param max + * The maximum amount of allocated bytes for this type. + * @return + * - 0: Success. + * - (-1): Error. 
+ */ +int +rte_malloc_set_limit(const char *type, size_t max); + +/** + * Return the physical address of a virtual address obtained through + * rte_malloc + * + * @param addr + * Address obtained from a previous rte_malloc call + * @return + * 0 on error + * otherwise return physical address of the buffer + */ +phys_addr_t +rte_malloc_virt2phy(const void *addr); + +#ifdef __cplusplus +} +#endif + +#endif /* _RTE_MALLOC_H_ */ diff --git a/src/dpdk22/lib/librte_eal/common/include/rte_malloc_heap.h b/src/dpdk22/lib/librte_eal/common/include/rte_malloc_heap.h new file mode 100644 index 00000000..b2703562 --- /dev/null +++ b/src/dpdk22/lib/librte_eal/common/include/rte_malloc_heap.h @@ -0,0 +1,55 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef _RTE_MALLOC_HEAP_H_ +#define _RTE_MALLOC_HEAP_H_ + +#include <stddef.h> +#include <sys/queue.h> +#include <rte_spinlock.h> +#include <rte_memory.h> + +/* Number of free lists per heap, grouped by size. */ +#define RTE_HEAP_NUM_FREELISTS 13 + +/** + * Per-heap malloc state: a spinlock, RTE_HEAP_NUM_FREELISTS free lists of malloc_elem, an allocation count and a total size. + */ +struct malloc_heap { + rte_spinlock_t lock; + LIST_HEAD(, malloc_elem) free_head[RTE_HEAP_NUM_FREELISTS]; + unsigned alloc_count; + size_t total_size; +} __rte_cache_aligned; + +#endif /* _RTE_MALLOC_HEAP_H_ */ diff --git a/src/dpdk22/lib/librte_eal/common/include/rte_memory.h b/src/dpdk22/lib/librte_eal/common/include/rte_memory.h new file mode 100644 index 00000000..9c9e40f2 --- /dev/null +++ b/src/dpdk22/lib/librte_eal/common/include/rte_memory.h @@ -0,0 +1,250 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution.
+ * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef _RTE_MEMORY_H_ +#define _RTE_MEMORY_H_ + +/** + * @file + * + * Memory-related RTE API. + */ + +#include <stdint.h> +#include <stddef.h> +#include <stdio.h> + +#ifdef RTE_EXEC_ENV_LINUXAPP +#include <exec-env/rte_dom0_common.h> +#endif + +#ifdef __cplusplus +extern "C" { +#endif + +#include <rte_common.h> + +enum rte_page_sizes { /* page sizes in bytes, one power of two per enumerator */ + RTE_PGSIZE_4K = 1ULL << 12, + RTE_PGSIZE_64K = 1ULL << 16, + RTE_PGSIZE_256K = 1ULL << 18, + RTE_PGSIZE_2M = 1ULL << 21, + RTE_PGSIZE_16M = 1ULL << 24, + RTE_PGSIZE_256M = 1ULL << 28, + RTE_PGSIZE_512M = 1ULL << 29, + RTE_PGSIZE_1G = 1ULL << 30, + RTE_PGSIZE_4G = 1ULL << 32, + RTE_PGSIZE_16G = 1ULL << 34, +}; + +#define SOCKET_ID_ANY -1 /**< Any NUMA socket. */ +#ifndef RTE_CACHE_LINE_SIZE +#define RTE_CACHE_LINE_SIZE 64 /**< Cache line size. */ +#endif +#define RTE_CACHE_LINE_MASK (RTE_CACHE_LINE_SIZE-1) /**< Cache line mask.
*/ + +#define RTE_CACHE_LINE_ROUNDUP(size) \ + (RTE_CACHE_LINE_SIZE * ((size + RTE_CACHE_LINE_SIZE - 1) / RTE_CACHE_LINE_SIZE)) +/**< Return the first cache-aligned value greater or equal to size. */ + +/** + * Force alignment to cache line. + */ +#define __rte_cache_aligned __rte_aligned(RTE_CACHE_LINE_SIZE) + +typedef uint64_t phys_addr_t; /**< Physical address definition. */ +#define RTE_BAD_PHYS_ADDR ((phys_addr_t)-1) + +/** + * Physical memory segment descriptor. + */ +struct rte_memseg { + phys_addr_t phys_addr; /**< Start physical address. */ + union { + void *addr; /**< Start virtual address. */ + uint64_t addr_64; /**< Makes sure addr is always 64 bits */ + }; +#ifdef RTE_LIBRTE_IVSHMEM + phys_addr_t ioremap_addr; /**< Real physical address inside the VM */ +#endif + size_t len; /**< Length of the segment. */ + uint64_t hugepage_sz; /**< The pagesize of underlying memory */ + int32_t socket_id; /**< NUMA socket ID. */ + uint32_t nchannel; /**< Number of channels. */ + uint32_t nrank; /**< Number of ranks. */ +#ifdef RTE_LIBRTE_XEN_DOM0 + /**< store segment MFNs */ + uint64_t mfn[DOM0_NUM_MEMBLOCK]; +#endif +} __rte_packed; + +/** + * Lock page in physical memory and prevent from swapping. + * + * @param virt + * The virtual address. + * @return + * 0 on success, negative on error. + */ +int rte_mem_lock_page(const void *virt); + +/** + * Get physical address of any mapped virtual address in the current process. + * It is found by browsing the /proc/self/pagemap special file. + * The page must be locked. + * + * @param virt + * The virtual address. + * @return + * The physical address or RTE_BAD_PHYS_ADDR on error. + */ +phys_addr_t rte_mem_virt2phy(const void *virt); + +/** + * Get the layout of the available physical memory. + * + * It can be useful for an application to have the full physical + * memory layout to decide the size of a memory zone to reserve. This + * table is stored in rte_config (see rte_eal_get_configuration()). 
+ * + * @return + * - On success, return a pointer to a read-only table of struct + * rte_memseg elements, containing the layout of all + * addressable physical memory. The last element of the table + * contains a NULL address. + * - On error, return NULL. This should not happen since it is a fatal + * error that will probably cause the entire system to panic. + */ +const struct rte_memseg *rte_eal_get_physmem_layout(void); + +/** + * Dump the physical memory layout to the console. + * + * @param f + * A pointer to a file for output + */ +void rte_dump_physmem_layout(FILE *f); + +/** + * Get the total amount of available physical memory. + * + * @return + * The total amount of available physical memory in bytes. + */ +uint64_t rte_eal_get_physmem_size(void); + +/** + * Get the number of memory channels. + * + * @return + * The number of memory channels on the system. The value is 0 if unknown + * or not the same on all devices. + */ +unsigned rte_memory_get_nchannel(void); + +/** + * Get the number of memory ranks. + * + * @return + * The number of memory ranks on the system. The value is 0 if unknown or + * not the same on all devices. + */ +unsigned rte_memory_get_nrank(void); + +#ifdef RTE_LIBRTE_XEN_DOM0 + +/**< Internal use only - should DOM0 memory mapping be used */ +extern int rte_xen_dom0_supported(void); + +/**< Internal use only - phys to virt mapping for xen */ +phys_addr_t rte_xen_mem_phy2mch(uint32_t, const phys_addr_t); + +/** + * Return the physical address of elt, which is an element of the pool mp. + * + * @param memseg_id + * The mempool is from which memory segment. + * @param phy_addr + * physical address of elt. + * + * @return + * The physical address or error.
+ */ +static inline phys_addr_t +rte_mem_phy2mch(uint32_t memseg_id, const phys_addr_t phy_addr) +{ + if (rte_xen_dom0_supported()) + return rte_xen_mem_phy2mch(memseg_id, phy_addr); + else + return phy_addr; +} + +/** + * Memory init for supporting application running on Xen domain0. + * + * @param void + * + * @return + * 0: success + * negative: error + */ +int rte_xen_dom0_memory_init(void); + +/** + * Attach to memory segments of primary process on Xen domain0. + * + * @param void + * + * @return + * 0: success + * negative: error + */ +int rte_xen_dom0_memory_attach(void); +#else +static inline int rte_xen_dom0_supported(void) +{ + return 0; +} + +static inline phys_addr_t +rte_mem_phy2mch(uint32_t memseg_id __rte_unused, const phys_addr_t phy_addr) +{ + return phy_addr; +} +#endif + +#ifdef __cplusplus +} +#endif + +#endif /* _RTE_MEMORY_H_ */ diff --git a/src/dpdk22/lib/librte_eal/common/include/rte_memzone.h b/src/dpdk22/lib/librte_eal/common/include/rte_memzone.h new file mode 100644 index 00000000..f69b5a87 --- /dev/null +++ b/src/dpdk22/lib/librte_eal/common/include/rte_memzone.h @@ -0,0 +1,305 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission.
+ * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef _RTE_MEMZONE_H_ +#define _RTE_MEMZONE_H_ + +/** + * @file + * RTE Memzone + * + * The goal of the memzone allocator is to reserve contiguous + * portions of physical memory. These zones are identified by a name. + * + * The memzone descriptors are shared by all partitions and are + * located in a known place of physical memory. This zone is accessed + * using rte_eal_get_configuration(). The lookup (by name) of a + * memory zone can be done in any partition and returns the same + * physical address. + * + * A reserved memory zone cannot be unreserved. The reservation shall + * be done at initialization time only. + */ + +#include <stdio.h> +#include <rte_memory.h> + +#ifdef __cplusplus +extern "C" { +#endif + +#define RTE_MEMZONE_2MB 0x00000001 /**< Use 2MB pages. */ +#define RTE_MEMZONE_1GB 0x00000002 /**< Use 1GB pages. */ +#define RTE_MEMZONE_16MB 0x00000100 /**< Use 16MB pages. */ +#define RTE_MEMZONE_16GB 0x00000200 /**< Use 16GB pages. */ +#define RTE_MEMZONE_256KB 0x00010000 /**< Use 256KB pages. */ +#define RTE_MEMZONE_256MB 0x00020000 /**< Use 256MB pages. */ +#define RTE_MEMZONE_512MB 0x00040000 /**< Use 512MB pages. */ +#define RTE_MEMZONE_4GB 0x00080000 /**< Use 4GB pages.
*/ +#define RTE_MEMZONE_SIZE_HINT_ONLY 0x00000004 /**< Use available page size */ + +/** + * A structure describing a memzone, which is a contiguous portion of + * physical memory identified by a name. + */ +struct rte_memzone { + +#define RTE_MEMZONE_NAMESIZE 32 /**< Maximum length of memory zone name.*/ + char name[RTE_MEMZONE_NAMESIZE]; /**< Name of the memory zone. */ + + phys_addr_t phys_addr; /**< Start physical address. */ + union { + void *addr; /**< Start virtual address. */ + uint64_t addr_64; /**< Makes sure addr is always 64-bits */ + }; +#ifdef RTE_LIBRTE_IVSHMEM + phys_addr_t ioremap_addr; /**< Real physical address inside the VM */ +#endif + size_t len; /**< Length of the memzone. */ + + uint64_t hugepage_sz; /**< The page size of underlying memory */ + + int32_t socket_id; /**< NUMA socket ID. */ + + uint32_t flags; /**< Characteristics of this memzone. */ + uint32_t memseg_id; /**< Memseg it belongs. */ +} __attribute__((__packed__)); + +/** + * Reserve a portion of physical memory. + * + * This function reserves some memory and returns a pointer to a + * correctly filled memzone descriptor. If the allocation cannot be + * done, return NULL. + * + * @param name + * The name of the memzone. If it already exists, the function will + * fail and return NULL. + * @param len + * The size of the memory to be reserved. If it + * is 0, the biggest contiguous zone will be reserved. + * @param socket_id + * The socket identifier in the case of + * NUMA. The value can be SOCKET_ID_ANY if there is no NUMA + * constraint for the reserved zone. + * @param flags + * The flags parameter is used to request memzones to be + * taken from specifically sized hugepages. 
+ * - RTE_MEMZONE_2MB - Reserved from 2MB pages + * - RTE_MEMZONE_1GB - Reserved from 1GB pages + * - RTE_MEMZONE_16MB - Reserved from 16MB pages + * - RTE_MEMZONE_16GB - Reserved from 16GB pages + * - RTE_MEMZONE_256KB - Reserved from 256KB pages + * - RTE_MEMZONE_256MB - Reserved from 256MB pages + * - RTE_MEMZONE_512MB - Reserved from 512MB pages + * - RTE_MEMZONE_4GB - Reserved from 4GB pages + * - RTE_MEMZONE_SIZE_HINT_ONLY - Allow alternative page size to be used if + * the requested page size is unavailable. + * If this flag is not set, the function + * will return error on an unavailable size + * request. + * @return + * A pointer to a correctly-filled read-only memzone descriptor, or NULL + * on error. + * On error case, rte_errno will be set appropriately: + * - E_RTE_NO_CONFIG - function could not get pointer to rte_config structure + * - E_RTE_SECONDARY - function was called from a secondary process instance + * - ENOSPC - the maximum number of memzones has already been allocated + * - EEXIST - a memzone with the same name already exists + * - ENOMEM - no appropriate memory area found in which to create memzone + * - EINVAL - invalid parameters + */ +const struct rte_memzone *rte_memzone_reserve(const char *name, + size_t len, int socket_id, + unsigned flags); + +/** + * Reserve a portion of physical memory with alignment on a specified + * boundary. + * + * This function reserves some memory with alignment on a specified + * boundary, and returns a pointer to a correctly filled memzone + * descriptor. If the allocation cannot be done or if the alignment + * is not a power of 2, returns NULL. + * + * @param name + * The name of the memzone. If it already exists, the function will + * fail and return NULL. + * @param len + * The size of the memory to be reserved. If it + * is 0, the biggest contiguous zone will be reserved. + * @param socket_id + * The socket identifier in the case of + * NUMA. 
The value can be SOCKET_ID_ANY if there is no NUMA + * constraint for the reserved zone. + * @param flags + * The flags parameter is used to request memzones to be + * taken from specifically sized hugepages. + * - RTE_MEMZONE_2MB - Reserved from 2MB pages + * - RTE_MEMZONE_1GB - Reserved from 1GB pages + * - RTE_MEMZONE_16MB - Reserved from 16MB pages + * - RTE_MEMZONE_16GB - Reserved from 16GB pages + * - RTE_MEMZONE_256KB - Reserved from 256KB pages + * - RTE_MEMZONE_256MB - Reserved from 256MB pages + * - RTE_MEMZONE_512MB - Reserved from 512MB pages + * - RTE_MEMZONE_4GB - Reserved from 4GB pages + * - RTE_MEMZONE_SIZE_HINT_ONLY - Allow alternative page size to be used if + * the requested page size is unavailable. + * If this flag is not set, the function + * will return error on an unavailable size + * request. + * @param align + * Alignment for resulting memzone. Must be a power of 2. + * @return + * A pointer to a correctly-filled read-only memzone descriptor, or NULL + * on error. + * On error case, rte_errno will be set appropriately: + * - E_RTE_NO_CONFIG - function could not get pointer to rte_config structure + * - E_RTE_SECONDARY - function was called from a secondary process instance + * - ENOSPC - the maximum number of memzones has already been allocated + * - EEXIST - a memzone with the same name already exists + * - ENOMEM - no appropriate memory area found in which to create memzone + * - EINVAL - invalid parameters + */ +const struct rte_memzone *rte_memzone_reserve_aligned(const char *name, + size_t len, int socket_id, + unsigned flags, unsigned align); + +/** + * Reserve a portion of physical memory with specified alignment and + * boundary. + * + * This function reserves some memory with specified alignment and + * boundary, and returns a pointer to a correctly filled memzone + * descriptor. If the allocation cannot be done or if the alignment + * or boundary are not a power of 2, returns NULL. 
+ * Memory buffer is reserved in a way, that it wouldn't cross specified + * boundary. That implies that requested length should be less or equal + * then boundary. + * + * @param name + * The name of the memzone. If it already exists, the function will + * fail and return NULL. + * @param len + * The size of the memory to be reserved. If it + * is 0, the biggest contiguous zone will be reserved. + * @param socket_id + * The socket identifier in the case of + * NUMA. The value can be SOCKET_ID_ANY if there is no NUMA + * constraint for the reserved zone. + * @param flags + * The flags parameter is used to request memzones to be + * taken from specifically sized hugepages. + * - RTE_MEMZONE_2MB - Reserved from 2MB pages + * - RTE_MEMZONE_1GB - Reserved from 1GB pages + * - RTE_MEMZONE_16MB - Reserved from 16MB pages + * - RTE_MEMZONE_16GB - Reserved from 16GB pages + * - RTE_MEMZONE_256KB - Reserved from 256KB pages + * - RTE_MEMZONE_256MB - Reserved from 256MB pages + * - RTE_MEMZONE_512MB - Reserved from 512MB pages + * - RTE_MEMZONE_4GB - Reserved from 4GB pages + * - RTE_MEMZONE_SIZE_HINT_ONLY - Allow alternative page size to be used if + * the requested page size is unavailable. + * If this flag is not set, the function + * will return error on an unavailable size + * request. + * @param align + * Alignment for resulting memzone. Must be a power of 2. + * @param bound + * Boundary for resulting memzone. Must be a power of 2 or zero. + * Zero value implies no boundary condition. + * @return + * A pointer to a correctly-filled read-only memzone descriptor, or NULL + * on error. 
+ * On error case, rte_errno will be set appropriately: + * - E_RTE_NO_CONFIG - function could not get pointer to rte_config structure + * - E_RTE_SECONDARY - function was called from a secondary process instance + * - ENOSPC - the maximum number of memzones has already been allocated + * - EEXIST - a memzone with the same name already exists + * - ENOMEM - no appropriate memory area found in which to create memzone + * - EINVAL - invalid parameters + */ +const struct rte_memzone *rte_memzone_reserve_bounded(const char *name, + size_t len, int socket_id, + unsigned flags, unsigned align, unsigned bound); + +/** + * Free a memzone. + * + * Note: an IVSHMEM zone cannot be freed. + * + * @param mz + * A pointer to the memzone + * @return + * -EINVAL - invalid parameter, IVSHMEM memzone. + * 0 - success + */ +int rte_memzone_free(const struct rte_memzone *mz); + +/** + * Lookup for a memzone. + * + * Get a pointer to a descriptor of an already reserved memory + * zone identified by the name given as an argument. + * + * @param name + * The name of the memzone. + * @return + * A pointer to a read-only memzone descriptor. + */ +const struct rte_memzone *rte_memzone_lookup(const char *name); + +/** + * Dump all reserved memzones to the console. + * + * @param f + * A pointer to a file for output + */ +void rte_memzone_dump(FILE *f); + +/** + * Walk list of all memzones + * + * @param func + * Iterator function + * @param arg + * Argument passed to iterator + */ +void rte_memzone_walk(void (*func)(const struct rte_memzone *, void *arg), + void *arg); + +#ifdef __cplusplus +} +#endif + +#endif /* _RTE_MEMZONE_H_ */ diff --git a/src/dpdk22/lib/librte_eal/common/include/rte_pci.h b/src/dpdk22/lib/librte_eal/common/include/rte_pci.h new file mode 100644 index 00000000..334c12e5 --- /dev/null +++ b/src/dpdk22/lib/librte_eal/common/include/rte_pci.h @@ -0,0 +1,504 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2015 Intel Corporation. All rights reserved. 
+ * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ +/* BSD LICENSE + * + * Copyright 2013-2014 6WIND S.A. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. 
+ * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of 6WIND S.A. nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef _RTE_PCI_H_ +#define _RTE_PCI_H_ + +/** + * @file + * + * RTE PCI Interface + */ + +#ifdef __cplusplus +extern "C" { +#endif + +#include <stdio.h> +#include <stdlib.h> +#include <limits.h> +#include <errno.h> +#include <sys/queue.h> +#include <stdint.h> +#include <inttypes.h> + +#include <rte_interrupts.h> + +TAILQ_HEAD(pci_device_list, rte_pci_device); /**< PCI devices in D-linked Q. */ +TAILQ_HEAD(pci_driver_list, rte_pci_driver); /**< PCI drivers in D-linked Q. */ + +extern struct pci_driver_list pci_driver_list; /**< Global list of PCI drivers. */ +extern struct pci_device_list pci_device_list; /**< Global list of PCI devices. */ + +/** Pathname of PCI devices directory.
*/ +#define SYSFS_PCI_DEVICES "/sys/bus/pci/devices" + +/** Formatting string for PCI device identifier: Ex: 0000:00:01.0 */ +#define PCI_PRI_FMT "%.4" PRIx16 ":%.2" PRIx8 ":%.2" PRIx8 ".%" PRIx8 + +/** Short formatting string, without domain, for PCI device: Ex: 00:01.0 */ +#define PCI_SHORT_PRI_FMT "%.2" PRIx8 ":%.2" PRIx8 ".%" PRIx8 + +/** Nb. of values in PCI device identifier format string. */ +#define PCI_FMT_NVAL 4 + +/** Nb. of values in PCI resource format. */ +#define PCI_RESOURCE_FMT_NVAL 3 + +/** IO resource type: memory address space */ +#define IORESOURCE_MEM 0x00000200 + +/** + * A structure describing a PCI resource. + */ +struct rte_pci_resource { + uint64_t phys_addr; /**< Physical address, 0 if no resource. */ + uint64_t len; /**< Length of the resource. */ + void *addr; /**< Virtual address, NULL when not mapped. */ +}; + +/** Maximum number of PCI resources. */ +#define PCI_MAX_RESOURCE 6 + +/** + * A structure describing an ID for a PCI driver. Each driver provides a + * table of these IDs for each device that it supports. + */ +struct rte_pci_id { + uint16_t vendor_id; /**< Vendor ID or PCI_ANY_ID. */ + uint16_t device_id; /**< Device ID or PCI_ANY_ID. */ + uint16_t subsystem_vendor_id; /**< Subsystem vendor ID or PCI_ANY_ID. */ + uint16_t subsystem_device_id; /**< Subsystem device ID or PCI_ANY_ID. */ +}; + +/** + * A structure describing the location of a PCI device. + */ +struct rte_pci_addr { + uint16_t domain; /**< Device domain */ + uint8_t bus; /**< Device bus */ + uint8_t devid; /**< Device ID */ + uint8_t function; /**< Device function. */ +}; + +struct rte_devargs; + +enum rte_kernel_driver { + RTE_KDRV_UNKNOWN = 0, + RTE_KDRV_IGB_UIO, + RTE_KDRV_VFIO, + RTE_KDRV_UIO_GENERIC, + RTE_KDRV_NIC_UIO, + RTE_KDRV_NONE, +}; + +/** + * A structure describing a PCI device. + */ +struct rte_pci_device { + TAILQ_ENTRY(rte_pci_device) next; /**< Next probed PCI device. */ + struct rte_pci_addr addr; /**< PCI location. 
*/ + struct rte_pci_id id; /**< PCI ID. */ + struct rte_pci_resource mem_resource[PCI_MAX_RESOURCE]; /**< PCI Memory Resource */ + struct rte_intr_handle intr_handle; /**< Interrupt handle */ + struct rte_pci_driver *driver; /**< Associated driver */ + uint16_t max_vfs; /**< sriov enable if not zero */ + int numa_node; /**< NUMA node connection */ + struct rte_devargs *devargs; /**< Device user arguments */ + enum rte_kernel_driver kdrv; /**< Kernel driver passthrough */ +}; + +/** Any PCI device identifier (vendor, device, ...) */ +#define PCI_ANY_ID (0xffff) + +#ifdef __cplusplus +/** C++ macro used to help building up tables of device IDs */ +#define RTE_PCI_DEVICE(vend, dev) \ + (vend), \ + (dev), \ + PCI_ANY_ID, \ + PCI_ANY_ID +#else +/** Macro used to help building up tables of device IDs */ +#define RTE_PCI_DEVICE(vend, dev) \ + .vendor_id = (vend), \ + .device_id = (dev), \ + .subsystem_vendor_id = PCI_ANY_ID, \ + .subsystem_device_id = PCI_ANY_ID +#endif + +struct rte_pci_driver; + +/** + * Initialisation function for the driver called during PCI probing. + */ +typedef int (pci_devinit_t)(struct rte_pci_driver *, struct rte_pci_device *); + +/** + * Uninitialisation function for the driver called during hotplugging. + */ +typedef int (pci_devuninit_t)(struct rte_pci_device *); + +/** + * A structure describing a PCI driver. + */ +struct rte_pci_driver { + TAILQ_ENTRY(rte_pci_driver) next; /**< Next in list. */ + const char *name; /**< Driver name. */ + pci_devinit_t *devinit; /**< Device init. function. */ + pci_devuninit_t *devuninit; /**< Device uninit function. */ + const struct rte_pci_id *id_table; /**< ID table, NULL terminated. */ + uint32_t drv_flags; /**< Flags controlling handling of device.
*/ +}; + +/** Device needs PCI BAR mapping (done with either IGB_UIO or VFIO) */ +#define RTE_PCI_DRV_NEED_MAPPING 0x0001 +/** Device driver must be registered several times until failure - deprecated */ +#pragma GCC poison RTE_PCI_DRV_MULTIPLE +/** Device needs to be unbound even if no module is provided */ +#define RTE_PCI_DRV_FORCE_UNBIND 0x0004 +/** Device driver supports link state interrupt */ +#define RTE_PCI_DRV_INTR_LSC 0x0008 +/** Device driver supports detaching capability */ +#define RTE_PCI_DRV_DETACHABLE 0x0010 + +/** + * A structure describing a PCI mapping. + */ +struct pci_map { + void *addr; + char *path; + uint64_t offset; + uint64_t size; + uint64_t phaddr; +}; + +/** + * A structure describing a mapped PCI resource. + * For multi-process we need to reproduce all PCI mappings in secondary + * processes, so save them in a tailq. + */ +struct mapped_pci_resource { + TAILQ_ENTRY(mapped_pci_resource) next; + + struct rte_pci_addr pci_addr; + char path[PATH_MAX]; + int nb_maps; + struct pci_map maps[PCI_MAX_RESOURCE]; +}; + +/** mapped pci device list */ +TAILQ_HEAD(mapped_pci_res_list, mapped_pci_resource); + +/**< Internal use only - Macro used by pci addr parsing functions **/ +#define GET_PCIADDR_FIELD(in, fd, lim, dlm) \ +do { \ + unsigned long val; \ + char *end; \ + errno = 0; \ + val = strtoul((in), &end, 16); \ + if (errno != 0 || end[0] != (dlm) || val > (lim)) \ + return -EINVAL; \ + (fd) = (typeof (fd))val; \ + (in) = end + 1; \ +} while(0) + +/** + * Utility function to produce a PCI Bus-Device-Function value + * given a string representation. Assumes that the BDF is provided without + * a domain prefix (i.e. domain returned is always 0) + * + * @param input + * The input string to be parsed. Should have the format XX:XX.X + * @param dev_addr + * The PCI Bus-Device-Function address to be returned. Domain will always be + * returned as 0 + * @return + * 0 on success, negative on error. 
+ */ +static inline int +eal_parse_pci_BDF(const char *input, struct rte_pci_addr *dev_addr) +{ + dev_addr->domain = 0; + GET_PCIADDR_FIELD(input, dev_addr->bus, UINT8_MAX, ':'); + GET_PCIADDR_FIELD(input, dev_addr->devid, UINT8_MAX, '.'); + GET_PCIADDR_FIELD(input, dev_addr->function, UINT8_MAX, 0); + return 0; +} + +/** + * Utility function to produce a PCI Bus-Device-Function value + * given a string representation. Assumes that the BDF is provided including + * a domain prefix. + * + * @param input + * The input string to be parsed. Should have the format XXXX:XX:XX.X + * @param dev_addr + * The PCI Bus-Device-Function address to be returned + * @return + * 0 on success, negative on error. + */ +static inline int +eal_parse_pci_DomBDF(const char *input, struct rte_pci_addr *dev_addr) +{ + GET_PCIADDR_FIELD(input, dev_addr->domain, UINT16_MAX, ':'); + GET_PCIADDR_FIELD(input, dev_addr->bus, UINT8_MAX, ':'); + GET_PCIADDR_FIELD(input, dev_addr->devid, UINT8_MAX, '.'); + GET_PCIADDR_FIELD(input, dev_addr->function, UINT8_MAX, 0); + return 0; +} +#undef GET_PCIADDR_FIELD + +/* Compare two PCI device addresses. */ +/** + * Utility function to compare two PCI device addresses. + * + * @param addr + * The PCI Bus-Device-Function address to compare + * @param addr2 + * The PCI Bus-Device-Function address to compare + * @return + * 0 on equal PCI address. + * Positive on addr is greater than addr2. + * Negative on addr is less than addr2, or error. 
+ */ +static inline int +rte_eal_compare_pci_addr(const struct rte_pci_addr *addr, + const struct rte_pci_addr *addr2) +{ + uint64_t dev_addr, dev_addr2; + + if ((addr == NULL) || (addr2 == NULL)) + return -1; + + dev_addr = (addr->domain << 24) | (addr->bus << 16) | + (addr->devid << 8) | addr->function; + dev_addr2 = (addr2->domain << 24) | (addr2->bus << 16) | + (addr2->devid << 8) | addr2->function; + + if (dev_addr > dev_addr2) + return 1; + else if (dev_addr < dev_addr2) + return -1; + else + return 0; +} + +/** + * Scan the content of the PCI bus, and the devices in the devices + * list + * + * @return + * 0 on success, negative on error + */ +int rte_eal_pci_scan(void); + +/** + * Probe the PCI bus for registered drivers. + * + * Scan the content of the PCI bus, and call the probe() function for + * all registered drivers that have a matching entry in its id_table + * for discovered devices. + * + * @return + * - 0 on success. + * - Negative on error. + */ +int rte_eal_pci_probe(void); + +/** + * @internal + * Map a particular resource from a file. + * + * @param requested_addr + * The starting address for the new mapping range. + * @param fd + * The file descriptor. + * @param offset + * The offset for the mapping range. + * @param size + * The size for the mapping range. + * @param additional_flags + * The additional flags for the mapping range. + * @return + * - On success, the function returns a pointer to the mapped area. + * - On error, the value MAP_FAILED is returned. + */ +void *pci_map_resource(void *requested_addr, int fd, off_t offset, + size_t size, int additional_flags); + +/** + * @internal + * Unmap a particular resource. + * + * @param requested_addr + * The address for the unmapping range. + * @param size + * The size for the unmapping range. + */ +void pci_unmap_resource(void *requested_addr, size_t size); + +/** + * Probe the single PCI device. 
+ *
+ * Scan the content of the PCI bus, and find the pci device specified by pci
+ * address, then call the probe() function for registered driver that has a
+ * matching entry in its id_table for discovered device.
+ *
+ * @param addr
+ *	The PCI Bus-Device-Function address to probe.
+ * @return
+ *   - 0 on success.
+ *   - Negative on error.
+ */
+int rte_eal_pci_probe_one(const struct rte_pci_addr *addr);
+
+/**
+ * Close the single PCI device.
+ *
+ * Scan the content of the PCI bus and find the PCI device specified by its
+ * PCI address, then call the devuninit() function of the registered driver
+ * that has a matching entry in its id_table for the discovered device.
+ *
+ * @param addr
+ *	The PCI Bus-Device-Function address to close.
+ * @return
+ *   - 0 on success.
+ *   - Negative on error.
+ */
+int rte_eal_pci_detach(const struct rte_pci_addr *addr);
+
+/**
+ * Dump the content of the PCI bus.
+ *
+ * @param f
+ *   A pointer to the file (stream) the dump is written to.
+ */
+void rte_eal_pci_dump(FILE *f);
+
+/**
+ * Register a PCI driver.
+ *
+ * @param driver
+ *   A pointer to a rte_pci_driver structure describing the driver
+ *   to be registered.
+ */
+void rte_eal_pci_register(struct rte_pci_driver *driver);
+
+/**
+ * Unregister a PCI driver.
+ *
+ * @param driver
+ *   A pointer to a rte_pci_driver structure describing the driver
+ *   to be unregistered.
+ */
+void rte_eal_pci_unregister(struct rte_pci_driver *driver);
+
+/**
+ * Read PCI config space.
+ *
+ * @param device
+ *   A pointer to a rte_pci_device structure describing the device
+ *   to use
+ * @param buf
+ *   A data buffer into which the bytes read are stored
+ * @param len
+ *   The length of the data buffer.
+ * @param offset
+ *   The offset into PCI config space
+ */
+int rte_eal_pci_read_config(const struct rte_pci_device *device,
+		void *buf, size_t len, off_t offset);
+
+/**
+ * Write PCI config space.
+ *
+ * @param device
+ *   A pointer to a rte_pci_device structure describing the device
+ *   to use
+ * @param buf
+ *   A data buffer containing the bytes that should be written
+ * @param len
+ *   The length of the data buffer.
+ * @param offset
+ *   The offset into PCI config space
+ */
+int rte_eal_pci_write_config(const struct rte_pci_device *device,
+		const void *buf, size_t len, off_t offset);
+
+#ifdef RTE_PCI_CONFIG
+/**
+ * Set special config space registers for performance purposes.
+ *
+ * @param dev
+ *   A pointer to a rte_pci_device structure describing the device
+ *   to use
+ */
+void pci_config_space_set(struct rte_pci_device *dev);
+#endif /* RTE_PCI_CONFIG */
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _RTE_PCI_H_ */
diff --git a/src/dpdk22/lib/librte_eal/common/include/rte_pci_dev_feature_defs.h b/src/dpdk22/lib/librte_eal/common/include/rte_pci_dev_feature_defs.h
new file mode 100644
index 00000000..08222510
--- /dev/null
+++ b/src/dpdk22/lib/librte_eal/common/include/rte_pci_dev_feature_defs.h
@@ -0,0 +1,70 @@
+/*-
+ * This file is provided under a dual BSD/GPLv2 license. When using or
+ * redistributing this file, you may do so under either license.
+ *
+ * GPL LICENSE SUMMARY
+ *
+ * Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
+ * The full GNU General Public License is included in this distribution + * in the file called LICENSE.GPL. + * + * Contact Information: + * Intel Corporation + * + * BSD LICENSE + * + * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */
+
+#ifndef _RTE_PCI_DEV_DEFS_H_
+#define _RTE_PCI_DEV_DEFS_H_
+
+/* Interrupt mode selector: none, legacy, MSI or MSI-X. */
+enum rte_intr_mode {
+	RTE_INTR_MODE_NONE = 0,
+	RTE_INTR_MODE_LEGACY,
+	RTE_INTR_MODE_MSI,
+	RTE_INTR_MODE_MSIX
+};
+
+#endif /* _RTE_PCI_DEV_DEFS_H_ */
diff --git a/src/dpdk22/lib/librte_eal/common/include/rte_pci_dev_features.h b/src/dpdk22/lib/librte_eal/common/include/rte_pci_dev_features.h
new file mode 100644
index 00000000..67b986a6
--- /dev/null
+++ b/src/dpdk22/lib/librte_eal/common/include/rte_pci_dev_features.h
@@ -0,0 +1,69 @@
+/*-
+ * This file is provided under a dual BSD/GPLv2 license. When using or
+ * redistributing this file, you may do so under either license.
+ *
+ * GPL LICENSE SUMMARY
+ *
+ * Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
+ * The full GNU General Public License is included in this distribution
+ * in the file called LICENSE.GPL.
+ *
+ * Contact Information:
+ * Intel Corporation
+ *
+ * BSD LICENSE
+ *
+ * Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+ * All rights reserved.
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */
+
+#ifndef _RTE_PCI_DEV_FEATURES_H
+#define _RTE_PCI_DEV_FEATURES_H
+
+#include <rte_pci_dev_feature_defs.h>
+/* Textual names for the enum rte_intr_mode values declared in that header. */
+#define RTE_INTR_MODE_NONE_NAME "none"
+#define RTE_INTR_MODE_LEGACY_NAME "legacy"
+#define RTE_INTR_MODE_MSI_NAME "msi"
+#define RTE_INTR_MODE_MSIX_NAME "msix"
+
+#endif
diff --git a/src/dpdk22/lib/librte_eal/common/include/rte_pci_dev_ids.h b/src/dpdk22/lib/librte_eal/common/include/rte_pci_dev_ids.h
new file mode 100644
index 00000000..e31b9345
--- /dev/null
+++ b/src/dpdk22/lib/librte_eal/common/include/rte_pci_dev_ids.h
@@ -0,0 +1,667 @@
+/*-
+ * This file is provided under a dual BSD/GPLv2 license. When using or
+ * redistributing this file, you may do so under either license.
+ *
+ * GPL LICENSE SUMMARY
+ *
+ * Copyright(c) 2010-2015 Intel Corporation. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
+ * The full GNU General Public License is included in this distribution
+ * in the file called LICENSE.GPL.
+ *
+ * Contact Information:
+ * Intel Corporation
+ *
+ * BSD LICENSE
+ *
+ * Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ */
+
+/**
+ * @file
+ *
+ * This file contains a list of the PCI device IDs recognised by DPDK, which
+ * can be used to fill out an array of structures describing the devices.
+ *
+ * Currently four families of devices are recognised: those supported by the
+ * IGB driver, by EM driver, those supported by the IXGBE driver, and by virtio
+ * driver which is a para virtualization driver running in guest virtual machine.
+ * The inclusion of these in an array built using this file depends on the
+ * definition of
+ * RTE_PCI_DEV_ID_DECL_EM
+ * RTE_PCI_DEV_ID_DECL_IGB
+ * RTE_PCI_DEV_ID_DECL_IGBVF
+ * RTE_PCI_DEV_ID_DECL_IXGBE
+ * RTE_PCI_DEV_ID_DECL_IXGBEVF
+ * RTE_PCI_DEV_ID_DECL_I40E
+ * RTE_PCI_DEV_ID_DECL_I40EVF
+ * RTE_PCI_DEV_ID_DECL_VIRTIO
+ * at the time when this file is included.
+ *
+ * In order to populate an array, the user of this file must define this macro:
+ * RTE_PCI_DEV_ID_DECL_IXGBE(vend, dev). For example:
+ *
+ * @code
+ * struct device {
+ *     int vend;
+ *     int dev;
+ * };
+ *
+ * struct device devices[] = {
+ * #define RTE_PCI_DEV_ID_DECL_IXGBE(vend, dev) {vend, dev},
+ * #include <rte_pci_dev_ids.h>
+ * };
+ * @endcode
+ *
+ * Note that this file can be included multiple times within the same file.
+ */
+
+#ifndef RTE_PCI_DEV_ID_DECL_EM
+#define RTE_PCI_DEV_ID_DECL_EM(vend, dev)
+#endif
+
+#ifndef RTE_PCI_DEV_ID_DECL_IGB
+#define RTE_PCI_DEV_ID_DECL_IGB(vend, dev)
+#endif
+
+#ifndef RTE_PCI_DEV_ID_DECL_IGBVF
+#define RTE_PCI_DEV_ID_DECL_IGBVF(vend, dev)
+#endif
+
+#ifndef RTE_PCI_DEV_ID_DECL_IXGBE
+#define RTE_PCI_DEV_ID_DECL_IXGBE(vend, dev)
+#endif
+
+#ifndef RTE_PCI_DEV_ID_DECL_IXGBEVF
+#define RTE_PCI_DEV_ID_DECL_IXGBEVF(vend, dev)
+#endif
+
+#ifndef RTE_PCI_DEV_ID_DECL_I40E
+#define RTE_PCI_DEV_ID_DECL_I40E(vend, dev)
+#endif
+
+#ifndef RTE_PCI_DEV_ID_DECL_I40EVF
+#define RTE_PCI_DEV_ID_DECL_I40EVF(vend, dev)
+#endif
+
+#ifndef RTE_PCI_DEV_ID_DECL_VIRTIO
+#define RTE_PCI_DEV_ID_DECL_VIRTIO(vend, dev)
+#endif
+
+#ifndef RTE_PCI_DEV_ID_DECL_VMXNET3
+#define RTE_PCI_DEV_ID_DECL_VMXNET3(vend, dev)
+#endif
+
+#ifndef RTE_PCI_DEV_ID_DECL_FM10K
+#define RTE_PCI_DEV_ID_DECL_FM10K(vend, dev)
+#endif
+
+#ifndef RTE_PCI_DEV_ID_DECL_FM10KVF
+#define RTE_PCI_DEV_ID_DECL_FM10KVF(vend, dev)
+#endif
+
+#ifndef RTE_PCI_DEV_ID_DECL_ENIC
+#define RTE_PCI_DEV_ID_DECL_ENIC(vend, dev)
+#endif
+
+#ifndef RTE_PCI_DEV_ID_DECL_BNX2X
+#define RTE_PCI_DEV_ID_DECL_BNX2X(vend, dev)
+#endif + +#ifndef RTE_PCI_DEV_ID_DECL_BNX2XVF +#define RTE_PCI_DEV_ID_DECL_BNX2XVF(vend, dev) +#endif + +#ifndef PCI_VENDOR_ID_INTEL +/** Vendor ID used by Intel devices */ +#define PCI_VENDOR_ID_INTEL 0x8086 +#endif + +#ifndef PCI_VENDOR_ID_QUMRANET +/** Vendor ID used by virtio devices */ +#define PCI_VENDOR_ID_QUMRANET 0x1AF4 +#endif + +#ifndef PCI_VENDOR_ID_VMWARE +/** Vendor ID used by VMware devices */ +#define PCI_VENDOR_ID_VMWARE 0x15AD +#endif + +#ifndef PCI_VENDOR_ID_CISCO +/** Vendor ID used by Cisco VIC devices */ +#define PCI_VENDOR_ID_CISCO 0x1137 +#endif + +#ifndef PCI_VENDOR_ID_BROADCOM +/** Vendor ID used by Broadcom devices */ +#define PCI_VENDOR_ID_BROADCOM 0x14E4 +#endif + +/******************** Physical EM devices from e1000_hw.h ********************/ + +#define E1000_DEV_ID_82542 0x1000 +#define E1000_DEV_ID_82543GC_FIBER 0x1001 +#define E1000_DEV_ID_82543GC_COPPER 0x1004 +#define E1000_DEV_ID_82544EI_COPPER 0x1008 +#define E1000_DEV_ID_82544EI_FIBER 0x1009 +#define E1000_DEV_ID_82544GC_COPPER 0x100C +#define E1000_DEV_ID_82544GC_LOM 0x100D +#define E1000_DEV_ID_82540EM 0x100E +#define E1000_DEV_ID_82540EM_LOM 0x1015 +#define E1000_DEV_ID_82540EP_LOM 0x1016 +#define E1000_DEV_ID_82540EP 0x1017 +#define E1000_DEV_ID_82540EP_LP 0x101E +#define E1000_DEV_ID_82545EM_COPPER 0x100F +#define E1000_DEV_ID_82545EM_FIBER 0x1011 +#define E1000_DEV_ID_82545GM_COPPER 0x1026 +#define E1000_DEV_ID_82545GM_FIBER 0x1027 +#define E1000_DEV_ID_82545GM_SERDES 0x1028 +#define E1000_DEV_ID_82546EB_COPPER 0x1010 +#define E1000_DEV_ID_82546EB_FIBER 0x1012 +#define E1000_DEV_ID_82546EB_QUAD_COPPER 0x101D +#define E1000_DEV_ID_82546GB_COPPER 0x1079 +#define E1000_DEV_ID_82546GB_FIBER 0x107A +#define E1000_DEV_ID_82546GB_SERDES 0x107B +#define E1000_DEV_ID_82546GB_PCIE 0x108A +#define E1000_DEV_ID_82546GB_QUAD_COPPER 0x1099 +#define E1000_DEV_ID_82546GB_QUAD_COPPER_KSP3 0x10B5 +#define E1000_DEV_ID_82541EI 0x1013 +#define E1000_DEV_ID_82541EI_MOBILE 0x1018 +#define 
E1000_DEV_ID_82541ER_LOM 0x1014 +#define E1000_DEV_ID_82541ER 0x1078 +#define E1000_DEV_ID_82541GI 0x1076 +#define E1000_DEV_ID_82541GI_LF 0x107C +#define E1000_DEV_ID_82541GI_MOBILE 0x1077 +#define E1000_DEV_ID_82547EI 0x1019 +#define E1000_DEV_ID_82547EI_MOBILE 0x101A +#define E1000_DEV_ID_82547GI 0x1075 +#define E1000_DEV_ID_82571EB_COPPER 0x105E +#define E1000_DEV_ID_82571EB_FIBER 0x105F +#define E1000_DEV_ID_82571EB_SERDES 0x1060 +#define E1000_DEV_ID_82571EB_SERDES_DUAL 0x10D9 +#define E1000_DEV_ID_82571EB_SERDES_QUAD 0x10DA +#define E1000_DEV_ID_82571EB_QUAD_COPPER 0x10A4 +#define E1000_DEV_ID_82571PT_QUAD_COPPER 0x10D5 +#define E1000_DEV_ID_82571EB_QUAD_FIBER 0x10A5 +#define E1000_DEV_ID_82571EB_QUAD_COPPER_LP 0x10BC +#define E1000_DEV_ID_82572EI_COPPER 0x107D +#define E1000_DEV_ID_82572EI_FIBER 0x107E +#define E1000_DEV_ID_82572EI_SERDES 0x107F +#define E1000_DEV_ID_82572EI 0x10B9 +#define E1000_DEV_ID_82573E 0x108B +#define E1000_DEV_ID_82573E_IAMT 0x108C +#define E1000_DEV_ID_82573L 0x109A +#define E1000_DEV_ID_82574L 0x10D3 +#define E1000_DEV_ID_82574LA 0x10F6 +#define E1000_DEV_ID_82583V 0x150C +#define E1000_DEV_ID_80003ES2LAN_COPPER_DPT 0x1096 +#define E1000_DEV_ID_80003ES2LAN_SERDES_DPT 0x1098 +#define E1000_DEV_ID_80003ES2LAN_COPPER_SPT 0x10BA +#define E1000_DEV_ID_80003ES2LAN_SERDES_SPT 0x10BB +#define E1000_DEV_ID_ICH8_82567V_3 0x1501 +#define E1000_DEV_ID_ICH8_IGP_M_AMT 0x1049 +#define E1000_DEV_ID_ICH8_IGP_AMT 0x104A +#define E1000_DEV_ID_ICH8_IGP_C 0x104B +#define E1000_DEV_ID_ICH8_IFE 0x104C +#define E1000_DEV_ID_ICH8_IFE_GT 0x10C4 +#define E1000_DEV_ID_ICH8_IFE_G 0x10C5 +#define E1000_DEV_ID_ICH8_IGP_M 0x104D +#define E1000_DEV_ID_ICH9_IGP_M 0x10BF +#define E1000_DEV_ID_ICH9_IGP_M_AMT 0x10F5 +#define E1000_DEV_ID_ICH9_IGP_M_V 0x10CB +#define E1000_DEV_ID_ICH9_IGP_AMT 0x10BD +#define E1000_DEV_ID_ICH9_BM 0x10E5 +#define E1000_DEV_ID_ICH9_IGP_C 0x294C +#define E1000_DEV_ID_ICH9_IFE 0x10C0 +#define E1000_DEV_ID_ICH9_IFE_GT 0x10C3 +#define 
E1000_DEV_ID_ICH9_IFE_G 0x10C2 +#define E1000_DEV_ID_ICH10_R_BM_LM 0x10CC +#define E1000_DEV_ID_ICH10_R_BM_LF 0x10CD +#define E1000_DEV_ID_ICH10_R_BM_V 0x10CE +#define E1000_DEV_ID_ICH10_D_BM_LM 0x10DE +#define E1000_DEV_ID_ICH10_D_BM_LF 0x10DF +#define E1000_DEV_ID_ICH10_D_BM_V 0x1525 + +#define E1000_DEV_ID_PCH_M_HV_LM 0x10EA +#define E1000_DEV_ID_PCH_M_HV_LC 0x10EB +#define E1000_DEV_ID_PCH_D_HV_DM 0x10EF +#define E1000_DEV_ID_PCH_D_HV_DC 0x10F0 +#define E1000_DEV_ID_PCH2_LV_LM 0x1502 +#define E1000_DEV_ID_PCH2_LV_V 0x1503 +#define E1000_DEV_ID_PCH_LPT_I217_LM 0x153A +#define E1000_DEV_ID_PCH_LPT_I217_V 0x153B +#define E1000_DEV_ID_PCH_LPTLP_I218_LM 0x155A +#define E1000_DEV_ID_PCH_LPTLP_I218_V 0x1559 +#define E1000_DEV_ID_PCH_I218_LM2 0x15A0 +#define E1000_DEV_ID_PCH_I218_V2 0x15A1 +#define E1000_DEV_ID_PCH_I218_LM3 0x15A2 +#define E1000_DEV_ID_PCH_I218_V3 0x15A3 + + +/* + * Tested (supported) on VM emulated HW. + */ + +RTE_PCI_DEV_ID_DECL_EM(PCI_VENDOR_ID_INTEL, E1000_DEV_ID_82540EM) +RTE_PCI_DEV_ID_DECL_EM(PCI_VENDOR_ID_INTEL, E1000_DEV_ID_82545EM_COPPER) +RTE_PCI_DEV_ID_DECL_EM(PCI_VENDOR_ID_INTEL, E1000_DEV_ID_82545EM_FIBER) + +/* + * Tested (supported) on real HW. 
+ */ + +RTE_PCI_DEV_ID_DECL_EM(PCI_VENDOR_ID_INTEL, E1000_DEV_ID_82546EB_COPPER) +RTE_PCI_DEV_ID_DECL_EM(PCI_VENDOR_ID_INTEL, E1000_DEV_ID_82546EB_FIBER) +RTE_PCI_DEV_ID_DECL_EM(PCI_VENDOR_ID_INTEL, E1000_DEV_ID_82546EB_QUAD_COPPER) +RTE_PCI_DEV_ID_DECL_EM(PCI_VENDOR_ID_INTEL, E1000_DEV_ID_82571EB_COPPER) +RTE_PCI_DEV_ID_DECL_EM(PCI_VENDOR_ID_INTEL, E1000_DEV_ID_82571EB_FIBER) +RTE_PCI_DEV_ID_DECL_EM(PCI_VENDOR_ID_INTEL, E1000_DEV_ID_82571EB_SERDES) +RTE_PCI_DEV_ID_DECL_EM(PCI_VENDOR_ID_INTEL, E1000_DEV_ID_82571EB_SERDES_DUAL) +RTE_PCI_DEV_ID_DECL_EM(PCI_VENDOR_ID_INTEL, E1000_DEV_ID_82571EB_SERDES_QUAD) +RTE_PCI_DEV_ID_DECL_EM(PCI_VENDOR_ID_INTEL, E1000_DEV_ID_82571EB_QUAD_COPPER) +RTE_PCI_DEV_ID_DECL_EM(PCI_VENDOR_ID_INTEL, E1000_DEV_ID_82571PT_QUAD_COPPER) +RTE_PCI_DEV_ID_DECL_EM(PCI_VENDOR_ID_INTEL, E1000_DEV_ID_82571EB_QUAD_FIBER) +RTE_PCI_DEV_ID_DECL_EM(PCI_VENDOR_ID_INTEL, E1000_DEV_ID_82571EB_QUAD_COPPER_LP) +RTE_PCI_DEV_ID_DECL_EM(PCI_VENDOR_ID_INTEL, E1000_DEV_ID_82572EI_COPPER) +RTE_PCI_DEV_ID_DECL_EM(PCI_VENDOR_ID_INTEL, E1000_DEV_ID_82572EI_FIBER) +RTE_PCI_DEV_ID_DECL_EM(PCI_VENDOR_ID_INTEL, E1000_DEV_ID_82572EI_SERDES) +RTE_PCI_DEV_ID_DECL_EM(PCI_VENDOR_ID_INTEL, E1000_DEV_ID_82572EI) +RTE_PCI_DEV_ID_DECL_EM(PCI_VENDOR_ID_INTEL, E1000_DEV_ID_82573L) +RTE_PCI_DEV_ID_DECL_EM(PCI_VENDOR_ID_INTEL, E1000_DEV_ID_82574L) +RTE_PCI_DEV_ID_DECL_EM(PCI_VENDOR_ID_INTEL, E1000_DEV_ID_82574LA) +RTE_PCI_DEV_ID_DECL_EM(PCI_VENDOR_ID_INTEL, E1000_DEV_ID_82583V) + +/******************** Physical IGB devices from e1000_hw.h ********************/ + +#define E1000_DEV_ID_82576 0x10C9 +#define E1000_DEV_ID_82576_FIBER 0x10E6 +#define E1000_DEV_ID_82576_SERDES 0x10E7 +#define E1000_DEV_ID_82576_QUAD_COPPER 0x10E8 +#define E1000_DEV_ID_82576_QUAD_COPPER_ET2 0x1526 +#define E1000_DEV_ID_82576_NS 0x150A +#define E1000_DEV_ID_82576_NS_SERDES 0x1518 +#define E1000_DEV_ID_82576_SERDES_QUAD 0x150D +#define E1000_DEV_ID_82575EB_COPPER 0x10A7 +#define 
E1000_DEV_ID_82575EB_FIBER_SERDES 0x10A9 +#define E1000_DEV_ID_82575GB_QUAD_COPPER 0x10D6 +#define E1000_DEV_ID_82580_COPPER 0x150E +#define E1000_DEV_ID_82580_FIBER 0x150F +#define E1000_DEV_ID_82580_SERDES 0x1510 +#define E1000_DEV_ID_82580_SGMII 0x1511 +#define E1000_DEV_ID_82580_COPPER_DUAL 0x1516 +#define E1000_DEV_ID_82580_QUAD_FIBER 0x1527 +#define E1000_DEV_ID_I350_COPPER 0x1521 +#define E1000_DEV_ID_I350_FIBER 0x1522 +#define E1000_DEV_ID_I350_SERDES 0x1523 +#define E1000_DEV_ID_I350_SGMII 0x1524 +#define E1000_DEV_ID_I350_DA4 0x1546 +#define E1000_DEV_ID_I210_COPPER 0x1533 +#define E1000_DEV_ID_I210_COPPER_OEM1 0x1534 +#define E1000_DEV_ID_I210_COPPER_IT 0x1535 +#define E1000_DEV_ID_I210_FIBER 0x1536 +#define E1000_DEV_ID_I210_SERDES 0x1537 +#define E1000_DEV_ID_I210_SGMII 0x1538 +#define E1000_DEV_ID_I210_COPPER_FLASHLESS 0x157B +#define E1000_DEV_ID_I210_SERDES_FLASHLESS 0x157C +#define E1000_DEV_ID_I211_COPPER 0x1539 +#define E1000_DEV_ID_I354_BACKPLANE_1GBPS 0x1F40 +#define E1000_DEV_ID_I354_SGMII 0x1F41 +#define E1000_DEV_ID_I354_BACKPLANE_2_5GBPS 0x1F45 +#define E1000_DEV_ID_DH89XXCC_SGMII 0x0438 +#define E1000_DEV_ID_DH89XXCC_SERDES 0x043A +#define E1000_DEV_ID_DH89XXCC_BACKPLANE 0x043C +#define E1000_DEV_ID_DH89XXCC_SFP 0x0440 + +RTE_PCI_DEV_ID_DECL_IGB(PCI_VENDOR_ID_INTEL, E1000_DEV_ID_82576) +RTE_PCI_DEV_ID_DECL_IGB(PCI_VENDOR_ID_INTEL, E1000_DEV_ID_82576_FIBER) +RTE_PCI_DEV_ID_DECL_IGB(PCI_VENDOR_ID_INTEL, E1000_DEV_ID_82576_SERDES) +RTE_PCI_DEV_ID_DECL_IGB(PCI_VENDOR_ID_INTEL, E1000_DEV_ID_82576_QUAD_COPPER) +RTE_PCI_DEV_ID_DECL_IGB(PCI_VENDOR_ID_INTEL, E1000_DEV_ID_82576_QUAD_COPPER_ET2) +RTE_PCI_DEV_ID_DECL_IGB(PCI_VENDOR_ID_INTEL, E1000_DEV_ID_82576_NS) +RTE_PCI_DEV_ID_DECL_IGB(PCI_VENDOR_ID_INTEL, E1000_DEV_ID_82576_NS_SERDES) +RTE_PCI_DEV_ID_DECL_IGB(PCI_VENDOR_ID_INTEL, E1000_DEV_ID_82576_SERDES_QUAD) + +RTE_PCI_DEV_ID_DECL_IGB(PCI_VENDOR_ID_INTEL, E1000_DEV_ID_82575EB_COPPER) +RTE_PCI_DEV_ID_DECL_IGB(PCI_VENDOR_ID_INTEL, 
E1000_DEV_ID_82575EB_FIBER_SERDES) +RTE_PCI_DEV_ID_DECL_IGB(PCI_VENDOR_ID_INTEL, E1000_DEV_ID_82575GB_QUAD_COPPER) + +RTE_PCI_DEV_ID_DECL_IGB(PCI_VENDOR_ID_INTEL, E1000_DEV_ID_82580_COPPER) +RTE_PCI_DEV_ID_DECL_IGB(PCI_VENDOR_ID_INTEL, E1000_DEV_ID_82580_FIBER) +RTE_PCI_DEV_ID_DECL_IGB(PCI_VENDOR_ID_INTEL, E1000_DEV_ID_82580_SERDES) +RTE_PCI_DEV_ID_DECL_IGB(PCI_VENDOR_ID_INTEL, E1000_DEV_ID_82580_SGMII) +RTE_PCI_DEV_ID_DECL_IGB(PCI_VENDOR_ID_INTEL, E1000_DEV_ID_82580_COPPER_DUAL) +RTE_PCI_DEV_ID_DECL_IGB(PCI_VENDOR_ID_INTEL, E1000_DEV_ID_82580_QUAD_FIBER) + +RTE_PCI_DEV_ID_DECL_IGB(PCI_VENDOR_ID_INTEL, E1000_DEV_ID_I350_COPPER) +RTE_PCI_DEV_ID_DECL_IGB(PCI_VENDOR_ID_INTEL, E1000_DEV_ID_I350_FIBER) +RTE_PCI_DEV_ID_DECL_IGB(PCI_VENDOR_ID_INTEL, E1000_DEV_ID_I350_SERDES) +RTE_PCI_DEV_ID_DECL_IGB(PCI_VENDOR_ID_INTEL, E1000_DEV_ID_I350_SGMII) +RTE_PCI_DEV_ID_DECL_IGB(PCI_VENDOR_ID_INTEL, E1000_DEV_ID_I350_DA4) +RTE_PCI_DEV_ID_DECL_IGB(PCI_VENDOR_ID_INTEL, E1000_DEV_ID_I210_COPPER) +RTE_PCI_DEV_ID_DECL_IGB(PCI_VENDOR_ID_INTEL, E1000_DEV_ID_I210_COPPER_OEM1) +RTE_PCI_DEV_ID_DECL_IGB(PCI_VENDOR_ID_INTEL, E1000_DEV_ID_I210_COPPER_IT) +RTE_PCI_DEV_ID_DECL_IGB(PCI_VENDOR_ID_INTEL, E1000_DEV_ID_I210_FIBER) +RTE_PCI_DEV_ID_DECL_IGB(PCI_VENDOR_ID_INTEL, E1000_DEV_ID_I210_SERDES) +RTE_PCI_DEV_ID_DECL_IGB(PCI_VENDOR_ID_INTEL, E1000_DEV_ID_I210_SGMII) +RTE_PCI_DEV_ID_DECL_IGB(PCI_VENDOR_ID_INTEL, E1000_DEV_ID_I211_COPPER) +RTE_PCI_DEV_ID_DECL_IGB(PCI_VENDOR_ID_INTEL, E1000_DEV_ID_I354_BACKPLANE_1GBPS) +RTE_PCI_DEV_ID_DECL_IGB(PCI_VENDOR_ID_INTEL, E1000_DEV_ID_I354_SGMII) +RTE_PCI_DEV_ID_DECL_IGB(PCI_VENDOR_ID_INTEL, E1000_DEV_ID_I354_BACKPLANE_2_5GBPS) +RTE_PCI_DEV_ID_DECL_IGB(PCI_VENDOR_ID_INTEL, E1000_DEV_ID_DH89XXCC_SGMII) +RTE_PCI_DEV_ID_DECL_IGB(PCI_VENDOR_ID_INTEL, E1000_DEV_ID_DH89XXCC_SERDES) +RTE_PCI_DEV_ID_DECL_IGB(PCI_VENDOR_ID_INTEL, E1000_DEV_ID_DH89XXCC_BACKPLANE) +RTE_PCI_DEV_ID_DECL_IGB(PCI_VENDOR_ID_INTEL, E1000_DEV_ID_DH89XXCC_SFP) + +/****************** Physical 
IXGBE devices from ixgbe_type.h ******************/ + +#define IXGBE_DEV_ID_82598 0x10B6 +#define IXGBE_DEV_ID_82598_BX 0x1508 +#define IXGBE_DEV_ID_82598AF_DUAL_PORT 0x10C6 +#define IXGBE_DEV_ID_82598AF_SINGLE_PORT 0x10C7 +#define IXGBE_DEV_ID_82598AT 0x10C8 +#define IXGBE_DEV_ID_82598AT2 0x150B +#define IXGBE_DEV_ID_82598EB_SFP_LOM 0x10DB +#define IXGBE_DEV_ID_82598EB_CX4 0x10DD +#define IXGBE_DEV_ID_82598_CX4_DUAL_PORT 0x10EC +#define IXGBE_DEV_ID_82598_DA_DUAL_PORT 0x10F1 +#define IXGBE_DEV_ID_82598_SR_DUAL_PORT_EM 0x10E1 +#define IXGBE_DEV_ID_82598EB_XF_LR 0x10F4 +#define IXGBE_DEV_ID_82599_KX4 0x10F7 +#define IXGBE_DEV_ID_82599_KX4_MEZZ 0x1514 +#define IXGBE_DEV_ID_82599_KR 0x1517 +#define IXGBE_DEV_ID_82599_COMBO_BACKPLANE 0x10F8 +#define IXGBE_SUBDEV_ID_82599_KX4_KR_MEZZ 0x000C +#define IXGBE_DEV_ID_82599_CX4 0x10F9 +#define IXGBE_DEV_ID_82599_SFP 0x10FB +#define IXGBE_SUBDEV_ID_82599_SFP 0x11A9 +#define IXGBE_SUBDEV_ID_82599_RNDC 0x1F72 +#define IXGBE_SUBDEV_ID_82599_560FLR 0x17D0 +#define IXGBE_SUBDEV_ID_82599_ECNA_DP 0x0470 +#define IXGBE_DEV_ID_82599_BACKPLANE_FCOE 0x152A +#define IXGBE_DEV_ID_82599_SFP_FCOE 0x1529 +#define IXGBE_DEV_ID_82599_SFP_EM 0x1507 +#define IXGBE_DEV_ID_82599_SFP_SF2 0x154D +#define IXGBE_DEV_ID_82599_SFP_SF_QP 0x154A +#define IXGBE_DEV_ID_82599_QSFP_SF_QP 0x1558 +#define IXGBE_DEV_ID_82599EN_SFP 0x1557 +#define IXGBE_DEV_ID_82599_XAUI_LOM 0x10FC +#define IXGBE_DEV_ID_82599_T3_LOM 0x151C +#define IXGBE_DEV_ID_82599_LS 0x154F +#define IXGBE_DEV_ID_X540T 0x1528 +#define IXGBE_DEV_ID_X540T1 0x1560 +#define IXGBE_DEV_ID_X550EM_X_SFP 0x15AC +#define IXGBE_DEV_ID_X550EM_X_10G_T 0x15AD +#define IXGBE_DEV_ID_X550EM_X_1G_T 0x15AE +#define IXGBE_DEV_ID_X550T 0x1563 +#define IXGBE_DEV_ID_X550EM_X_KX4 0x15AA +#define IXGBE_DEV_ID_X550EM_X_KR 0x15AB + +#ifdef RTE_NIC_BYPASS +#define IXGBE_DEV_ID_82599_BYPASS 0x155D +#endif + +RTE_PCI_DEV_ID_DECL_IXGBE(PCI_VENDOR_ID_INTEL, IXGBE_DEV_ID_82598) +RTE_PCI_DEV_ID_DECL_IXGBE(PCI_VENDOR_ID_INTEL, 
IXGBE_DEV_ID_82598_BX) +RTE_PCI_DEV_ID_DECL_IXGBE(PCI_VENDOR_ID_INTEL, IXGBE_DEV_ID_82598AF_DUAL_PORT) +RTE_PCI_DEV_ID_DECL_IXGBE(PCI_VENDOR_ID_INTEL, \ + IXGBE_DEV_ID_82598AF_SINGLE_PORT) +RTE_PCI_DEV_ID_DECL_IXGBE(PCI_VENDOR_ID_INTEL, IXGBE_DEV_ID_82598AT) +RTE_PCI_DEV_ID_DECL_IXGBE(PCI_VENDOR_ID_INTEL, IXGBE_DEV_ID_82598AT2) +RTE_PCI_DEV_ID_DECL_IXGBE(PCI_VENDOR_ID_INTEL, IXGBE_DEV_ID_82598EB_SFP_LOM) +RTE_PCI_DEV_ID_DECL_IXGBE(PCI_VENDOR_ID_INTEL, IXGBE_DEV_ID_82598EB_CX4) +RTE_PCI_DEV_ID_DECL_IXGBE(PCI_VENDOR_ID_INTEL, IXGBE_DEV_ID_82598_CX4_DUAL_PORT) +RTE_PCI_DEV_ID_DECL_IXGBE(PCI_VENDOR_ID_INTEL, IXGBE_DEV_ID_82598_DA_DUAL_PORT) +RTE_PCI_DEV_ID_DECL_IXGBE(PCI_VENDOR_ID_INTEL, \ + IXGBE_DEV_ID_82598_SR_DUAL_PORT_EM) +RTE_PCI_DEV_ID_DECL_IXGBE(PCI_VENDOR_ID_INTEL, IXGBE_DEV_ID_82598EB_XF_LR) +RTE_PCI_DEV_ID_DECL_IXGBE(PCI_VENDOR_ID_INTEL, IXGBE_DEV_ID_82599_KX4) +RTE_PCI_DEV_ID_DECL_IXGBE(PCI_VENDOR_ID_INTEL, IXGBE_DEV_ID_82599_KX4_MEZZ) +RTE_PCI_DEV_ID_DECL_IXGBE(PCI_VENDOR_ID_INTEL, IXGBE_DEV_ID_82599_KR) +RTE_PCI_DEV_ID_DECL_IXGBE(PCI_VENDOR_ID_INTEL, \ + IXGBE_DEV_ID_82599_COMBO_BACKPLANE) +RTE_PCI_DEV_ID_DECL_IXGBE(PCI_VENDOR_ID_INTEL, \ + IXGBE_SUBDEV_ID_82599_KX4_KR_MEZZ) +RTE_PCI_DEV_ID_DECL_IXGBE(PCI_VENDOR_ID_INTEL, IXGBE_DEV_ID_82599_CX4) +RTE_PCI_DEV_ID_DECL_IXGBE(PCI_VENDOR_ID_INTEL, IXGBE_DEV_ID_82599_SFP) +RTE_PCI_DEV_ID_DECL_IXGBE(PCI_VENDOR_ID_INTEL, IXGBE_SUBDEV_ID_82599_SFP) +RTE_PCI_DEV_ID_DECL_IXGBE(PCI_VENDOR_ID_INTEL, IXGBE_SUBDEV_ID_82599_RNDC) +RTE_PCI_DEV_ID_DECL_IXGBE(PCI_VENDOR_ID_INTEL, IXGBE_SUBDEV_ID_82599_560FLR) +RTE_PCI_DEV_ID_DECL_IXGBE(PCI_VENDOR_ID_INTEL, IXGBE_SUBDEV_ID_82599_ECNA_DP) +RTE_PCI_DEV_ID_DECL_IXGBE(PCI_VENDOR_ID_INTEL, IXGBE_DEV_ID_82599_BACKPLANE_FCOE) +RTE_PCI_DEV_ID_DECL_IXGBE(PCI_VENDOR_ID_INTEL, IXGBE_DEV_ID_82599_SFP_FCOE) +RTE_PCI_DEV_ID_DECL_IXGBE(PCI_VENDOR_ID_INTEL, IXGBE_DEV_ID_82599_SFP_EM) +RTE_PCI_DEV_ID_DECL_IXGBE(PCI_VENDOR_ID_INTEL, IXGBE_DEV_ID_82599_SFP_SF2) 
+RTE_PCI_DEV_ID_DECL_IXGBE(PCI_VENDOR_ID_INTEL, IXGBE_DEV_ID_82599_SFP_SF_QP) +RTE_PCI_DEV_ID_DECL_IXGBE(PCI_VENDOR_ID_INTEL, IXGBE_DEV_ID_82599_QSFP_SF_QP) +RTE_PCI_DEV_ID_DECL_IXGBE(PCI_VENDOR_ID_INTEL, IXGBE_DEV_ID_82599EN_SFP) +RTE_PCI_DEV_ID_DECL_IXGBE(PCI_VENDOR_ID_INTEL, IXGBE_DEV_ID_82599_XAUI_LOM) +RTE_PCI_DEV_ID_DECL_IXGBE(PCI_VENDOR_ID_INTEL, IXGBE_DEV_ID_82599_T3_LOM) +RTE_PCI_DEV_ID_DECL_IXGBE(PCI_VENDOR_ID_INTEL, IXGBE_DEV_ID_82599_LS) +RTE_PCI_DEV_ID_DECL_IXGBE(PCI_VENDOR_ID_INTEL, IXGBE_DEV_ID_X540T) +RTE_PCI_DEV_ID_DECL_IXGBE(PCI_VENDOR_ID_INTEL, IXGBE_DEV_ID_X540T1) +RTE_PCI_DEV_ID_DECL_IXGBE(PCI_VENDOR_ID_INTEL, IXGBE_DEV_ID_X550EM_X_SFP) +RTE_PCI_DEV_ID_DECL_IXGBE(PCI_VENDOR_ID_INTEL, IXGBE_DEV_ID_X550EM_X_10G_T) +RTE_PCI_DEV_ID_DECL_IXGBE(PCI_VENDOR_ID_INTEL, IXGBE_DEV_ID_X550EM_X_1G_T) +RTE_PCI_DEV_ID_DECL_IXGBE(PCI_VENDOR_ID_INTEL, IXGBE_DEV_ID_X550T) +RTE_PCI_DEV_ID_DECL_IXGBE(PCI_VENDOR_ID_INTEL, IXGBE_DEV_ID_X550EM_X_KX4) +RTE_PCI_DEV_ID_DECL_IXGBE(PCI_VENDOR_ID_INTEL, IXGBE_DEV_ID_X550EM_X_KR) + +#ifdef RTE_NIC_BYPASS +RTE_PCI_DEV_ID_DECL_IXGBE(PCI_VENDOR_ID_INTEL, IXGBE_DEV_ID_82599_BYPASS) +#endif + +/*************** Physical I40E devices from i40e_type.h *****************/ + +#define I40E_DEV_ID_SFP_XL710 0x1572 +#define I40E_DEV_ID_QEMU 0x1574 +#define I40E_DEV_ID_KX_A 0x157F +#define I40E_DEV_ID_KX_B 0x1580 +#define I40E_DEV_ID_KX_C 0x1581 +#define I40E_DEV_ID_QSFP_A 0x1583 +#define I40E_DEV_ID_QSFP_B 0x1584 +#define I40E_DEV_ID_QSFP_C 0x1585 +#define I40E_DEV_ID_10G_BASE_T 0x1586 +#define I40E_DEV_ID_20G_KR2 0x1587 +#define I40E_DEV_ID_20G_KR2_A 0x1588 +#define I40E_DEV_ID_10G_BASE_T4 0x1589 +#define I40E_DEV_ID_X722_A0 0x374C +#define I40E_DEV_ID_SFP_X722 0x37D0 +#define I40E_DEV_ID_1G_BASE_T_X722 0x37D1 +#define I40E_DEV_ID_10G_BASE_T_X722 0x37D2 + +RTE_PCI_DEV_ID_DECL_I40E(PCI_VENDOR_ID_INTEL, I40E_DEV_ID_SFP_XL710) +RTE_PCI_DEV_ID_DECL_I40E(PCI_VENDOR_ID_INTEL, I40E_DEV_ID_QEMU) +RTE_PCI_DEV_ID_DECL_I40E(PCI_VENDOR_ID_INTEL, 
I40E_DEV_ID_KX_A) +RTE_PCI_DEV_ID_DECL_I40E(PCI_VENDOR_ID_INTEL, I40E_DEV_ID_KX_B) +RTE_PCI_DEV_ID_DECL_I40E(PCI_VENDOR_ID_INTEL, I40E_DEV_ID_KX_C) +RTE_PCI_DEV_ID_DECL_I40E(PCI_VENDOR_ID_INTEL, I40E_DEV_ID_QSFP_A) +RTE_PCI_DEV_ID_DECL_I40E(PCI_VENDOR_ID_INTEL, I40E_DEV_ID_QSFP_B) +RTE_PCI_DEV_ID_DECL_I40E(PCI_VENDOR_ID_INTEL, I40E_DEV_ID_QSFP_C) +RTE_PCI_DEV_ID_DECL_I40E(PCI_VENDOR_ID_INTEL, I40E_DEV_ID_10G_BASE_T) +RTE_PCI_DEV_ID_DECL_I40E(PCI_VENDOR_ID_INTEL, I40E_DEV_ID_20G_KR2) +RTE_PCI_DEV_ID_DECL_I40E(PCI_VENDOR_ID_INTEL, I40E_DEV_ID_20G_KR2_A) +RTE_PCI_DEV_ID_DECL_I40E(PCI_VENDOR_ID_INTEL, I40E_DEV_ID_10G_BASE_T4) +RTE_PCI_DEV_ID_DECL_I40E(PCI_VENDOR_ID_INTEL, I40E_DEV_ID_X722_A0) +RTE_PCI_DEV_ID_DECL_I40E(PCI_VENDOR_ID_INTEL, I40E_DEV_ID_SFP_X722) +RTE_PCI_DEV_ID_DECL_I40E(PCI_VENDOR_ID_INTEL, I40E_DEV_ID_1G_BASE_T_X722) +RTE_PCI_DEV_ID_DECL_I40E(PCI_VENDOR_ID_INTEL, I40E_DEV_ID_10G_BASE_T_X722) + +/*************** Physical FM10K devices from fm10k_type.h ***************/ + +#define FM10K_DEV_ID_PF 0x15A4 +#define FM10K_DEV_ID_SDI_FM10420_QDA2 0x15D0 + +RTE_PCI_DEV_ID_DECL_FM10K(PCI_VENDOR_ID_INTEL, FM10K_DEV_ID_PF) +RTE_PCI_DEV_ID_DECL_FM10K(PCI_VENDOR_ID_INTEL, FM10K_DEV_ID_SDI_FM10420_QDA2) + +/****************** Virtual IGB devices from e1000_hw.h ******************/ + +#define E1000_DEV_ID_82576_VF 0x10CA +#define E1000_DEV_ID_82576_VF_HV 0x152D +#define E1000_DEV_ID_I350_VF 0x1520 +#define E1000_DEV_ID_I350_VF_HV 0x152F + +RTE_PCI_DEV_ID_DECL_IGBVF(PCI_VENDOR_ID_INTEL, E1000_DEV_ID_82576_VF) +RTE_PCI_DEV_ID_DECL_IGBVF(PCI_VENDOR_ID_INTEL, E1000_DEV_ID_82576_VF_HV) +RTE_PCI_DEV_ID_DECL_IGBVF(PCI_VENDOR_ID_INTEL, E1000_DEV_ID_I350_VF) +RTE_PCI_DEV_ID_DECL_IGBVF(PCI_VENDOR_ID_INTEL, E1000_DEV_ID_I350_VF_HV) + +/****************** Virtual IXGBE devices from ixgbe_type.h ******************/ + +#define IXGBE_DEV_ID_82599_VF 0x10ED +#define IXGBE_DEV_ID_82599_VF_HV 0x152E +#define IXGBE_DEV_ID_X540_VF 0x1515 +#define IXGBE_DEV_ID_X540_VF_HV 0x1530 +#define 
IXGBE_DEV_ID_X550_VF_HV 0x1564 +#define IXGBE_DEV_ID_X550_VF 0x1565 +#define IXGBE_DEV_ID_X550EM_X_VF 0x15A8 +#define IXGBE_DEV_ID_X550EM_X_VF_HV 0x15A9 + +RTE_PCI_DEV_ID_DECL_IXGBEVF(PCI_VENDOR_ID_INTEL, IXGBE_DEV_ID_82599_VF) +RTE_PCI_DEV_ID_DECL_IXGBEVF(PCI_VENDOR_ID_INTEL, IXGBE_DEV_ID_82599_VF_HV) +RTE_PCI_DEV_ID_DECL_IXGBEVF(PCI_VENDOR_ID_INTEL, IXGBE_DEV_ID_X540_VF) +RTE_PCI_DEV_ID_DECL_IXGBEVF(PCI_VENDOR_ID_INTEL, IXGBE_DEV_ID_X540_VF_HV) +RTE_PCI_DEV_ID_DECL_IXGBEVF(PCI_VENDOR_ID_INTEL, IXGBE_DEV_ID_X550_VF_HV) +RTE_PCI_DEV_ID_DECL_IXGBEVF(PCI_VENDOR_ID_INTEL, IXGBE_DEV_ID_X550_VF) +RTE_PCI_DEV_ID_DECL_IXGBEVF(PCI_VENDOR_ID_INTEL, IXGBE_DEV_ID_X550EM_X_VF) +RTE_PCI_DEV_ID_DECL_IXGBEVF(PCI_VENDOR_ID_INTEL, IXGBE_DEV_ID_X550EM_X_VF_HV) + +/****************** Virtual I40E devices from i40e_type.h ********************/ + +#define I40E_DEV_ID_VF 0x154C +#define I40E_DEV_ID_VF_HV 0x1571 +#define I40E_DEV_ID_X722_VF 0x37CD +#define I40E_DEV_ID_X722_VF_HV 0x37D9 + +RTE_PCI_DEV_ID_DECL_I40EVF(PCI_VENDOR_ID_INTEL, I40E_DEV_ID_VF) +RTE_PCI_DEV_ID_DECL_I40EVF(PCI_VENDOR_ID_INTEL, I40E_DEV_ID_VF_HV) +RTE_PCI_DEV_ID_DECL_I40EVF(PCI_VENDOR_ID_INTEL, I40E_DEV_ID_X722_VF) +RTE_PCI_DEV_ID_DECL_I40EVF(PCI_VENDOR_ID_INTEL, I40E_DEV_ID_X722_VF_HV) + +/****************** Virtio devices from virtio.h ******************/ + +#define QUMRANET_DEV_ID_VIRTIO 0x1000 + +RTE_PCI_DEV_ID_DECL_VIRTIO(PCI_VENDOR_ID_QUMRANET, QUMRANET_DEV_ID_VIRTIO) + +/****************** VMware VMXNET3 devices ******************/ + +#define VMWARE_DEV_ID_VMXNET3 0x07B0 + +RTE_PCI_DEV_ID_DECL_VMXNET3(PCI_VENDOR_ID_VMWARE, VMWARE_DEV_ID_VMXNET3) + +/*************** Virtual FM10K devices from fm10k_type.h ***************/ + +#define FM10K_DEV_ID_VF 0x15A5 + +RTE_PCI_DEV_ID_DECL_FM10KVF(PCI_VENDOR_ID_INTEL, FM10K_DEV_ID_VF) + +/****************** Cisco VIC devices ******************/ + +#define PCI_DEVICE_ID_CISCO_VIC_ENET 0x0043 /* ethernet vnic */ +#define PCI_DEVICE_ID_CISCO_VIC_ENET_VF 0x0071 /* enet SRIOV 
VF */ + +RTE_PCI_DEV_ID_DECL_ENIC(PCI_VENDOR_ID_CISCO, PCI_DEVICE_ID_CISCO_VIC_ENET) +RTE_PCI_DEV_ID_DECL_ENIC(PCI_VENDOR_ID_CISCO, PCI_DEVICE_ID_CISCO_VIC_ENET_VF) + +/****************** QLogic devices ******************/ + +/* Broadcom/QLogic BNX2X */ +#define BNX2X_DEV_ID_57710 0x164e +#define BNX2X_DEV_ID_57711 0x164f +#define BNX2X_DEV_ID_57711E 0x1650 +#define BNX2X_DEV_ID_57712 0x1662 +#define BNX2X_DEV_ID_57712_MF 0x1663 +#define BNX2X_DEV_ID_57712_VF 0x166f +#define BNX2X_DEV_ID_57713 0x1651 +#define BNX2X_DEV_ID_57713E 0x1652 +#define BNX2X_DEV_ID_57800 0x168a +#define BNX2X_DEV_ID_57800_MF 0x16a5 +#define BNX2X_DEV_ID_57800_VF 0x16a9 +#define BNX2X_DEV_ID_57810 0x168e +#define BNX2X_DEV_ID_57810_MF 0x16ae +#define BNX2X_DEV_ID_57810_VF 0x16af +#define BNX2X_DEV_ID_57811 0x163d +#define BNX2X_DEV_ID_57811_MF 0x163e +#define BNX2X_DEV_ID_57811_VF 0x163f + +#define BNX2X_DEV_ID_57840_OBS 0x168d +#define BNX2X_DEV_ID_57840_OBS_MF 0x16ab +#define BNX2X_DEV_ID_57840_4_10 0x16a1 +#define BNX2X_DEV_ID_57840_2_20 0x16a2 +#define BNX2X_DEV_ID_57840_MF 0x16a4 +#define BNX2X_DEV_ID_57840_VF 0x16ad + +RTE_PCI_DEV_ID_DECL_BNX2X(PCI_VENDOR_ID_BROADCOM, BNX2X_DEV_ID_57800) +RTE_PCI_DEV_ID_DECL_BNX2XVF(PCI_VENDOR_ID_BROADCOM, BNX2X_DEV_ID_57800_VF) +RTE_PCI_DEV_ID_DECL_BNX2X(PCI_VENDOR_ID_BROADCOM, BNX2X_DEV_ID_57711) +RTE_PCI_DEV_ID_DECL_BNX2X(PCI_VENDOR_ID_BROADCOM, BNX2X_DEV_ID_57810) +RTE_PCI_DEV_ID_DECL_BNX2XVF(PCI_VENDOR_ID_BROADCOM, BNX2X_DEV_ID_57810_VF) +RTE_PCI_DEV_ID_DECL_BNX2X(PCI_VENDOR_ID_BROADCOM, BNX2X_DEV_ID_57811) +RTE_PCI_DEV_ID_DECL_BNX2XVF(PCI_VENDOR_ID_BROADCOM, BNX2X_DEV_ID_57811_VF) +RTE_PCI_DEV_ID_DECL_BNX2X(PCI_VENDOR_ID_BROADCOM, BNX2X_DEV_ID_57840_OBS) +RTE_PCI_DEV_ID_DECL_BNX2X(PCI_VENDOR_ID_BROADCOM, BNX2X_DEV_ID_57840_4_10) +RTE_PCI_DEV_ID_DECL_BNX2X(PCI_VENDOR_ID_BROADCOM, BNX2X_DEV_ID_57840_2_20) +RTE_PCI_DEV_ID_DECL_BNX2XVF(PCI_VENDOR_ID_BROADCOM, BNX2X_DEV_ID_57840_VF) +#ifdef RTE_LIBRTE_BNX2X_MF_SUPPORT 
+RTE_PCI_DEV_ID_DECL_BNX2X(PCI_VENDOR_ID_BROADCOM, BNX2X_DEV_ID_57810_MF) +RTE_PCI_DEV_ID_DECL_BNX2X(PCI_VENDOR_ID_BROADCOM, BNX2X_DEV_ID_57811_MF) +RTE_PCI_DEV_ID_DECL_BNX2X(PCI_VENDOR_ID_BROADCOM, BNX2X_DEV_ID_57840_MF) +#endif + +/* + * Undef all RTE_PCI_DEV_ID_DECL_* here. NOTE(review): RTE_PCI_DEV_ID_DECL_ENIC is used by the Cisco VIC entries above but has no #undef in this list - confirm whether that is intentional. + */ +#undef RTE_PCI_DEV_ID_DECL_BNX2X +#undef RTE_PCI_DEV_ID_DECL_BNX2XVF +#undef RTE_PCI_DEV_ID_DECL_EM +#undef RTE_PCI_DEV_ID_DECL_IGB +#undef RTE_PCI_DEV_ID_DECL_IGBVF +#undef RTE_PCI_DEV_ID_DECL_IXGBE +#undef RTE_PCI_DEV_ID_DECL_IXGBEVF +#undef RTE_PCI_DEV_ID_DECL_I40E +#undef RTE_PCI_DEV_ID_DECL_I40EVF +#undef RTE_PCI_DEV_ID_DECL_VIRTIO +#undef RTE_PCI_DEV_ID_DECL_VMXNET3 +#undef RTE_PCI_DEV_ID_DECL_FM10K +#undef RTE_PCI_DEV_ID_DECL_FM10KVF diff --git a/src/dpdk22/lib/librte_eal/common/include/rte_per_lcore.h b/src/dpdk22/lib/librte_eal/common/include/rte_per_lcore.h new file mode 100644 index 00000000..5434729a --- /dev/null +++ b/src/dpdk22/lib/librte_eal/common/include/rte_per_lcore.h @@ -0,0 +1,79 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission.
+ * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef _RTE_PER_LCORE_H_ +#define _RTE_PER_LCORE_H_ + +/** + * @file + * + * Per-lcore variables in RTE + * + * This file defines an API for instantiating per-lcore "global + * variables" that are environment-specific. Note that in all + * environments, a "shared variable" is the default when you use a + * global variable. + * + * Parts of this are execution environment specific. + */ + +#ifdef __cplusplus +extern "C" { +#endif + +#include + +/** + * Macro to define a per lcore variable "var" of type "type", don't + * use keywords like "static" or "volatile" in type, just prefix the + * whole macro. 
+ */ +#define RTE_DEFINE_PER_LCORE(type, name) \ + __thread __typeof__(type) per_lcore_##name + +/** + * Macro to declare an extern per-lcore (__thread) variable per_lcore_<name> of type "type" + */ +#define RTE_DECLARE_PER_LCORE(type, name) \ + extern __thread __typeof__(type) per_lcore_##name + +/** + * Access the per-lcore variable per_lcore_<name>, for reading or writing + */ +#define RTE_PER_LCORE(name) (per_lcore_##name) + +#ifdef __cplusplus +} +#endif + +#endif /* _RTE_PER_LCORE_H_ */ diff --git a/src/dpdk22/lib/librte_eal/common/include/rte_random.h b/src/dpdk22/lib/librte_eal/common/include/rte_random.h new file mode 100644 index 00000000..24ae8363 --- /dev/null +++ b/src/dpdk22/lib/librte_eal/common/include/rte_random.h @@ -0,0 +1,91 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED.
IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef _RTE_RANDOM_H_ +#define _RTE_RANDOM_H_ + +/** + * @file + * + * Pseudo-random Generators in RTE + */ + +#ifdef __cplusplus +extern "C" { +#endif + +#include +#include + +/** + * Seed the pseudo-random generator. + * + * The generator is automatically seeded by the EAL init with a timer + * value. It may need to be re-seeded by the user with a real random + * value. + * + * @param seedval + * The value of the seed; it is cast to long unsigned int and passed to srand48(). + */ +static inline void +rte_srand(uint64_t seedval) +{ + srand48((long unsigned int)seedval); +} + +/** + * Get a pseudo-random value. + * + * This function generates pseudo-random numbers using the linear + * congruential algorithm and 48-bit integer arithmetic, called twice + * to generate a 64-bit value. + * + * @return + * A value built from two lrand48() results: the first shifted left by 32, the second added to it. NOTE(review): POSIX documents lrand48() as returning values in [0, 2^31), so the full range 0 to (1<<64)-1 is presumably not reachable - confirm. + */ +static inline uint64_t +rte_rand(void) +{ + uint64_t val; + val = lrand48(); + val <<= 32; + val += lrand48(); + return val; +} + +#ifdef __cplusplus +} +#endif + + +#endif /* _RTE_RANDOM_H_ */ diff --git a/src/dpdk22/lib/librte_eal/common/include/rte_string_fns.h b/src/dpdk22/lib/librte_eal/common/include/rte_string_fns.h new file mode 100644 index 00000000..cfca2f8d --- /dev/null +++ b/src/dpdk22/lib/librte_eal/common/include/rte_string_fns.h @@ -0,0 +1,81 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. + * All rights reserved.
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +/** + * @file + * + * String-related functions as replacement for libc equivalents + */ + +#ifndef _RTE_STRING_FNS_H_ +#define _RTE_STRING_FNS_H_ + +#ifdef __cplusplus +extern "C" { +#endif + +/** + * Takes string "string" parameter and splits it at character "delim" + * up to maxtokens-1 times - to give "maxtokens" resulting tokens. Like + * strtok or strsep functions, this modifies its input string, by replacing + * instances of "delim" with '\\0'. 
All resultant tokens are returned in the + * "tokens" array which must have enough entries to hold "maxtokens". + * + * @param string + * The input string to be split into tokens + * + * @param stringlen + * The max length of the input buffer + * + * @param tokens + * The array to hold the pointers to the tokens in the string + * + * @param maxtokens + * The number of elements in the tokens array. At most, maxtokens-1 splits + * of the string will be done. + * + * @param delim + * The character on which the split of the data will be done + * + * @return + * The number of tokens in the tokens array. + */ +int +rte_strsplit(char *string, int stringlen, + char **tokens, int maxtokens, char delim); + +#ifdef __cplusplus +} +#endif + +#endif /* _RTE_STRING_FNS_H_ */ diff --git a/src/dpdk22/lib/librte_eal/common/include/rte_tailq.h b/src/dpdk22/lib/librte_eal/common/include/rte_tailq.h new file mode 100644 index 00000000..4a686e68 --- /dev/null +++ b/src/dpdk22/lib/librte_eal/common/include/rte_tailq.h @@ -0,0 +1,162 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission.
+ * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef _RTE_TAILQ_H_ +#define _RTE_TAILQ_H_ + +/** + * @file + * Defines the rte_tailq APIs, which are for internal use only + * + */ + +#ifdef __cplusplus +extern "C" { +#endif + +#include +#include +#include + +/** generic element type used by the rte_tailq APIs: list linkage plus an opaque data pointer */ +struct rte_tailq_entry { + TAILQ_ENTRY(rte_tailq_entry) next; /**< Pointer entries for a tailq list */ + void *data; /**< Pointer to the data referenced by this tailq entry */ +}; +/** list head type (struct rte_tailq_entry_head) for a tailq of rte_tailq_entry elements */ +TAILQ_HEAD(rte_tailq_entry_head, rte_tailq_entry); + +#define RTE_TAILQ_NAMESIZE 32 + +/** + * The structure defining a tailq header entry for storing + * in the rte_config structure in shared memory. Each tailq + * is identified by name. + * Any library storing a set of objects e.g. rings, mempools, hash-tables, + * is recommended to use an entry here, so as to make it easy for + * a multi-process app to find already-created elements in shared memory.
+ */ +struct rte_tailq_head { + struct rte_tailq_entry_head tailq_head; /**< NOTE: must be first element */ + char name[RTE_TAILQ_NAMESIZE]; /**< Name identifying this tailq */ +}; + +struct rte_tailq_elem { + /** + * Reference to head in shared mem, updated at init time by + * rte_eal_tailqs_init() + */ + struct rte_tailq_head *head; + TAILQ_ENTRY(rte_tailq_elem) next; /**< Linkage to other rte_tailq_elem records */ + const char name[RTE_TAILQ_NAMESIZE]; /**< Name of the tailq */ +}; + +/** + * Cast the address of the tailq_head member of a struct rte_tailq_head pointer to a pointer to struct struct_name. NOTE(review): the expansion has no enclosing parentheses, so take care when using it inside a larger expression. + */ +#define RTE_TAILQ_CAST(tailq_entry, struct_name) \ + (struct struct_name *)&(tailq_entry)->tailq_head + +/** + * Utility macro to make looking up a tailqueue for a particular struct easier. + * + * @param name + * The name of tailq + * + * @param struct_name + * The name of the list type we are using. (Generally this is the same as the + * first parameter passed to TAILQ_HEAD macro) + * + * @return + * The return value from rte_eal_tailq_lookup, typecast to the appropriate + * structure pointer type. + * NULL on error, since the tailq_head is the first + * element in the rte_tailq_head structure. + */ +#define RTE_TAILQ_LOOKUP(name, struct_name) \ + RTE_TAILQ_CAST(rte_eal_tailq_lookup(name), struct_name) + +/** + * Dump tail queues to the given file. + * + * @param f + * A pointer to a file for output + */ +void rte_dump_tailq(FILE *f); + +/** + * Look up a tail queue. + * + * Get a pointer to a tail queue header of a tail + * queue identified by the name given as an argument. + * Note: this function is not multi-thread safe, and should only be called from + * a single thread at a time + * + * @param name + * The name of the queue. + * @return + * A pointer to the tail queue head structure. + */ +struct rte_tailq_head *rte_eal_tailq_lookup(const char *name); + +/** + * Register a tail queue. + * + * Register a tail queue from shared memory. + * This function is mainly used by EAL_REGISTER_TAILQ macro which is used to + * register tailq from the different dpdk libraries.
Since this macro is a + * constructor, the function has no access to dpdk shared memory, so the + * registered tailq can not be used before call to rte_eal_init() which calls + * rte_eal_tailqs_init(). + * + * @param t + * The tailq element which contains the name of the tailq you want to + * create (/retrieve when in secondary process). + * @return + * 0 on success or -1 in case of an error. + */ +int rte_eal_tailq_register(struct rte_tailq_elem *t); + +#define EAL_REGISTER_TAILQ(t) \ +void tailqinitfn_ ##t(void); \ +void __attribute__((constructor, used)) tailqinitfn_ ##t(void) \ +{ \ + if (rte_eal_tailq_register(&t) < 0) \ + rte_panic("Cannot initialize tailq: %s\n", t.name); \ +} + +#ifdef __cplusplus +} +#endif + +#endif /* _RTE_TAILQ_H_ */ diff --git a/src/dpdk22/lib/librte_eal/common/include/rte_time.h b/src/dpdk22/lib/librte_eal/common/include/rte_time.h new file mode 100644 index 00000000..4b13b9c1 --- /dev/null +++ b/src/dpdk22/lib/librte_eal/common/include/rte_time.h @@ -0,0 +1,130 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2015 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission.
+ * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef _RTE_TIME_H_ +#define _RTE_TIME_H_ + +#include <stdint.h> +#include <time.h> + +#define NSEC_PER_SEC 1000000000L + +/** + * Structure to hold the parameters of a running cycle counter to assist + * in converting cycles to nanoseconds. + */ +struct rte_timecounter { + /** Last cycle counter value read. */ + uint64_t cycle_last; + /** Nanoseconds count. */ + uint64_t nsec; + /** Bitmask separating nanosecond and sub-nanoseconds. */ + uint64_t nsec_mask; + /** Sub-nanoseconds count. */ + uint64_t nsec_frac; + /** Bitmask for two's complement subtraction of non-64 bit counters. */ + uint64_t cc_mask; + /** Cycle to nanosecond divisor (power of two). */ + uint32_t cc_shift; +}; + +/** + * Converts cyclecounter cycles to nanoseconds, keeping the masked remainder in tc->nsec_frac for the next call. + */ +static inline uint64_t +rte_cyclecounter_cycles_to_ns(struct rte_timecounter *tc, uint64_t cycles) +{ + uint64_t ns; + + /* Add fractional nanoseconds. */ + ns = cycles + tc->nsec_frac; + tc->nsec_frac = ns & tc->nsec_mask; + + /* Shift to get only nanoseconds. */ + return ns >> tc->cc_shift; +} + +/** + * Update the internal nanosecond count in the structure and return the new count.
+ */ +static inline uint64_t +rte_timecounter_update(struct rte_timecounter *tc, uint64_t cycle_now) +{ + uint64_t cycle_delta, ns_offset; + + /* Calculate the delta since the last call. */ + if (tc->cycle_last <= cycle_now) + cycle_delta = (cycle_now - tc->cycle_last) & tc->cc_mask; + else + /* Handle cycle counts that have wrapped around. */ + cycle_delta = (~(tc->cycle_last - cycle_now) & tc->cc_mask) + 1; + + /* Convert to nanoseconds. */ + ns_offset = rte_cyclecounter_cycles_to_ns(tc, cycle_delta); + + /* Store current cycle counter for next call. */ + tc->cycle_last = cycle_now; + + /* Update the nanosecond count. */ + tc->nsec += ns_offset; + + return tc->nsec; +} + +/** + * Convert from timespec structure into nanosecond units. + */ +static inline uint64_t +rte_timespec_to_ns(const struct timespec *ts) +{ + return ((uint64_t) ts->tv_sec * NSEC_PER_SEC) + ts->tv_nsec; +} + +/** + * Convert from nanosecond units into timespec structure. + */ +static inline struct timespec +rte_ns_to_timespec(uint64_t nsec) +{ + struct timespec ts = {0, 0}; + + if (nsec == 0) + return ts; + + ts.tv_sec = nsec / NSEC_PER_SEC; + ts.tv_nsec = nsec % NSEC_PER_SEC; + + return ts; +} + +#endif /* _RTE_TIME_H_ */ diff --git a/src/dpdk22/lib/librte_eal/common/include/rte_version.h b/src/dpdk22/lib/librte_eal/common/include/rte_version.h new file mode 100644 index 00000000..bb3e9fc2 --- /dev/null +++ b/src/dpdk22/lib/librte_eal/common/include/rte_version.h @@ -0,0 +1,130 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +/** + * @file + * Definitions of Intel(R) DPDK version numbers + */ + +#ifndef _RTE_VERSION_H_ +#define _RTE_VERSION_H_ + +#ifdef __cplusplus +extern "C" { +#endif + +#include +#include +#include + +/** + * String that appears before the version number + */ +#define RTE_VER_PREFIX "RTE" + +/** + * Major version number i.e. the x in x.y.z + */ +#define RTE_VER_MAJOR 2 + +/** + * Minor version number i.e. the y in x.y.z + */ +#define RTE_VER_MINOR 2 + +/** + * Patch level number i.e. 
the z in x.y.z + */ +#define RTE_VER_PATCH_LEVEL 0 + +/** + * Extra string to be appended to version number + */ +#define RTE_VER_SUFFIX "" + +/** + * Patch release number + * 0-15 = release candidates + * 16 = release + */ +#define RTE_VER_PATCH_RELEASE 16 + +/** + * Macro to compute a version number usable for comparisons: a, b, c and d are packed as a<<24 | b<<16 | c<<8 | d + */ +#define RTE_VERSION_NUM(a,b,c,d) ((a) << 24 | (b) << 16 | (c) << 8 | (d)) + +/** + * All version numbers in one to compare with RTE_VERSION_NUM() + */ +#define RTE_VERSION RTE_VERSION_NUM( \ + RTE_VER_MAJOR, \ + RTE_VER_MINOR, \ + RTE_VER_PATCH_LEVEL, \ + RTE_VER_PATCH_RELEASE) + +/** + * Function returning version string; the string is formatted once into a static 32-byte buffer and reused on later calls + * @return + * PREFIX MAJOR.MINOR.PATCH_LEVEL when RTE_VER_SUFFIX is empty; otherwise the suffix and a number follow (RTE_VER_PATCH_RELEASE if below 16, else RTE_VER_PATCH_RELEASE - 16) + */ +static inline const char * +rte_version(void) +{ + static char version[32]; + if (version[0] != 0) + return version; + if (strlen(RTE_VER_SUFFIX) == 0) + snprintf(version, sizeof(version), "%s %d.%d.%d", + RTE_VER_PREFIX, + RTE_VER_MAJOR, + RTE_VER_MINOR, + RTE_VER_PATCH_LEVEL); + else + snprintf(version, sizeof(version), "%s %d.%d.%d%s%d", + RTE_VER_PREFIX, + RTE_VER_MAJOR, + RTE_VER_MINOR, + RTE_VER_PATCH_LEVEL, + RTE_VER_SUFFIX, + RTE_VER_PATCH_RELEASE < 16 ? + RTE_VER_PATCH_RELEASE : + RTE_VER_PATCH_RELEASE - 16); + return version; +} + +#ifdef __cplusplus +} +#endif + +#endif /* _RTE_VERSION_H_ */ diff --git a/src/dpdk22/lib/librte_eal/common/include/rte_warnings.h b/src/dpdk22/lib/librte_eal/common/include/rte_warnings.h new file mode 100644 index 00000000..da80877f --- /dev/null +++ b/src/dpdk22/lib/librte_eal/common/include/rte_warnings.h @@ -0,0 +1,84 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +/** + * @file + * Definitions of warnings for use of various insecure functions + */ + +#ifndef _RTE_WARNINGS_H_ +#define _RTE_WARNINGS_H_ + +#ifdef RTE_INSECURE_FUNCTION_WARNING + +/* we need to include all used standard header files so that they appear + * _before_ we poison the function names. + */ + +#include +#include +#include +#include +#include +#ifdef RTE_LIBRTE_EAL_LINUXAPP +#include +#endif + +/* the following function are deemed not fully secure for use e.g. 
they + * do not always null-terminate arguments */ +#pragma GCC poison sprintf strtok snprintf vsnprintf +#pragma GCC poison strlen strcpy strcat +#pragma GCC poison sscanf + +/* other unsafe functions may be implemented as macros: when one is, undef the macro; otherwise poison the identifier */ +#ifdef strsep +#undef strsep +#else +#pragma GCC poison strsep +#endif + +#ifdef strncpy +#undef strncpy +#else +#pragma GCC poison strncpy +#endif + +#ifdef strncat +#undef strncat +#else +#pragma GCC poison strncat +#endif + +#endif /* RTE_INSECURE_FUNCTION_WARNING */ + +#endif /* _RTE_WARNINGS_H_ */ diff --git a/src/dpdk22/lib/librte_eal/common/malloc_elem.c b/src/dpdk22/lib/librte_eal/common/malloc_elem.c new file mode 100644 index 00000000..b54ee330 --- /dev/null +++ b/src/dpdk22/lib/librte_eal/common/malloc_elem.c @@ -0,0 +1,344 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED.
IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include + +#include "malloc_elem.h" +#include "malloc_heap.h" + +#define MIN_DATA_SIZE (RTE_CACHE_LINE_SIZE) + +/* + * initialise a general malloc_elem header structure: record heap, memseg and size, clear prev, free_list and pad, mark the element ELEM_FREE, then call set_header() and set_trailer() + */ +void +malloc_elem_init(struct malloc_elem *elem, + struct malloc_heap *heap, const struct rte_memseg *ms, size_t size) +{ + elem->heap = heap; + elem->ms = ms; + elem->prev = NULL; + memset(&elem->free_list, 0, sizeof(elem->free_list)); + elem->state = ELEM_FREE; + elem->size = size; + elem->pad = 0; + set_header(elem); + set_trailer(elem); +} + +/* + * initialise a dummy malloc_elem header for the end-of-memseg marker: a zero-size element that takes its heap and memseg from prev and points back to prev + */ +void +malloc_elem_mkend(struct malloc_elem *elem, struct malloc_elem *prev) +{ + malloc_elem_init(elem, prev->heap, prev->ms, 0); + elem->prev = prev; + elem->state = ELEM_BUSY; /* mark busy so it is never merged */ +} + +/* + * calculate the starting point of where data of the requested size + * and alignment would fit in the current element. If the data doesn't + * fit, return NULL.
+ */
+static void *
+elem_start_pt(struct malloc_elem *elem, size_t size, unsigned align,
+		size_t bound)
+{
+	/* assumes bound is 0 or a power of two; bound == 0 gives a zero mask */
+	const size_t bmask = ~(bound - 1);
+	uintptr_t end_pt;
+	uintptr_t new_data_start;
+	uintptr_t new_elem_start;
+
+	/*
+	 * A request bigger than the element itself can never fit. Rejecting it
+	 * up front also stops "end_pt - size" below from wrapping around to a
+	 * huge address that would wrongly pass the final range check.
+	 */
+	if (size > elem->size)
+		return NULL;
+
+	/* place the data as close to the end of the element as possible */
+	end_pt = (uintptr_t)elem + elem->size - MALLOC_ELEM_TRAILER_LEN;
+	new_data_start = RTE_ALIGN_FLOOR((end_pt - size), align);
+
+	/* check boundary */
+	if ((new_data_start & bmask) != ((end_pt - 1) & bmask)) {
+		/* data would straddle a boundary: retry just below it */
+		end_pt = RTE_ALIGN_FLOOR(end_pt, bound);
+		new_data_start = RTE_ALIGN_FLOOR((end_pt - size), align);
+		if (((end_pt - 1) & bmask) != (new_data_start & bmask))
+			return NULL;
+	}
+
+	new_elem_start = new_data_start - MALLOC_ELEM_HEADER_LEN;
+
+	/* if the new start point is before the existing start, it won't fit */
+	return (new_elem_start < (uintptr_t)elem) ? NULL : (void *)new_elem_start;
+}
+
+/*
+ * use elem_start_pt to determine if we can meet the size and
+ * alignment request from the current element.
+ * Returns non-zero when the request fits, 0 otherwise.
+ */
+int
+malloc_elem_can_hold(struct malloc_elem *elem, size_t size,	unsigned align,
+		size_t bound)
+{
+	return elem_start_pt(elem, size, align, bound) != NULL;
+}
+
+/*
+ * split an existing element into two smaller elements at the given
+ * split_pt parameter. The lower part keeps the original header; the
+ * upper part gets a freshly initialised header at split_pt.
+ */
+static void
+split_elem(struct malloc_elem *elem, struct malloc_elem *split_pt)
+{
+	struct malloc_elem *next_elem = RTE_PTR_ADD(elem, elem->size);
+	const size_t old_elem_size = (uintptr_t)split_pt - (uintptr_t)elem;
+	const size_t new_elem_size = elem->size - old_elem_size;
+
+	malloc_elem_init(split_pt, elem->heap, elem->ms, new_elem_size);
+	split_pt->prev = elem;
+	/* the element that followed elem now follows split_pt */
+	next_elem->prev = split_pt;
+	elem->size = old_elem_size;
+	/* elem shrank, so its trailer moves */
+	set_trailer(elem);
+}
+
+/*
+ * Given an element size, compute its freelist index.
+ * We free an element into the freelist containing similarly-sized elements.
+ * We try to allocate elements starting with the freelist containing
+ * similarly-sized elements, and if necessary, we search freelists
+ * containing larger elements.
+ *
+ * Example element size ranges for a heap with five free lists:
+ *   heap->free_head[0] - (0   , 2^8]
+ *   heap->free_head[1] - (2^8 , 2^10]
+ *   heap->free_head[2] - (2^10 ,2^12]
+ *   heap->free_head[3] - (2^12, 2^14]
+ *   heap->free_head[4] - (2^14, MAX_SIZE]
+ */
+size_t
+malloc_elem_free_list_index(size_t size)
+{
+#define MALLOC_MINSIZE_LOG2   8
+#define MALLOC_LOG2_INCREMENT 2
+
+	const size_t last_list = RTE_HEAP_NUM_FREELISTS - 1;
+	size_t ceil_log2;
+	size_t list;
+
+	/* everything up to 2^MALLOC_MINSIZE_LOG2 bytes shares the first list */
+	if (size <= (1UL << MALLOC_MINSIZE_LOG2))
+		return 0;
+
+	/* exponent of the smallest power of 2 that is >= size */
+	ceil_log2 = sizeof(size) * 8 - __builtin_clzl(size-1);
+
+	/* each list spans MALLOC_LOG2_INCREMENT powers of two; round up */
+	list = (ceil_log2 - MALLOC_MINSIZE_LOG2 + MALLOC_LOG2_INCREMENT - 1) /
+			MALLOC_LOG2_INCREMENT;
+
+	/* sizes beyond the last range all land in the last list */
+	if (list > last_list)
+		list = last_list;
+	return list;
+}
+
+/*
+ * Mark the element free and push it on the head of the heap free list
+ * that matches its usable size (total size minus the header).
+ */
+void
+malloc_elem_free_list_insert(struct malloc_elem *elem)
+{
+	const size_t list =
+		malloc_elem_free_list_index(elem->size - MALLOC_ELEM_HEADER_LEN);
+
+	elem->state = ELEM_FREE;
+	LIST_INSERT_HEAD(&elem->heap->free_head[list], elem, free_list);
+}
+
+/*
+ * Unlink the element from whichever free list it is currently on.
+ */
+static void
+elem_free_list_remove(struct malloc_elem *elem)
+{
+	LIST_REMOVE(elem, free_list);
+}
+
+/*
+ * reserve a block of data in an existing malloc_elem. If the malloc_elem
+ * is much larger than the data block requested, we split the element in two.
+ * This function is only called from malloc_heap_alloc so parameter checking
+ * is not done here, as it's done there previously.
+ */
+struct malloc_elem *
+malloc_elem_alloc(struct malloc_elem *elem, size_t size, unsigned align,
+		size_t bound)
+{
+	/* smallest leftover that is worth keeping as an element of its own */
+	const size_t min_split = MALLOC_ELEM_OVERHEAD + MIN_DATA_SIZE;
+	struct malloc_elem *new_elem = elem_start_pt(elem, size, align, bound);
+	/* bytes left in front of the new element ... */
+	const size_t old_elem_size = (uintptr_t)new_elem - (uintptr_t)elem;
+	/* ... and bytes left behind it */
+	const size_t trailer_size = elem->size - old_elem_size - size -
+		MALLOC_ELEM_OVERHEAD;
+
+	elem_free_list_remove(elem);
+
+	if (trailer_size > min_split) {
+		/* too much free space after the data: give it back as a free elem */
+		struct malloc_elem *new_free_elem =
+				RTE_PTR_ADD(new_elem, size + MALLOC_ELEM_OVERHEAD);
+
+		split_elem(elem, new_free_elem);
+		malloc_elem_free_list_insert(new_free_elem);
+	}
+
+	if (old_elem_size >= min_split) {
+		/*
+		 * Enough room in front to stand alone: split the element in two.
+		 * The original element stays free and is re-inserted, because its
+		 * new size may belong on a different list; the new one is allocated.
+		 */
+		split_elem(elem, new_elem);
+		new_elem->state = ELEM_BUSY;
+		malloc_elem_free_list_insert(elem);
+
+		return new_elem;
+	}
+
+	/* front gap too small to split off: keep it as padding instead */
+	elem->state = ELEM_BUSY;
+	elem->pad = old_elem_size;
+
+	if (elem->pad > 0) {
+		/* dummy header in the padding, pointing back at the real header */
+		new_elem->pad = elem->pad;
+		new_elem->state = ELEM_PAD;
+		new_elem->size = elem->size - elem->pad;
+		set_header(new_elem);
+	}
+
+	return new_elem;
+}
+
+/*
+ * join two struct malloc_elem together. elem1 and elem2 must
+ * be contiguous in memory; elem1 absorbs elem2.
+ */
+static inline void
+join_elem(struct malloc_elem *elem1, struct malloc_elem *elem2)
+{
+	struct malloc_elem *after = RTE_PTR_ADD(elem2, elem2->size);
+
+	elem1->size += elem2->size;
+	after->prev = elem1;
+}
+
+/*
+ * free a malloc_elem block by adding it to the free list.
If the
+ * blocks either immediately before or immediately after newly freed block
+ * are also free, the blocks are merged together.
+ * Returns 0 on success, -1 if the element fails the cookie check or is
+ * not currently marked busy.
+ */
+int
+malloc_elem_free(struct malloc_elem *elem)
+{
+	if (!malloc_elem_cookies_ok(elem) || elem->state != ELEM_BUSY)
+		return -1;
+
+	rte_spinlock_lock(&(elem->heap->lock));
+	struct malloc_elem *next = RTE_PTR_ADD(elem, elem->size);
+	if (next->state == ELEM_FREE){
+		/* remove from free list, join to this one */
+		elem_free_list_remove(next);
+		join_elem(elem, next);
+	}
+
+	/* check if previous element is free, if so join with it and return,
+	 * need to re-insert in free list, as that element's size is changing
+	 */
+	if (elem->prev != NULL && elem->prev->state == ELEM_FREE) {
+		elem_free_list_remove(elem->prev);
+		join_elem(elem->prev, elem);
+		malloc_elem_free_list_insert(elem->prev);
+	}
+	/* otherwise add ourselves to the free list */
+	else {
+		malloc_elem_free_list_insert(elem);
+		elem->pad = 0;
+	}
+	/* decrease heap's count of allocated elements */
+	elem->heap->alloc_count--;
+	rte_spinlock_unlock(&(elem->heap->lock));
+
+	return 0;
+}
+
+/*
+ * attempt to resize a malloc_elem by expanding into any free space
+ * immediately after it in memory.
+ * Returns 0 if the element already is, or has been made, large enough
+ * for 'size' bytes of data; -1 if the following element is not free or
+ * is too small.
+ */
+int
+malloc_elem_resize(struct malloc_elem *elem, size_t size)
+{
+	/*
+	 * Total footprint needed, measured from the element's real header.
+	 * The padding in front of the data must be counted, otherwise the
+	 * split point below lands 'pad' bytes inside the caller's data.
+	 */
+	const size_t new_size = size + elem->pad + MALLOC_ELEM_OVERHEAD;
+
+	/* if we request a smaller size, then always return ok */
+	if (elem->size >= new_size)
+		return 0;
+
+	struct malloc_elem *next = RTE_PTR_ADD(elem, elem->size);
+	rte_spinlock_lock(&elem->heap->lock);
+	if (next->state != ELEM_FREE)
+		goto err_return;
+	if (elem->size + next->size < new_size)
+		goto err_return;
+
+	/* we now know the element fits, so remove from free list,
+	 * join the two
+	 */
+	elem_free_list_remove(next);
+	join_elem(elem, next);
+
+	if (elem->size - new_size >= MIN_DATA_SIZE + MALLOC_ELEM_OVERHEAD){
+		/* now we have a big block together. Lets cut it down a bit, by splitting */
+		struct malloc_elem *split_pt = RTE_PTR_ADD(elem, new_size);
+		split_pt = RTE_PTR_ALIGN_CEIL(split_pt, RTE_CACHE_LINE_SIZE);
+		split_elem(elem, split_pt);
+		malloc_elem_free_list_insert(split_pt);
+	}
+	rte_spinlock_unlock(&elem->heap->lock);
+	return 0;
+
+err_return:
+	rte_spinlock_unlock(&elem->heap->lock);
+	return -1;
+}
diff --git a/src/dpdk22/lib/librte_eal/common/malloc_elem.h b/src/dpdk22/lib/librte_eal/common/malloc_elem.h
new file mode 100644
index 00000000..e05d2ea0
--- /dev/null
+++ b/src/dpdk22/lib/librte_eal/common/malloc_elem.h
@@ -0,0 +1,192 @@
+/*-
+ *   BSD LICENSE
+ *
+ *   Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+ *   All rights reserved.
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in
+ *       the documentation and/or other materials provided with the
+ *       distribution.
+ *     * Neither the name of Intel Corporation nor the names of its
+ *       contributors may be used to endorse or promote products derived
+ *       from this software without specific prior written permission.
+ *
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED.
IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef MALLOC_ELEM_H_ +#define MALLOC_ELEM_H_ + +#include + +/* dummy definition of struct so we can use pointers to it in malloc_elem struct */ +struct malloc_heap; + +enum elem_state { + ELEM_FREE = 0, + ELEM_BUSY, + ELEM_PAD /* element is a padding-only header */ +}; + +struct malloc_elem { + struct malloc_heap *heap; + struct malloc_elem *volatile prev; /* points to prev elem in memseg */ + LIST_ENTRY(malloc_elem) free_list; /* list of free elements in heap */ + const struct rte_memseg *ms; + volatile enum elem_state state; + uint32_t pad; + size_t size; +#ifdef RTE_LIBRTE_MALLOC_DEBUG + uint64_t header_cookie; /* Cookie marking start of data */ + /* trailer cookie at start + size */ +#endif +} __rte_cache_aligned; + +#ifndef RTE_LIBRTE_MALLOC_DEBUG +static const unsigned MALLOC_ELEM_TRAILER_LEN = 0; + +/* dummy function - just check if pointer is non-null */ +static inline int +malloc_elem_cookies_ok(const struct malloc_elem *elem){ return elem != NULL; } + +/* dummy function - no header if malloc_debug is not enabled */ +static inline void +set_header(struct malloc_elem *elem __rte_unused){ } + +/* dummy function - no trailer if malloc_debug is not enabled */ +static inline void +set_trailer(struct malloc_elem *elem __rte_unused){ } + + +#else +static const unsigned MALLOC_ELEM_TRAILER_LEN = RTE_CACHE_LINE_SIZE; + +#define MALLOC_HEADER_COOKIE 0xbadbadbadadd2e55ULL /**< Header cookie. 
*/ +#define MALLOC_TRAILER_COOKIE 0xadd2e55badbadbadULL /**< Trailer cookie.*/ + +/* define macros to make referencing the header and trailer cookies easier */ +#define MALLOC_ELEM_TRAILER(elem) (*((uint64_t*)RTE_PTR_ADD(elem, \ + elem->size - MALLOC_ELEM_TRAILER_LEN))) +#define MALLOC_ELEM_HEADER(elem) (elem->header_cookie) + +static inline void +set_header(struct malloc_elem *elem) +{ + if (elem != NULL) + MALLOC_ELEM_HEADER(elem) = MALLOC_HEADER_COOKIE; +} + +static inline void +set_trailer(struct malloc_elem *elem) +{ + if (elem != NULL) + MALLOC_ELEM_TRAILER(elem) = MALLOC_TRAILER_COOKIE; +} + +/* check that the header and trailer cookies are set correctly */ +static inline int +malloc_elem_cookies_ok(const struct malloc_elem *elem) +{ + return (elem != NULL && + MALLOC_ELEM_HEADER(elem) == MALLOC_HEADER_COOKIE && + MALLOC_ELEM_TRAILER(elem) == MALLOC_TRAILER_COOKIE); +} + +#endif + +static const unsigned MALLOC_ELEM_HEADER_LEN = sizeof(struct malloc_elem); +#define MALLOC_ELEM_OVERHEAD (MALLOC_ELEM_HEADER_LEN + MALLOC_ELEM_TRAILER_LEN) + +/* + * Given a pointer to the start of a memory block returned by malloc, get + * the actual malloc_elem header for that block. + */ +static inline struct malloc_elem * +malloc_elem_from_data(const void *data) +{ + if (data == NULL) + return NULL; + + struct malloc_elem *elem = RTE_PTR_SUB(data, MALLOC_ELEM_HEADER_LEN); + if (!malloc_elem_cookies_ok(elem)) + return NULL; + return elem->state != ELEM_PAD ? 
elem: RTE_PTR_SUB(elem, elem->pad); +} + +/* + * initialise a malloc_elem header + */ +void +malloc_elem_init(struct malloc_elem *elem, + struct malloc_heap *heap, + const struct rte_memseg *ms, + size_t size); + +/* + * initialise a dummy malloc_elem header for the end-of-memseg marker + */ +void +malloc_elem_mkend(struct malloc_elem *elem, + struct malloc_elem *prev_free); + +/* + * return true if the current malloc_elem can hold a block of data + * of the requested size and with the requested alignment + */ +int +malloc_elem_can_hold(struct malloc_elem *elem, size_t size, + unsigned align, size_t bound); + +/* + * reserve a block of data in an existing malloc_elem. If the malloc_elem + * is much larger than the data block requested, we split the element in two. + */ +struct malloc_elem * +malloc_elem_alloc(struct malloc_elem *elem, size_t size, + unsigned align, size_t bound); + +/* + * free a malloc_elem block by adding it to the free list. If the + * blocks either immediately before or immediately after newly freed block + * are also free, the blocks are merged together. + */ +int +malloc_elem_free(struct malloc_elem *elem); + +/* + * attempt to resize a malloc_elem by expanding into any free space + * immediately after it in memory. + */ +int +malloc_elem_resize(struct malloc_elem *elem, size_t size); + +/* + * Given an element size, compute its freelist index. + */ +size_t +malloc_elem_free_list_index(size_t size); + +/* + * Add element to its heap's free list. + */ +void +malloc_elem_free_list_insert(struct malloc_elem *elem); + +#endif /* MALLOC_ELEM_H_ */ diff --git a/src/dpdk22/lib/librte_eal/common/malloc_heap.c b/src/dpdk22/lib/librte_eal/common/malloc_heap.c new file mode 100644 index 00000000..d170d037 --- /dev/null +++ b/src/dpdk22/lib/librte_eal/common/malloc_heap.c @@ -0,0 +1,236 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. + * All rights reserved. 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "malloc_elem.h" +#include "malloc_heap.h" + +static unsigned +check_hugepage_sz(unsigned flags, uint64_t hugepage_sz) +{ + unsigned check_flag = 0; + + if (!(flags & ~RTE_MEMZONE_SIZE_HINT_ONLY)) + return 1; + + switch (hugepage_sz) { + case RTE_PGSIZE_256K: + check_flag = RTE_MEMZONE_256KB; + break; + case RTE_PGSIZE_2M: + check_flag = RTE_MEMZONE_2MB; + break; + case RTE_PGSIZE_16M: + check_flag = RTE_MEMZONE_16MB; + break; + case RTE_PGSIZE_256M: + check_flag = RTE_MEMZONE_256MB; + break; + case RTE_PGSIZE_512M: + check_flag = RTE_MEMZONE_512MB; + break; + case RTE_PGSIZE_1G: + check_flag = RTE_MEMZONE_1GB; + break; + case RTE_PGSIZE_4G: + check_flag = RTE_MEMZONE_4GB; + break; + case RTE_PGSIZE_16G: + check_flag = RTE_MEMZONE_16GB; + } + + return (check_flag & flags); +} + +/* + * Expand the heap with a memseg. + * This reserves the zone and sets a dummy malloc_elem header at the end + * to prevent overflow. The rest of the zone is added to free list as a single + * large free block + */ +static void +malloc_heap_add_memseg(struct malloc_heap *heap, struct rte_memseg *ms) +{ + /* allocate the memory block headers, one at end, one at start */ + struct malloc_elem *start_elem = (struct malloc_elem *)ms->addr; + struct malloc_elem *end_elem = RTE_PTR_ADD(ms->addr, + ms->len - MALLOC_ELEM_OVERHEAD); + end_elem = RTE_PTR_ALIGN_FLOOR(end_elem, RTE_CACHE_LINE_SIZE); + const size_t elem_size = (uintptr_t)end_elem - (uintptr_t)start_elem; + + malloc_elem_init(start_elem, heap, ms, elem_size); + malloc_elem_mkend(end_elem, start_elem); + malloc_elem_free_list_insert(start_elem); + + heap->total_size += elem_size; +} + +/* + * Iterates through the freelist for a heap to find a free element + * which can store data of the required size and with the requested alignment. 
+ * If size is 0, find the biggest available elem. + * Returns null on failure, or pointer to element on success. + */ +static struct malloc_elem * +find_suitable_element(struct malloc_heap *heap, size_t size, + unsigned flags, size_t align, size_t bound) +{ + size_t idx; + struct malloc_elem *elem, *alt_elem = NULL; + + for (idx = malloc_elem_free_list_index(size); + idx < RTE_HEAP_NUM_FREELISTS; idx++) { + for (elem = LIST_FIRST(&heap->free_head[idx]); + !!elem; elem = LIST_NEXT(elem, free_list)) { + if (malloc_elem_can_hold(elem, size, align, bound)) { + if (check_hugepage_sz(flags, elem->ms->hugepage_sz)) + return elem; + if (alt_elem == NULL) + alt_elem = elem; + } + } + } + + if ((alt_elem != NULL) && (flags & RTE_MEMZONE_SIZE_HINT_ONLY)) + return alt_elem; + + return NULL; +} + +/* + * Main function to allocate a block of memory from the heap. + * It locks the free list, scans it, and adds a new memseg if the + * scan fails. Once the new memseg is added, it re-scans and should return + * the new element after releasing the lock. + */ +void * +malloc_heap_alloc(struct malloc_heap *heap, + const char *type __attribute__((unused)), size_t size, unsigned flags, + size_t align, size_t bound) +{ + struct malloc_elem *elem; + + size = RTE_CACHE_LINE_ROUNDUP(size); + align = RTE_CACHE_LINE_ROUNDUP(align); + + rte_spinlock_lock(&heap->lock); + + elem = find_suitable_element(heap, size, flags, align, bound); + if (elem != NULL) { + elem = malloc_elem_alloc(elem, size, align, bound); + /* increase heap's count of allocated elements */ + heap->alloc_count++; + } + rte_spinlock_unlock(&heap->lock); + + return elem == NULL ? 
NULL : (void *)(&elem[1]); +} + +/* + * Function to retrieve data for heap on given socket + */ +int +malloc_heap_get_stats(const struct malloc_heap *heap, + struct rte_malloc_socket_stats *socket_stats) +{ + size_t idx; + struct malloc_elem *elem; + + /* Initialise variables for heap */ + socket_stats->free_count = 0; + socket_stats->heap_freesz_bytes = 0; + socket_stats->greatest_free_size = 0; + + /* Iterate through free list */ + for (idx = 0; idx < RTE_HEAP_NUM_FREELISTS; idx++) { + for (elem = LIST_FIRST(&heap->free_head[idx]); + !!elem; elem = LIST_NEXT(elem, free_list)) + { + socket_stats->free_count++; + socket_stats->heap_freesz_bytes += elem->size; + if (elem->size > socket_stats->greatest_free_size) + socket_stats->greatest_free_size = elem->size; + } + } + /* Get stats on overall heap and allocated memory on this heap */ + socket_stats->heap_totalsz_bytes = heap->total_size; + socket_stats->heap_allocsz_bytes = (socket_stats->heap_totalsz_bytes - + socket_stats->heap_freesz_bytes); + socket_stats->alloc_count = heap->alloc_count; + return 0; +} + +int +rte_eal_malloc_heap_init(void) +{ + struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config; + unsigned ms_cnt; + struct rte_memseg *ms; + + if (mcfg == NULL) + return -1; + + for (ms = &mcfg->memseg[0], ms_cnt = 0; + (ms_cnt < RTE_MAX_MEMSEG) && (ms->len > 0); + ms_cnt++, ms++) { +#ifdef RTE_LIBRTE_IVSHMEM + /* + * if segment has ioremap address set, it's an IVSHMEM segment and + * it is not memory to allocate from. + */ + if (ms->ioremap_addr != 0) + continue; +#endif + malloc_heap_add_memseg(&mcfg->malloc_heaps[ms->socket_id], ms); + } + + return 0; +} diff --git a/src/dpdk22/lib/librte_eal/common/malloc_heap.h b/src/dpdk22/lib/librte_eal/common/malloc_heap.h new file mode 100644 index 00000000..3ccbef0f --- /dev/null +++ b/src/dpdk22/lib/librte_eal/common/malloc_heap.h @@ -0,0 +1,70 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. 
+ * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#ifndef MALLOC_HEAP_H_ +#define MALLOC_HEAP_H_ + +#include +#include + +#ifdef __cplusplus +extern "C" { +#endif + +static inline unsigned +malloc_get_numa_socket(void) +{ + unsigned socket_id = rte_socket_id(); + + if (socket_id == (unsigned)SOCKET_ID_ANY) + return 0; + + return socket_id; +} + +void * +malloc_heap_alloc(struct malloc_heap *heap, const char *type, size_t size, + unsigned flags, size_t align, size_t bound); + +int +malloc_heap_get_stats(const struct malloc_heap *heap, + struct rte_malloc_socket_stats *socket_stats); + +int +rte_eal_malloc_heap_init(void); + +#ifdef __cplusplus +} +#endif + +#endif /* MALLOC_HEAP_H_ */ diff --git a/src/dpdk22/lib/librte_eal/common/rte_keepalive.c b/src/dpdk22/lib/librte_eal/common/rte_keepalive.c new file mode 100644 index 00000000..736fd0f4 --- /dev/null +++ b/src/dpdk22/lib/librte_eal/common/rte_keepalive.c @@ -0,0 +1,113 @@ +/*- + * BSD LICENSE + * + * Copyright 2015 Intel Shannon Ltd. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include
+
+#include
+#include
+#include
+#include
+#include
+#include
+
+/*
+ * Log 'msg' followed by the time, in milliseconds, since the given core
+ * was last recorded alive.
+ */
+static void
+print_trace(const char *msg, struct rte_keepalive *keepcfg, int idx_core)
+{
+	RTE_LOG(INFO, EAL, "%sLast seen %" PRId64 "ms ago.\n",
+		msg,
+		((rte_rdtsc() - keepcfg->last_alive[idx_core])*1000)
+		/ rte_get_tsc_hz()
+	      );
+}
+
+
+/*
+ * Advance the liveness state of every registered core by one step:
+ * ALIVE -> MISSING -> DEAD -> GONE. The failure callback, if set, is
+ * invoked once, on the DEAD -> GONE transition.
+ * ptr_data is the struct rte_keepalive returned by rte_keepalive_create().
+ */
+void
+rte_keepalive_dispatch_pings(__rte_unused void *ptr_timer,
+	void *ptr_data)
+{
+	struct rte_keepalive *keepcfg = ptr_data;
+	int idx_core;
+
+	for (idx_core = 0; idx_core < RTE_KEEPALIVE_MAXCORES; idx_core++) {
+		/* skip cores that never registered */
+		if (keepcfg->active_cores[idx_core] == 0)
+			continue;
+
+		switch (keepcfg->state_flags[idx_core]) {
+		case ALIVE: /* Alive */
+			keepcfg->state_flags[idx_core] = MISSING;
+			keepcfg->last_alive[idx_core] = rte_rdtsc();
+			break;
+		case MISSING: /* MIA */
+			print_trace("Core MIA. ", keepcfg, idx_core);
+			keepcfg->state_flags[idx_core] = DEAD;
+			break;
+		case DEAD: /* Dead */
+			keepcfg->state_flags[idx_core] = GONE;
+			print_trace("Core died. ", keepcfg, idx_core);
+			if (keepcfg->callback)
+				keepcfg->callback(
+					keepcfg->callback_data,
+					idx_core
+					);
+			break;
+		case GONE: /* Buried */
+			break;
+		}
+	}
+}
+
+
+/*
+ * Allocate a zeroed keepalive context and record the failure callback
+ * and its opaque argument. Returns NULL if the allocation fails.
+ */
+struct rte_keepalive *
+rte_keepalive_create(rte_keepalive_failure_callback_t callback,
+	void *data)
+{
+	struct rte_keepalive *keepcfg;
+
+	keepcfg = rte_zmalloc("RTE_EAL_KEEPALIVE",
+		sizeof(struct rte_keepalive),
+		RTE_CACHE_LINE_SIZE);
+	if (keepcfg != NULL) {
+		keepcfg->callback = callback;
+		keepcfg->callback_data = data;
+		keepcfg->tsc_initial = rte_rdtsc();
+		keepcfg->tsc_mhz = rte_get_tsc_hz() / 1000;
+	}
+	return keepcfg;
+}
+
+
+/*
+ * Mark a core as monitored. Out-of-range core ids (including negative
+ * ones) and a NULL context are ignored.
+ */
+void
+rte_keepalive_register_core(struct rte_keepalive *keepcfg, const int id_core)
+{
+	/* rte_keepalive_create() returns NULL on allocation failure */
+	if (keepcfg == NULL)
+		return;
+
+	/* id_core is signed: a negative id would write before active_cores[] */
+	if (id_core >= 0 && id_core < RTE_KEEPALIVE_MAXCORES)
+		keepcfg->active_cores[id_core] = 1;
+}
diff --git a/src/dpdk22/lib/librte_eal/common/rte_malloc.c b/src/dpdk22/lib/librte_eal/common/rte_malloc.c
new file mode 100644
index 00000000..47deb007
--- /dev/null
+++ b/src/dpdk22/lib/librte_eal/common/rte_malloc.c
@@ -0,0 +1,262 @@
+/*-
+ *   BSD LICENSE
+ *
+ *   Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+ *   All rights reserved.
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in
+ *       the documentation and/or other materials provided with the
+ *       distribution.
+ *     * Neither the name of Intel Corporation nor the names of its
+ *       contributors may be used to endorse or promote products derived
+ *       from this software without specific prior written permission.
+ *
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include
+#include
+#include
+#include
+#include
+
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+#include
+#include "malloc_elem.h"
+#include "malloc_heap.h"
+
+
+/*
+ * Free the memory space back to heap. A NULL pointer is a no-op;
+ * panics if the element cannot be freed.
+ */
+void rte_free(void *addr)
+{
+	if (addr == NULL) return;
+	if (malloc_elem_free(malloc_elem_from_data(addr)) < 0)
+		rte_panic("Fatal error: Invalid memory\n");
+}
+
+/*
+ * Allocate memory on specified heap.
+ * With socket_arg == SOCKET_ID_ANY the caller's own socket is tried
+ * first and every other heap afterwards. Returns NULL for a zero or
+ * oversized size, a non-power-of-2 alignment, an out-of-range socket,
+ * or when no heap can satisfy the request.
+ */
+void *
+rte_malloc_socket(const char *type, size_t size, unsigned align, int socket_arg)
+{
+	struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config;
+	int socket, i;
+	void *ret;
+
+	/* return NULL if size is 0 or alignment is not power-of-2 */
+	if (size == 0 || (align && !rte_is_power_of_2(align)))
+		return NULL;
+
+	/* the heap rounds size up to a cache line; refuse sizes that would
+	 * overflow during that rounding */
+	if (size > (size_t)-1 - RTE_CACHE_LINE_SIZE)
+		return NULL;
+
+	if (!rte_eal_has_hugepages())
+		socket_arg = SOCKET_ID_ANY;
+
+	if (socket_arg == SOCKET_ID_ANY)
+		socket = malloc_get_numa_socket();
+	else
+		socket = socket_arg;
+
+	/* Check socket parameter: it indexes malloc_heaps[] below, so both
+	 * negative and too-large values must be rejected */
+	if (socket < 0 || socket >= RTE_MAX_NUMA_NODES)
+		return NULL;
+
+	ret = malloc_heap_alloc(&mcfg->malloc_heaps[socket], type,
+				size, 0, align == 0 ? 1 : align, 0);
+	if (ret != NULL || socket_arg != SOCKET_ID_ANY)
+		return ret;
+
+	/* try other heaps */
+	for (i = 0; i < RTE_MAX_NUMA_NODES; i++) {
+		/* we already tried this one */
+		if (i == socket)
+			continue;
+
+		ret = malloc_heap_alloc(&mcfg->malloc_heaps[i], type,
+					size, 0, align == 0 ? 1 : align, 0);
+		if (ret != NULL)
+			return ret;
+	}
+
+	return NULL;
+}
+
+/*
+ * Allocate memory on default heap.
+ */
+void *
+rte_malloc(const char *type, size_t size, unsigned align)
+{
+	return rte_malloc_socket(type, size, align, SOCKET_ID_ANY);
+}
+
+/*
+ * Allocate zero'd memory on specified heap.
+ */
+void *
+rte_zmalloc_socket(const char *type, size_t size, unsigned align, int socket)
+{
+	void *ptr = rte_malloc_socket(type, size, align, socket);
+
+	if (ptr != NULL)
+		memset(ptr, 0, size);
+	return ptr;
+}
+
+/*
+ * Allocate zero'd memory on default heap.
+ */
+void *
+rte_zmalloc(const char *type, size_t size, unsigned align)
+{
+	return rte_zmalloc_socket(type, size, align, SOCKET_ID_ANY);
+}
+
+/*
+ * Allocate a zero'd array of num elements of size bytes on specified heap.
+ * Returns NULL if num * size does not fit in size_t.
+ */
+void *
+rte_calloc_socket(const char *type, size_t num, size_t size, unsigned align, int socket)
+{
+	/* an overflowing product would silently allocate a too-small block */
+	if (size != 0 && num > (size_t)-1 / size)
+		return NULL;
+
+	return rte_zmalloc_socket(type, num * size, align, socket);
+}
+
+/*
+ * Allocate a zero'd array on default heap, with the same overflow
+ * check as rte_calloc_socket.
+ */
+void *
+rte_calloc(const char *type, size_t num, size_t size, unsigned align)
+{
+	return rte_calloc_socket(type, num, size, align, SOCKET_ID_ANY);
+}
+
+/*
+ * Resize allocated memory.
+ */
+void *
+rte_realloc(void *ptr, size_t size, unsigned align)
+{
+	/* a NULL pointer makes this a plain allocation */
+	if (ptr == NULL)
+		return rte_malloc(NULL, size, align);
+
+	struct malloc_elem *elem = malloc_elem_from_data(ptr);
+	if (elem == NULL)
+		rte_panic("Fatal error: memory corruption detected\n");
+
+	size = RTE_CACHE_LINE_ROUNDUP(size);
+	align = RTE_CACHE_LINE_ROUNDUP(align);
+	/* check alignment matches first, and if ok, see if we can resize block */
+	if (RTE_PTR_ALIGN(ptr,align) == ptr &&
+			malloc_elem_resize(elem, size) == 0)
+		return ptr;
+
+	/* either alignment is off, or we have no room to expand,
+	 * so move data. */
+	void *new_ptr = rte_malloc(NULL, size, align);
+	if (new_ptr == NULL)
+		return NULL;
+	/*
+	 * Usable bytes in the old block: element size less the padding in
+	 * front of the data and the header/trailer overhead (the same formula
+	 * rte_malloc_validate reports). Kept as size_t so blocks of 4GB and
+	 * more are not truncated.
+	 */
+	const size_t old_size = elem->size - elem->pad - MALLOC_ELEM_OVERHEAD;
+	rte_memcpy(new_ptr, ptr, old_size < size ? old_size : size);
+	rte_free(ptr);
+
+	return new_ptr;
+}
+
+/*
+ * Check the cookies of the element that owns ptr and, when size is
+ * non-NULL, report the usable data size of that element.
+ * Returns 0 on success, -1 if the element fails the cookie check.
+ */
+int
+rte_malloc_validate(const void *ptr, size_t *size)
+{
+	const struct malloc_elem *elem = malloc_elem_from_data(ptr);
+	if (!malloc_elem_cookies_ok(elem))
+		return -1;
+	if (size != NULL)
+		*size = elem->size - elem->pad - MALLOC_ELEM_OVERHEAD;
+	return 0;
+}
+
+/*
+ * Function to retrieve data for heap on given socket.
+ * Returns -1 when socket is outside [0, RTE_MAX_NUMA_NODES).
+ */
+int
+rte_malloc_get_socket_stats(int socket,
+		struct rte_malloc_socket_stats *socket_stats)
+{
+	struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config;
+
+	if (socket >= RTE_MAX_NUMA_NODES || socket < 0)
+		return -1;
+
+	return malloc_heap_get_stats(&mcfg->malloc_heaps[socket], socket_stats);
+}
+
+/*
+ * Print stats on memory type.
If type is NULL, info on all types is printed + */ +void +rte_malloc_dump_stats(FILE *f, __rte_unused const char *type) +{ + unsigned int socket; + struct rte_malloc_socket_stats sock_stats; + /* Iterate through all initialised heaps */ + for (socket=0; socket< RTE_MAX_NUMA_NODES; socket++) { + if ((rte_malloc_get_socket_stats(socket, &sock_stats) < 0)) + continue; + + fprintf(f, "Socket:%u\n", socket); + fprintf(f, "\tHeap_size:%zu,\n", sock_stats.heap_totalsz_bytes); + fprintf(f, "\tFree_size:%zu,\n", sock_stats.heap_freesz_bytes); + fprintf(f, "\tAlloc_size:%zu,\n", sock_stats.heap_allocsz_bytes); + fprintf(f, "\tGreatest_free_size:%zu,\n", + sock_stats.greatest_free_size); + fprintf(f, "\tAlloc_count:%u,\n",sock_stats.alloc_count); + fprintf(f, "\tFree_count:%u,\n", sock_stats.free_count); + } + return; +} + +/* + * TODO: Set limit to memory that can be allocated to memory type + */ +int +rte_malloc_set_limit(__rte_unused const char *type, + __rte_unused size_t max) +{ + return 0; +} + +/* + * Return the physical address of a virtual address obtained through rte_malloc + */ +phys_addr_t +rte_malloc_virt2phy(const void *addr) +{ + const struct malloc_elem *elem = malloc_elem_from_data(addr); + if (elem == NULL) + return 0; + return elem->ms->phys_addr + ((uintptr_t)addr - (uintptr_t)elem->ms->addr); +} diff --git a/src/dpdk22/lib/librte_eal/linuxapp/eal/eal.c b/src/dpdk22/lib/librte_eal/linuxapp/eal/eal.c new file mode 100644 index 00000000..635ec363 --- /dev/null +++ b/src/dpdk22/lib/librte_eal/linuxapp/eal/eal.c @@ -0,0 +1,927 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. + * Copyright(c) 2012-2014 6WIND S.A. + * All rights reserved. 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#if defined(RTE_ARCH_X86_64) || defined(RTE_ARCH_I686) +#include +#endif + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "eal_private.h" +#include "eal_thread.h" +#include "eal_internal_cfg.h" +#include "eal_filesystem.h" +#include "eal_hugepages.h" +#include "eal_options.h" + +#define MEMSIZE_IF_NO_HUGE_PAGE (64ULL * 1024ULL * 1024ULL) + +#define SOCKET_MEM_STRLEN (RTE_MAX_NUMA_NODES * 10) + +/* Allow the application to print its usage message too if set */ +static rte_usage_hook_t rte_application_usage_hook = NULL; + +/* early configuration structure, when memory config is not mmapped */ +static struct rte_mem_config early_mem_config; + +/* define fd variable here, because file needs to be kept open for the + * duration of the program, as we hold a write lock on it in the primary proc */ +static int mem_cfg_fd = -1; + +static struct flock wr_lock = { + .l_type = F_WRLCK, + .l_whence = SEEK_SET, + .l_start = offsetof(struct rte_mem_config, memseg), + .l_len = sizeof(early_mem_config.memseg), +}; + +/* Address of global and public configuration */ +static struct rte_config rte_config = { + .mem_config = &early_mem_config, +}; + +/* internal configuration (per-core) */ +struct lcore_config lcore_config[RTE_MAX_LCORE]; + +/* internal configuration */ +struct internal_config internal_config; + +/* used by rte_rdtsc() */ +int rte_cycles_vmware_tsc_map; + +/* Return a pointer to the configuration structure */ +struct rte_config * +rte_eal_get_configuration(void) +{ + return &rte_config; +} + +/* parse a sysfs (or other) file containing one integer value */ +int +eal_parse_sysfs_value(const char *filename, unsigned long 
*val) +{ + FILE *f; + char buf[BUFSIZ]; + char *end = NULL; + + if ((f = fopen(filename, "r")) == NULL) { + RTE_LOG(ERR, EAL, "%s(): cannot open sysfs value %s\n", + __func__, filename); + return -1; + } + + if (fgets(buf, sizeof(buf), f) == NULL) { + RTE_LOG(ERR, EAL, "%s(): cannot read sysfs value %s\n", + __func__, filename); + fclose(f); + return -1; + } + *val = strtoul(buf, &end, 0); + if ((buf[0] == '\0') || (end == NULL) || (*end != '\n')) { + RTE_LOG(ERR, EAL, "%s(): cannot parse sysfs value %s\n", + __func__, filename); + fclose(f); + return -1; + } + fclose(f); + return 0; +} + + +/* create memory configuration in shared/mmap memory. Take out + * a write lock on the memsegs, so we can auto-detect primary/secondary. + * This means we never close the file while running (auto-close on exit). + * We also don't lock the whole file, so that in future we can use read-locks + * on other parts, e.g. memzones, to detect if there are running secondary + * processes. */ +static void +rte_eal_config_create(void) +{ + void *rte_mem_cfg_addr; + int retval; + + const char *pathname = eal_runtime_config_path(); + + if (internal_config.no_shconf) + return; + + /* map the config before hugepage address so that we don't waste a page */ + if (internal_config.base_virtaddr != 0) + rte_mem_cfg_addr = (void *) + RTE_ALIGN_FLOOR(internal_config.base_virtaddr - + sizeof(struct rte_mem_config), sysconf(_SC_PAGE_SIZE)); + else + rte_mem_cfg_addr = NULL; + + if (mem_cfg_fd < 0){ + mem_cfg_fd = open(pathname, O_RDWR | O_CREAT, 0660); + if (mem_cfg_fd < 0) + rte_panic("Cannot open '%s' for rte_mem_config\n", pathname); + } + + retval = ftruncate(mem_cfg_fd, sizeof(*rte_config.mem_config)); + if (retval < 0){ + close(mem_cfg_fd); + rte_panic("Cannot resize '%s' for rte_mem_config\n", pathname); + } + + retval = fcntl(mem_cfg_fd, F_SETLK, &wr_lock); + if (retval < 0){ + close(mem_cfg_fd); + rte_exit(EXIT_FAILURE, "Cannot create lock on '%s'. 
Is another primary " + "process running?\n", pathname); + } + + rte_mem_cfg_addr = mmap(rte_mem_cfg_addr, sizeof(*rte_config.mem_config), + PROT_READ | PROT_WRITE, MAP_SHARED, mem_cfg_fd, 0); + + if (rte_mem_cfg_addr == MAP_FAILED){ + rte_panic("Cannot mmap memory for rte_config\n"); + } + memcpy(rte_mem_cfg_addr, &early_mem_config, sizeof(early_mem_config)); + rte_config.mem_config = (struct rte_mem_config *) rte_mem_cfg_addr; + + /* store address of the config in the config itself so that secondary + * processes could later map the config into this exact location */ + rte_config.mem_config->mem_cfg_addr = (uintptr_t) rte_mem_cfg_addr; + +} + +/* attach to an existing shared memory config */ +static void +rte_eal_config_attach(void) +{ + struct rte_mem_config *mem_config; + + const char *pathname = eal_runtime_config_path(); + + if (internal_config.no_shconf) + return; + + if (mem_cfg_fd < 0){ + mem_cfg_fd = open(pathname, O_RDWR); + if (mem_cfg_fd < 0) + rte_panic("Cannot open '%s' for rte_mem_config\n", pathname); + } + + /* map it as read-only first */ + mem_config = (struct rte_mem_config *) mmap(NULL, sizeof(*mem_config), + PROT_READ, MAP_SHARED, mem_cfg_fd, 0); + if (mem_config == MAP_FAILED) + rte_panic("Cannot mmap memory for rte_config\n"); + + rte_config.mem_config = mem_config; +} + +/* reattach the shared config at exact memory location primary process has it */ +static void +rte_eal_config_reattach(void) +{ + struct rte_mem_config *mem_config; + void *rte_mem_cfg_addr; + + if (internal_config.no_shconf) + return; + + /* save the address primary process has mapped shared config to */ + rte_mem_cfg_addr = (void *) (uintptr_t) rte_config.mem_config->mem_cfg_addr; + + /* unmap original config */ + munmap(rte_config.mem_config, sizeof(struct rte_mem_config)); + + /* remap the config at proper address */ + mem_config = (struct rte_mem_config *) mmap(rte_mem_cfg_addr, + sizeof(*mem_config), PROT_READ | PROT_WRITE, MAP_SHARED, + mem_cfg_fd, 0); + 
close(mem_cfg_fd); + if (mem_config == MAP_FAILED || mem_config != rte_mem_cfg_addr) + rte_panic("Cannot mmap memory for rte_config\n"); + + rte_config.mem_config = mem_config; +} + +/* Detect if we are a primary or a secondary process */ +enum rte_proc_type_t +eal_proc_type_detect(void) +{ + enum rte_proc_type_t ptype = RTE_PROC_PRIMARY; + const char *pathname = eal_runtime_config_path(); + + /* if we can open the file but not get a write-lock we are a secondary + * process. NOTE: if we get a file handle back, we keep that open + * and don't close it to prevent a race condition between multiple opens */ + if (((mem_cfg_fd = open(pathname, O_RDWR)) >= 0) && + (fcntl(mem_cfg_fd, F_SETLK, &wr_lock) < 0)) + ptype = RTE_PROC_SECONDARY; + + RTE_LOG(INFO, EAL, "Auto-detected process type: %s\n", + ptype == RTE_PROC_PRIMARY ? "PRIMARY" : "SECONDARY"); + + return ptype; +} + +/* Sets up rte_config structure with the pointer to shared memory config.*/ +static void +rte_config_init(void) +{ + rte_config.process_type = internal_config.process_type; + + switch (rte_config.process_type){ + case RTE_PROC_PRIMARY: + rte_eal_config_create(); + break; + case RTE_PROC_SECONDARY: + rte_eal_config_attach(); + rte_eal_mcfg_wait_complete(rte_config.mem_config); + rte_eal_config_reattach(); + break; + case RTE_PROC_AUTO: + case RTE_PROC_INVALID: + rte_panic("Invalid process type\n"); + } +} + +/* Unlocks hugepage directories that were locked by eal_hugepage_info_init */ +static void +eal_hugedirs_unlock(void) +{ + int i; + + for (i = 0; i < MAX_HUGEPAGE_SIZES; i++) + { + /* skip uninitialized */ + if (internal_config.hugepage_info[i].lock_descriptor < 0) + continue; + /* unlock hugepage file */ + flock(internal_config.hugepage_info[i].lock_descriptor, LOCK_UN); + close(internal_config.hugepage_info[i].lock_descriptor); + /* reset the field */ + internal_config.hugepage_info[i].lock_descriptor = -1; + } +} + +/* display usage */ +static void +eal_usage(const char *prgname) +{ + 
printf("\nUsage: %s ", prgname); + eal_common_usage(); + printf("EAL Linux options:\n" + " --"OPT_SOCKET_MEM" Memory to allocate on sockets (comma separated values)\n" + " --"OPT_HUGE_DIR" Directory where hugetlbfs is mounted\n" + " --"OPT_FILE_PREFIX" Prefix for hugepage filenames\n" + " --"OPT_BASE_VIRTADDR" Base virtual address\n" + " --"OPT_CREATE_UIO_DEV" Create /dev/uioX (usually done by hotplug)\n" + " --"OPT_VFIO_INTR" Interrupt mode for VFIO (legacy|msi|msix)\n" + " --"OPT_XEN_DOM0" Support running on Xen dom0 without hugetlbfs\n" + "\n"); + /* Allow the application to print its usage message too if hook is set */ + if ( rte_application_usage_hook ) { + printf("===== Application Usage =====\n\n"); + rte_application_usage_hook(prgname); + } +} + +/* Set a per-application usage message */ +rte_usage_hook_t +rte_set_application_usage_hook( rte_usage_hook_t usage_func ) +{ + rte_usage_hook_t old_func; + + /* Will be NULL on the first call to denote the last usage routine. */ + old_func = rte_application_usage_hook; + rte_application_usage_hook = usage_func; + + return old_func; +} + +static int +eal_parse_socket_mem(char *socket_mem) +{ + char * arg[RTE_MAX_NUMA_NODES]; + char *end; + int arg_num, i, len; + uint64_t total_mem = 0; + + len = strnlen(socket_mem, SOCKET_MEM_STRLEN); + if (len == SOCKET_MEM_STRLEN) { + RTE_LOG(ERR, EAL, "--socket-mem is too long\n"); + return -1; + } + + /* all other error cases will be caught later */ + if (!isdigit(socket_mem[len-1])) + return -1; + + /* split the optarg into separate socket values */ + arg_num = rte_strsplit(socket_mem, len, + arg, RTE_MAX_NUMA_NODES, ','); + + /* if split failed, or 0 arguments */ + if (arg_num <= 0) + return -1; + + internal_config.force_sockets = 1; + + /* parse each defined socket option */ + errno = 0; + for (i = 0; i < arg_num; i++) { + end = NULL; + internal_config.socket_mem[i] = strtoull(arg[i], &end, 10); + + /* check for invalid input */ + if ((errno != 0) || + (arg[i][0] == '\0') || 
(end == NULL) || (*end != '\0')) + return -1; + internal_config.socket_mem[i] *= 1024ULL; + internal_config.socket_mem[i] *= 1024ULL; + total_mem += internal_config.socket_mem[i]; + } + + /* check if we have a positive amount of total memory */ + if (total_mem == 0) + return -1; + + return 0; +} + +static int +eal_parse_base_virtaddr(const char *arg) +{ + char *end; + uint64_t addr; + + errno = 0; + addr = strtoull(arg, &end, 16); + + /* check for errors */ + if ((errno != 0) || (arg[0] == '\0') || end == NULL || (*end != '\0')) + return -1; + + /* make sure we don't exceed 32-bit boundary on 32-bit target */ +#ifndef RTE_ARCH_64 + if (addr >= UINTPTR_MAX) + return -1; +#endif + + /* align the addr on 16M boundary, 16MB is the minimum huge page + * size on IBM Power architecture. If the addr is aligned to 16MB, + * it can align to 2MB for x86. So this alignment can also be used + * on x86 */ + internal_config.base_virtaddr = + RTE_PTR_ALIGN_CEIL((uintptr_t)addr, (size_t)RTE_PGSIZE_16M); + + return 0; +} + +static int +eal_parse_vfio_intr(const char *mode) +{ + unsigned i; + static struct { + const char *name; + enum rte_intr_mode value; + } map[] = { + { "legacy", RTE_INTR_MODE_LEGACY }, + { "msi", RTE_INTR_MODE_MSI }, + { "msix", RTE_INTR_MODE_MSIX }, + }; + + for (i = 0; i < RTE_DIM(map); i++) { + if (!strcmp(mode, map[i].name)) { + internal_config.vfio_intr_mode = map[i].value; + return 0; + } + } + return -1; +} + +static inline size_t +eal_get_hugepage_mem_size(void) +{ + uint64_t size = 0; + unsigned i, j; + + for (i = 0; i < internal_config.num_hugepage_sizes; i++) { + struct hugepage_info *hpi = &internal_config.hugepage_info[i]; + if (hpi->hugedir != NULL) { + for (j = 0; j < RTE_MAX_NUMA_NODES; j++) { + size += hpi->hugepage_sz * hpi->num_pages[j]; + } + } + } + + return (size < SIZE_MAX) ? 
(size_t)(size) : SIZE_MAX; +} + +/* Parse the arguments for --log-level only */ +static void +eal_log_level_parse(int argc, char **argv) +{ + int opt; + char **argvopt; + int option_index; + const int old_optind = optind; + const int old_optopt = optopt; + char * const old_optarg = optarg; + + argvopt = argv; + optind = 1; + + eal_reset_internal_config(&internal_config); + + while ((opt = getopt_long(argc, argvopt, eal_short_options, + eal_long_options, &option_index)) != EOF) { + + int ret; + + /* getopt is not happy, stop right now */ + if (opt == '?') + break; + + ret = (opt == OPT_LOG_LEVEL_NUM) ? + eal_parse_common_option(opt, optarg, &internal_config) : 0; + + /* common parser is not happy */ + if (ret < 0) + break; + } + + /* restore getopt lib */ + optind = old_optind; + optopt = old_optopt; + optarg = old_optarg; +} + +/* Parse the argument given in the command line of the application */ +static int +eal_parse_args(int argc, char **argv) +{ + int opt, ret; + char **argvopt; + int option_index; + char *prgname = argv[0]; + const int old_optind = optind; + const int old_optopt = optopt; + char * const old_optarg = optarg; + + argvopt = argv; + optind = 1; + + while ((opt = getopt_long(argc, argvopt, eal_short_options, + eal_long_options, &option_index)) != EOF) { + + /* getopt is not happy, stop right now */ + if (opt == '?') { + eal_usage(prgname); + ret = -1; + goto out; + } + + ret = eal_parse_common_option(opt, optarg, &internal_config); + /* common parser is not happy */ + if (ret < 0) { + eal_usage(prgname); + ret = -1; + goto out; + } + /* common parser handled this option */ + if (ret == 0) + continue; + + switch (opt) { + case 'h': + eal_usage(prgname); + exit(EXIT_SUCCESS); + + /* long options */ + case OPT_XEN_DOM0_NUM: +#ifdef RTE_LIBRTE_XEN_DOM0 + internal_config.xen_dom0_support = 1; +#else + RTE_LOG(ERR, EAL, "Can't support DPDK app " + "running on Dom0, please configure" + " RTE_LIBRTE_XEN_DOM0=y\n"); + ret = -1; + goto out; +#endif + break; 
+ + case OPT_HUGE_DIR_NUM: + internal_config.hugepage_dir = optarg; + break; + + case OPT_FILE_PREFIX_NUM: + internal_config.hugefile_prefix = optarg; + break; + + case OPT_SOCKET_MEM_NUM: + if (eal_parse_socket_mem(optarg) < 0) { + RTE_LOG(ERR, EAL, "invalid parameters for --" + OPT_SOCKET_MEM "\n"); + eal_usage(prgname); + ret = -1; + goto out; + } + break; + + case OPT_BASE_VIRTADDR_NUM: + if (eal_parse_base_virtaddr(optarg) < 0) { + RTE_LOG(ERR, EAL, "invalid parameter for --" + OPT_BASE_VIRTADDR "\n"); + eal_usage(prgname); + ret = -1; + goto out; + } + break; + + case OPT_VFIO_INTR_NUM: + if (eal_parse_vfio_intr(optarg) < 0) { + RTE_LOG(ERR, EAL, "invalid parameters for --" + OPT_VFIO_INTR "\n"); + eal_usage(prgname); + ret = -1; + goto out; + } + break; + + case OPT_CREATE_UIO_DEV_NUM: + internal_config.create_uio_dev = 1; + break; + + default: + if (opt < OPT_LONG_MIN_NUM && isprint(opt)) { + RTE_LOG(ERR, EAL, "Option %c is not supported " + "on Linux\n", opt); + } else if (opt >= OPT_LONG_MIN_NUM && + opt < OPT_LONG_MAX_NUM) { + RTE_LOG(ERR, EAL, "Option %s is not supported " + "on Linux\n", + eal_long_options[option_index].name); + } else { + RTE_LOG(ERR, EAL, "Option %d is not supported " + "on Linux\n", opt); + } + eal_usage(prgname); + ret = -1; + goto out; + } + } + + if (eal_adjust_config(&internal_config) != 0) { + ret = -1; + goto out; + } + + /* sanity checks */ + if (eal_check_common_options(&internal_config) != 0) { + eal_usage(prgname); + ret = -1; + goto out; + } + + /* --xen-dom0 doesn't make sense with --socket-mem */ + if (internal_config.xen_dom0_support && internal_config.force_sockets == 1) { + RTE_LOG(ERR, EAL, "Options --"OPT_SOCKET_MEM" cannot be specified " + "together with --"OPT_XEN_DOM0"\n"); + eal_usage(prgname); + ret = -1; + goto out; + } + + if (optind >= 0) + argv[optind-1] = prgname; + ret = optind-1; + +out: + /* restore getopt lib */ + optind = old_optind; + optopt = old_optopt; + optarg = old_optarg; + + return ret; +} + 
+static void +eal_check_mem_on_local_socket(void) +{ + const struct rte_memseg *ms; + int i, socket_id; + + socket_id = rte_lcore_to_socket_id(rte_config.master_lcore); + + ms = rte_eal_get_physmem_layout(); + + for (i = 0; i < RTE_MAX_MEMSEG; i++) + if (ms[i].socket_id == socket_id && + ms[i].len > 0) + return; + + RTE_LOG(WARNING, EAL, "WARNING: Master core has no " + "memory on local socket!\n"); +} + +static int +sync_func(__attribute__((unused)) void *arg) +{ + return 0; +} + +inline static void +rte_eal_mcfg_complete(void) +{ + /* ALL shared mem_config related INIT DONE */ + if (rte_config.process_type == RTE_PROC_PRIMARY) + rte_config.mem_config->magic = RTE_MAGIC; +} + +/* + * Request iopl privilege for all RPL, returns 0 on success + * iopl() call is mostly for the i386 architecture. For other architectures, + * return -1 to indicate IO privilege can't be changed in this way. + */ +int +rte_eal_iopl_init(void) +{ +#if defined(RTE_ARCH_X86_64) || defined(RTE_ARCH_I686) + if (iopl(3) != 0) + return -1; + return 0; +#else + return -1; +#endif +} + +/* Launch threads, called at application init(). */ +int +rte_eal_init(int argc, char **argv) +{ + int i, fctret, ret; + pthread_t thread_id; + static rte_atomic32_t run_once = RTE_ATOMIC32_INIT(0); + const char *logid; + char cpuset[RTE_CPU_AFFINITY_STR_LEN]; + char thread_name[RTE_MAX_THREAD_NAME_LEN]; + + if (!rte_atomic32_test_and_set(&run_once)) + return -1; + + logid = strrchr(argv[0], '/'); + logid = strdup(logid ? 
logid + 1: argv[0]); + + thread_id = pthread_self(); + + if (rte_eal_log_early_init() < 0) + rte_panic("Cannot init early logs\n"); + + eal_log_level_parse(argc, argv); + + /* set log level as early as possible */ + rte_set_log_level(internal_config.log_level); + + if (rte_eal_cpu_init() < 0) + rte_panic("Cannot detect lcores\n"); + + fctret = eal_parse_args(argc, argv); + if (fctret < 0) + exit(1); + + if (internal_config.no_hugetlbfs == 0 && + internal_config.process_type != RTE_PROC_SECONDARY && + internal_config.xen_dom0_support == 0 && + eal_hugepage_info_init() < 0) + rte_panic("Cannot get hugepage information\n"); + + if (internal_config.memory == 0 && internal_config.force_sockets == 0) { + if (internal_config.no_hugetlbfs) + internal_config.memory = MEMSIZE_IF_NO_HUGE_PAGE; + else + internal_config.memory = eal_get_hugepage_mem_size(); + } + + if (internal_config.vmware_tsc_map == 1) { +#ifdef RTE_LIBRTE_EAL_VMWARE_TSC_MAP_SUPPORT + rte_cycles_vmware_tsc_map = 1; + RTE_LOG (DEBUG, EAL, "Using VMWARE TSC MAP, " + "you must have monitor_control.pseudo_perfctr = TRUE\n"); +#else + RTE_LOG (WARNING, EAL, "Ignoring --vmware-tsc-map because " + "RTE_LIBRTE_EAL_VMWARE_TSC_MAP_SUPPORT is not set\n"); +#endif + } + + rte_srand(rte_rdtsc()); + + rte_config_init(); + + if (rte_eal_pci_init() < 0) + rte_panic("Cannot init PCI\n"); + +#ifdef RTE_LIBRTE_IVSHMEM + if (rte_eal_ivshmem_init() < 0) + rte_panic("Cannot init IVSHMEM\n"); +#endif + + if (rte_eal_memory_init() < 0) + rte_panic("Cannot init memory\n"); + + /* the directories are locked during eal_hugepage_info_init */ + eal_hugedirs_unlock(); + + if (rte_eal_memzone_init() < 0) + rte_panic("Cannot init memzone\n"); + + if (rte_eal_tailqs_init() < 0) + rte_panic("Cannot init tail queues for objects\n"); + +#ifdef RTE_LIBRTE_IVSHMEM + if (rte_eal_ivshmem_obj_init() < 0) + rte_panic("Cannot init IVSHMEM objects\n"); +#endif + + if (rte_eal_log_init(logid, internal_config.syslog_facility) < 0) + rte_panic("Cannot 
init logs\n"); + + if (rte_eal_alarm_init() < 0) + rte_panic("Cannot init interrupt-handling thread\n"); + + if (rte_eal_timer_init() < 0) + rte_panic("Cannot init HPET or TSC timers\n"); + + eal_check_mem_on_local_socket(); + + rte_eal_mcfg_complete(); + + if (eal_plugins_init() < 0) + rte_panic("Cannot init plugins\n"); + + eal_thread_init_master(rte_config.master_lcore); + + ret = eal_thread_dump_affinity(cpuset, RTE_CPU_AFFINITY_STR_LEN); + + RTE_LOG(DEBUG, EAL, "Master lcore %u is ready (tid=%x;cpuset=[%s%s])\n", + rte_config.master_lcore, (int)thread_id, cpuset, + ret == 0 ? "" : "..."); + + if (rte_eal_dev_init() < 0) + rte_panic("Cannot init pmd devices\n"); + + if (rte_eal_intr_init() < 0) + rte_panic("Cannot init interrupt-handling thread\n"); + + RTE_LCORE_FOREACH_SLAVE(i) { + + /* + * create communication pipes between master thread + * and children + */ + if (pipe(lcore_config[i].pipe_master2slave) < 0) + rte_panic("Cannot create pipe\n"); + if (pipe(lcore_config[i].pipe_slave2master) < 0) + rte_panic("Cannot create pipe\n"); + + lcore_config[i].state = WAIT; + + /* create a thread for each lcore */ + ret = pthread_create(&lcore_config[i].thread_id, NULL, + eal_thread_loop, NULL); + if (ret != 0) + rte_panic("Cannot create thread\n"); + + /* Set thread_name for aid in debugging. */ + snprintf(thread_name, RTE_MAX_THREAD_NAME_LEN, + "lcore-slave-%d", i); + ret = rte_thread_setname(lcore_config[i].thread_id, + thread_name); + if (ret != 0) + RTE_LOG(ERR, EAL, + "Cannot set name for lcore thread\n"); + } + + /* + * Launch a dummy function on all slave lcores, so that master lcore + * knows they are all ready when this function returns. 
+ */ + rte_eal_mp_remote_launch(sync_func, NULL, SKIP_MASTER); + rte_eal_mp_wait_lcore(); + + /* Probe & Initialize PCI devices */ + if (rte_eal_pci_probe()) + rte_panic("Cannot probe PCI\n"); + + return fctret; +} + +/* get core role */ +enum rte_lcore_role_t +rte_eal_lcore_role(unsigned lcore_id) +{ + return rte_config.lcore_role[lcore_id]; +} + +enum rte_proc_type_t +rte_eal_process_type(void) +{ + return rte_config.process_type; +} + +int rte_eal_has_hugepages(void) +{ + return ! internal_config.no_hugetlbfs; +} + +int +rte_eal_check_module(const char *module_name) +{ + char mod_name[30]; /* Any module names can be longer than 30 bytes? */ + int ret = 0; + int n; + + if (NULL == module_name) + return -1; + + FILE *fd = fopen("/proc/modules", "r"); + if (NULL == fd) { + RTE_LOG(ERR, EAL, "Open /proc/modules failed!" + " error %i (%s)\n", errno, strerror(errno)); + return -1; + } + while (!feof(fd)) { + n = fscanf(fd, "%29s %*[^\n]", mod_name); + if ((n == 1) && !strcmp(mod_name, module_name)) { + ret = 1; + break; + } + } + fclose(fd); + + return ret; +} diff --git a/src/dpdk22/lib/librte_eal/linuxapp/eal/eal_alarm.c b/src/dpdk22/lib/librte_eal/linuxapp/eal/eal_alarm.c new file mode 100644 index 00000000..8b042abc --- /dev/null +++ b/src/dpdk22/lib/librte_eal/linuxapp/eal/eal_alarm.c @@ -0,0 +1,273 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. 
+ * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#ifndef TFD_NONBLOCK +#include +#define TFD_NONBLOCK O_NONBLOCK +#endif + +#define NS_PER_US 1000 +#define US_PER_MS 1000 +#define MS_PER_S 1000 +#define US_PER_S (US_PER_MS * MS_PER_S) + +#ifdef CLOCK_MONOTONIC_RAW /* Defined in glibc bits/time.h */ +#define CLOCK_TYPE_ID CLOCK_MONOTONIC_RAW +#else +#define CLOCK_TYPE_ID CLOCK_MONOTONIC +#endif + +struct alarm_entry { + LIST_ENTRY(alarm_entry) next; + struct timeval time; + rte_eal_alarm_callback cb_fn; + void *cb_arg; + volatile uint8_t executing; + volatile pthread_t executing_id; +}; + +static LIST_HEAD(alarm_list, alarm_entry) alarm_list = LIST_HEAD_INITIALIZER(); +static rte_spinlock_t alarm_list_lk = RTE_SPINLOCK_INITIALIZER; + +static struct rte_intr_handle intr_handle = {.fd = -1 }; +static int handler_registered = 0; +static void eal_alarm_callback(struct rte_intr_handle *hdl, void *arg); + +int +rte_eal_alarm_init(void) +{ + intr_handle.type = RTE_INTR_HANDLE_ALARM; + /* create a timerfd file descriptor */ + intr_handle.fd = timerfd_create(CLOCK_MONOTONIC, TFD_NONBLOCK); + if (intr_handle.fd == -1) + goto error; + + return 0; + +error: + rte_errno = errno; + return -1; +} + +static void +eal_alarm_callback(struct rte_intr_handle *hdl __rte_unused, + void *arg __rte_unused) +{ + struct timespec now; + struct alarm_entry *ap; + + rte_spinlock_lock(&alarm_list_lk); + while ((ap = LIST_FIRST(&alarm_list)) !=NULL && + clock_gettime(CLOCK_TYPE_ID, &now) == 0 && + (ap->time.tv_sec < now.tv_sec || (ap->time.tv_sec == now.tv_sec && + (ap->time.tv_usec * NS_PER_US) <= now.tv_nsec))) { + ap->executing = 1; + ap->executing_id = pthread_self(); + rte_spinlock_unlock(&alarm_list_lk); + + ap->cb_fn(ap->cb_arg); + + rte_spinlock_lock(&alarm_list_lk); + + LIST_REMOVE(ap, next); + rte_free(ap); + } + + if 
(!LIST_EMPTY(&alarm_list)) { + struct itimerspec atime = { .it_interval = { 0, 0 } }; + + ap = LIST_FIRST(&alarm_list); + atime.it_value.tv_sec = ap->time.tv_sec; + atime.it_value.tv_nsec = ap->time.tv_usec * NS_PER_US; + /* perform borrow for subtraction if necessary */ + if (now.tv_nsec > (ap->time.tv_usec * NS_PER_US)) + atime.it_value.tv_sec--, atime.it_value.tv_nsec += US_PER_S * NS_PER_US; + + atime.it_value.tv_sec -= now.tv_sec; + atime.it_value.tv_nsec -= now.tv_nsec; + timerfd_settime(intr_handle.fd, 0, &atime, NULL); + } + rte_spinlock_unlock(&alarm_list_lk); +} + +int +rte_eal_alarm_set(uint64_t us, rte_eal_alarm_callback cb_fn, void *cb_arg) +{ + struct timespec now; + int ret = 0; + struct alarm_entry *ap, *new_alarm; + + /* Check parameters, including that us won't cause a uint64_t overflow */ + if (us < 1 || us > (UINT64_MAX - US_PER_S) || cb_fn == NULL) + return -EINVAL; + + new_alarm = rte_zmalloc(NULL, sizeof(*new_alarm), 0); + if (new_alarm == NULL) + return -ENOMEM; + + /* use current time to calculate absolute time of alarm */ + clock_gettime(CLOCK_TYPE_ID, &now); + + new_alarm->cb_fn = cb_fn; + new_alarm->cb_arg = cb_arg; + new_alarm->time.tv_usec = ((now.tv_nsec / NS_PER_US) + us) % US_PER_S; + new_alarm->time.tv_sec = now.tv_sec + (((now.tv_nsec / NS_PER_US) + us) / US_PER_S); + + rte_spinlock_lock(&alarm_list_lk); + if (!handler_registered) { + ret |= rte_intr_callback_register(&intr_handle, + eal_alarm_callback, NULL); + handler_registered = (ret == 0) ? 
1 : 0; + } + + if (LIST_EMPTY(&alarm_list)) + LIST_INSERT_HEAD(&alarm_list, new_alarm, next); + else { + LIST_FOREACH(ap, &alarm_list, next) { + if (ap->time.tv_sec > new_alarm->time.tv_sec || + (ap->time.tv_sec == new_alarm->time.tv_sec && + ap->time.tv_usec > new_alarm->time.tv_usec)){ + LIST_INSERT_BEFORE(ap, new_alarm, next); + break; + } + if (LIST_NEXT(ap, next) == NULL) { + LIST_INSERT_AFTER(ap, new_alarm, next); + break; + } + } + } + + if (LIST_FIRST(&alarm_list) == new_alarm) { + struct itimerspec alarm_time = { + .it_interval = {0, 0}, + .it_value = { + .tv_sec = us / US_PER_S, + .tv_nsec = (us % US_PER_S) * NS_PER_US, + }, + }; + ret |= timerfd_settime(intr_handle.fd, 0, &alarm_time, NULL); + } + rte_spinlock_unlock(&alarm_list_lk); + + return ret; +} + +int +rte_eal_alarm_cancel(rte_eal_alarm_callback cb_fn, void *cb_arg) +{ + struct alarm_entry *ap, *ap_prev; + int count = 0; + int err = 0; + int executing; + + if (!cb_fn) { + rte_errno = EINVAL; + return -1; + } + + do { + executing = 0; + rte_spinlock_lock(&alarm_list_lk); + /* remove any matches at the start of the list */ + while ((ap = LIST_FIRST(&alarm_list)) != NULL && + cb_fn == ap->cb_fn && + (cb_arg == (void *)-1 || cb_arg == ap->cb_arg)) { + + if (ap->executing == 0) { + LIST_REMOVE(ap, next); + rte_free(ap); + count++; + } else { + /* If calling from other context, mark that alarm is executing + * so loop can spin till it finish. 
Otherwise we are trying to + * cancel our self - mark it by EINPROGRESS */ + if (pthread_equal(ap->executing_id, pthread_self()) == 0) + executing++; + else + err = EINPROGRESS; + + break; + } + } + ap_prev = ap; + + /* now go through list, removing entries not at start */ + LIST_FOREACH(ap, &alarm_list, next) { + /* this won't be true first time through */ + if (cb_fn == ap->cb_fn && + (cb_arg == (void *)-1 || cb_arg == ap->cb_arg)) { + + if (ap->executing == 0) { + LIST_REMOVE(ap, next); + rte_free(ap); + count++; + ap = ap_prev; + } else if (pthread_equal(ap->executing_id, pthread_self()) == 0) + executing++; + else + err = EINPROGRESS; + } + ap_prev = ap; + } + rte_spinlock_unlock(&alarm_list_lk); + } while (executing != 0); + + if (count == 0 && err == 0) + rte_errno = ENOENT; + else if (err) + rte_errno = err; + + return count; +} diff --git a/src/dpdk22/lib/librte_eal/linuxapp/eal/eal_debug.c b/src/dpdk22/lib/librte_eal/linuxapp/eal/eal_debug.c new file mode 100644 index 00000000..907fbfa7 --- /dev/null +++ b/src/dpdk22/lib/librte_eal/linuxapp/eal/eal_debug.c @@ -0,0 +1,119 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include +#include +#include +#include +#include +#include + +#include +#include +#include + +#define BACKTRACE_SIZE 256 + +/* dump the stack of the calling core */ +void rte_dump_stack(void) +{ + void *func[BACKTRACE_SIZE]; + char **symb = NULL; + int size; + + size = backtrace(func, BACKTRACE_SIZE); + symb = backtrace_symbols(func, size); + + if (symb == NULL) + return; + + while (size > 0) { + rte_log(RTE_LOG_ERR, RTE_LOGTYPE_EAL, + "%d: [%s]\n", size, symb[size - 1]); + size --; + } + + free(symb); +} + +/* not implemented in this environment */ +void rte_dump_registers(void) +{ + return; +} + +/* call abort(), it will generate a coredump if enabled */ +void __rte_panic(const char *funcname, const char *format, ...) +{ + va_list ap; + + /* disable history */ + rte_log_set_history(0); + + rte_log(RTE_LOG_CRIT, RTE_LOGTYPE_EAL, "PANIC in %s():\n", funcname); + va_start(ap, format); + rte_vlog(RTE_LOG_CRIT, RTE_LOGTYPE_EAL, format, ap); + va_end(ap); + rte_dump_stack(); + rte_dump_registers(); + abort(); +} + +/* + * Like rte_panic this terminates the application. However, no traceback is + * provided and no core-dump is generated. 
+ */ +void +rte_exit(int exit_code, const char *format, ...) +{ + va_list ap; + + /* disable history */ + rte_log_set_history(0); + + if (exit_code != 0) + RTE_LOG(CRIT, EAL, "Error - exiting with code: %d\n" + " Cause: ", exit_code); + + va_start(ap, format); + rte_vlog(RTE_LOG_CRIT, RTE_LOGTYPE_EAL, format, ap); + va_end(ap); + +#ifndef RTE_EAL_ALWAYS_PANIC_ON_ERROR + exit(exit_code); +#else + rte_dump_stack(); + rte_dump_registers(); + abort(); +#endif +} diff --git a/src/dpdk22/lib/librte_eal/linuxapp/eal/eal_hugepage_info.c b/src/dpdk22/lib/librte_eal/linuxapp/eal/eal_hugepage_info.c new file mode 100644 index 00000000..18858e2d --- /dev/null +++ b/src/dpdk22/lib/librte_eal/linuxapp/eal/eal_hugepage_info.c @@ -0,0 +1,365 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "rte_string_fns.h" +#include "eal_internal_cfg.h" +#include "eal_hugepages.h" +#include "eal_filesystem.h" + +static const char sys_dir_path[] = "/sys/kernel/mm/hugepages"; + +/* this function is only called from eal_hugepage_info_init which itself + * is only called from a primary process */ +static uint32_t +get_num_hugepages(const char *subdir) +{ + char path[PATH_MAX]; + long unsigned resv_pages, num_pages = 0; + const char *nr_hp_file = "free_hugepages"; + const char *nr_rsvd_file = "resv_hugepages"; + + /* first, check how many reserved pages kernel reports */ + snprintf(path, sizeof(path), "%s/%s/%s", + sys_dir_path, subdir, nr_rsvd_file); + if (eal_parse_sysfs_value(path, &resv_pages) < 0) + return 0; + + snprintf(path, sizeof(path), "%s/%s/%s", + sys_dir_path, subdir, nr_hp_file); + if (eal_parse_sysfs_value(path, &num_pages) < 0) + return 0; + + if (num_pages == 0) + RTE_LOG(WARNING, EAL, "No free hugepages reported in %s\n", + subdir); + + /* adjust num_pages */ + if (num_pages >= resv_pages) + num_pages -= resv_pages; + else if (resv_pages) + num_pages = 0; + + /* we want to return a uint32_t and more than this looks suspicious + * anyway ... 
*/ + if (num_pages > UINT32_MAX) + num_pages = UINT32_MAX; + + return num_pages; +} + +static uint64_t +get_default_hp_size(void) +{ + const char proc_meminfo[] = "/proc/meminfo"; + const char str_hugepagesz[] = "Hugepagesize:"; + unsigned hugepagesz_len = sizeof(str_hugepagesz) - 1; + char buffer[256]; + unsigned long long size = 0; + + FILE *fd = fopen(proc_meminfo, "r"); + if (fd == NULL) + rte_panic("Cannot open %s\n", proc_meminfo); + while(fgets(buffer, sizeof(buffer), fd)){ + if (strncmp(buffer, str_hugepagesz, hugepagesz_len) == 0){ + size = rte_str_to_size(&buffer[hugepagesz_len]); + break; + } + } + fclose(fd); + if (size == 0) + rte_panic("Cannot get default hugepage size from %s\n", proc_meminfo); + return size; +} + +static const char * +get_hugepage_dir(uint64_t hugepage_sz) +{ + enum proc_mount_fieldnames { + DEVICE = 0, + MOUNTPT, + FSTYPE, + OPTIONS, + _FIELDNAME_MAX + }; + static uint64_t default_size = 0; + const char proc_mounts[] = "/proc/mounts"; + const char hugetlbfs_str[] = "hugetlbfs"; + const size_t htlbfs_str_len = sizeof(hugetlbfs_str) - 1; + const char pagesize_opt[] = "pagesize="; + const size_t pagesize_opt_len = sizeof(pagesize_opt) - 1; + const char split_tok = ' '; + char *splitstr[_FIELDNAME_MAX]; + char buf[BUFSIZ]; + char *retval = NULL; + + FILE *fd = fopen(proc_mounts, "r"); + if (fd == NULL) + rte_panic("Cannot open %s\n", proc_mounts); + + if (default_size == 0) + default_size = get_default_hp_size(); + + while (fgets(buf, sizeof(buf), fd)){ + if (rte_strsplit(buf, sizeof(buf), splitstr, _FIELDNAME_MAX, + split_tok) != _FIELDNAME_MAX) { + RTE_LOG(ERR, EAL, "Error parsing %s\n", proc_mounts); + break; /* return NULL */ + } + + /* we have a specified --huge-dir option, only examine that dir */ + if (internal_config.hugepage_dir != NULL && + strcmp(splitstr[MOUNTPT], internal_config.hugepage_dir) != 0) + continue; + + if (strncmp(splitstr[FSTYPE], hugetlbfs_str, htlbfs_str_len) == 0){ + const char *pagesz_str = 
strstr(splitstr[OPTIONS], pagesize_opt); + + /* if no explicit page size, the default page size is compared */ + if (pagesz_str == NULL){ + if (hugepage_sz == default_size){ + retval = strdup(splitstr[MOUNTPT]); + break; + } + } + /* there is an explicit page size, so check it */ + else { + uint64_t pagesz = rte_str_to_size(&pagesz_str[pagesize_opt_len]); + if (pagesz == hugepage_sz) { + retval = strdup(splitstr[MOUNTPT]); + break; + } + } + } /* end if strncmp hugetlbfs */ + } /* end while fgets */ + + fclose(fd); + return retval; +} + +/* + * Clear the hugepage directory of whatever hugepage files + * there are. Checks if the file is locked (i.e. + * if it's in use by another DPDK process). + */ +static int +clear_hugedir(const char * hugedir) +{ + DIR *dir; + struct dirent *dirent; + int dir_fd, fd, lck_result; + const char filter[] = "*map_*"; /* matches hugepage files */ + + /* open directory */ + dir = opendir(hugedir); + if (!dir) { + RTE_LOG(ERR, EAL, "Unable to open hugepage directory %s\n", + hugedir); + goto error; + } + dir_fd = dirfd(dir); + + dirent = readdir(dir); + if (!dirent) { + RTE_LOG(ERR, EAL, "Unable to read hugepage directory %s\n", + hugedir); + goto error; + } + + while(dirent != NULL){ + /* skip files that don't match the hugepage pattern */ + if (fnmatch(filter, dirent->d_name, 0) > 0) { + dirent = readdir(dir); + continue; + } + + /* try and lock the file */ + fd = openat(dir_fd, dirent->d_name, O_RDONLY); + + /* skip to next file */ + if (fd == -1) { + dirent = readdir(dir); + continue; + } + + /* non-blocking lock */ + lck_result = flock(fd, LOCK_EX | LOCK_NB); + + /* if lock succeeds, unlock and remove the file */ + if (lck_result != -1) { + flock(fd, LOCK_UN); + unlinkat(dir_fd, dirent->d_name, 0); + } + close (fd); + dirent = readdir(dir); + } + + closedir(dir); + return 0; + +error: + if (dir) + closedir(dir); + + RTE_LOG(ERR, EAL, "Error while clearing hugepage dir: %s\n", + strerror(errno)); + + return -1; +} + +static int 
+compare_hpi(const void *a, const void *b) +{ + const struct hugepage_info *hpi_a = a; + const struct hugepage_info *hpi_b = b; + + return hpi_b->hugepage_sz - hpi_a->hugepage_sz; +} + +/* + * when we initialize the hugepage info, everything goes + * to socket 0 by default. it will later get sorted by memory + * initialization procedure. + */ +int +eal_hugepage_info_init(void) +{ + const char dirent_start_text[] = "hugepages-"; + const size_t dirent_start_len = sizeof(dirent_start_text) - 1; + unsigned i, num_sizes = 0; + DIR *dir; + struct dirent *dirent; + + dir = opendir(sys_dir_path); + if (dir == NULL) + rte_panic("Cannot open directory %s to read system hugepage " + "info\n", sys_dir_path); + + for (dirent = readdir(dir); dirent != NULL; dirent = readdir(dir)) { + struct hugepage_info *hpi; + + if (strncmp(dirent->d_name, dirent_start_text, + dirent_start_len) != 0) + continue; + + if (num_sizes >= MAX_HUGEPAGE_SIZES) + break; + + hpi = &internal_config.hugepage_info[num_sizes]; + hpi->hugepage_sz = + rte_str_to_size(&dirent->d_name[dirent_start_len]); + hpi->hugedir = get_hugepage_dir(hpi->hugepage_sz); + + /* first, check if we have a mountpoint */ + if (hpi->hugedir == NULL) { + uint32_t num_pages; + + num_pages = get_num_hugepages(dirent->d_name); + if (num_pages > 0) + RTE_LOG(NOTICE, EAL, + "%" PRIu32 " hugepages of size " + "%" PRIu64 " reserved, but no mounted " + "hugetlbfs found for that size\n", + num_pages, hpi->hugepage_sz); + continue; + } + + /* try to obtain a writelock */ + hpi->lock_descriptor = open(hpi->hugedir, O_RDONLY); + + /* if blocking lock failed */ + if (flock(hpi->lock_descriptor, LOCK_EX) == -1) { + RTE_LOG(CRIT, EAL, + "Failed to lock hugepage directory!\n"); + break; + } + /* clear out the hugepages dir from unused pages */ + if (clear_hugedir(hpi->hugedir) == -1) + break; + + /* for now, put all pages into socket 0, + * later they will be sorted */ + hpi->num_pages[0] = get_num_hugepages(dirent->d_name); + +#ifndef RTE_ARCH_64 
+ /* for 32-bit systems, limit number of hugepages to + * 1GB per page size */ + hpi->num_pages[0] = RTE_MIN(hpi->num_pages[0], + RTE_PGSIZE_1G / hpi->hugepage_sz); +#endif + + num_sizes++; + } + closedir(dir); + + /* something went wrong, and we broke from the for loop above */ + if (dirent != NULL) + return -1; + + internal_config.num_hugepage_sizes = num_sizes; + + /* sort the page directory entries by size, largest to smallest */ + qsort(&internal_config.hugepage_info[0], num_sizes, + sizeof(internal_config.hugepage_info[0]), compare_hpi); + + /* now we have all info, check we have at least one valid size */ + for (i = 0; i < num_sizes; i++) + if (internal_config.hugepage_info[i].hugedir != NULL && + internal_config.hugepage_info[i].num_pages[0] > 0) + return 0; + + /* no valid hugepage mounts available, return error */ + return -1; +} diff --git a/src/dpdk22/lib/librte_eal/linuxapp/eal/eal_interrupts.c b/src/dpdk22/lib/librte_eal/linuxapp/eal/eal_interrupts.c new file mode 100644 index 00000000..06b26a9e --- /dev/null +++ b/src/dpdk22/lib/librte_eal/linuxapp/eal/eal_interrupts.c @@ -0,0 +1,1224 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "eal_private.h" +#include "eal_vfio.h" +#include "eal_thread.h" + +#define EAL_INTR_EPOLL_WAIT_FOREVER (-1) +#define NB_OTHER_INTR 1 + +static RTE_DEFINE_PER_LCORE(int, _epfd) = -1; /**< epoll fd per thread */ + +/** + * union for pipe fds. 
+ */ +union intr_pipefds{ + struct { + int pipefd[2]; + }; + struct { + int readfd; + int writefd; + }; +}; + +/** + * union buffer for reading on different devices + */ +union rte_intr_read_buffer { + int uio_intr_count; /* for uio device */ +#ifdef VFIO_PRESENT + uint64_t vfio_intr_count; /* for vfio device */ +#endif + uint64_t timerfd_num; /* for timerfd */ + char charbuf[16]; /* for others */ +}; + +TAILQ_HEAD(rte_intr_cb_list, rte_intr_callback); +TAILQ_HEAD(rte_intr_source_list, rte_intr_source); + +struct rte_intr_callback { + TAILQ_ENTRY(rte_intr_callback) next; + rte_intr_callback_fn cb_fn; /**< callback address */ + void *cb_arg; /**< parameter for callback */ +}; + +struct rte_intr_source { + TAILQ_ENTRY(rte_intr_source) next; + struct rte_intr_handle intr_handle; /**< interrupt handle */ + struct rte_intr_cb_list callbacks; /**< user callbacks */ + uint32_t active; +}; + +/* global spinlock for interrupt data operation */ +static rte_spinlock_t intr_lock = RTE_SPINLOCK_INITIALIZER; + +/* union buffer for pipe read/write */ +static union intr_pipefds intr_pipe; + +/* interrupt sources list */ +static struct rte_intr_source_list intr_sources; + +/* interrupt handling thread */ +static pthread_t intr_thread; + +/* VFIO interrupts */ +#ifdef VFIO_PRESENT + +#define IRQ_SET_BUF_LEN (sizeof(struct vfio_irq_set) + sizeof(int)) +/* irq set buffer length for queue interrupts and LSC interrupt */ +#define MSIX_IRQ_SET_BUF_LEN (sizeof(struct vfio_irq_set) + \ + sizeof(int) * (RTE_MAX_RXTX_INTR_VEC_ID + 1)) + +/* enable legacy (INTx) interrupts */ +static int +vfio_enable_intx(struct rte_intr_handle *intr_handle) { + struct vfio_irq_set *irq_set; + char irq_set_buf[IRQ_SET_BUF_LEN]; + int len, ret; + int *fd_ptr; + + len = sizeof(irq_set_buf); + + /* enable INTx */ + irq_set = (struct vfio_irq_set *) irq_set_buf; + irq_set->argsz = len; + irq_set->count = 1; + irq_set->flags = VFIO_IRQ_SET_DATA_EVENTFD | VFIO_IRQ_SET_ACTION_TRIGGER; + irq_set->index = 
VFIO_PCI_INTX_IRQ_INDEX; + irq_set->start = 0; + fd_ptr = (int *) &irq_set->data; + *fd_ptr = intr_handle->fd; + + ret = ioctl(intr_handle->vfio_dev_fd, VFIO_DEVICE_SET_IRQS, irq_set); + + if (ret) { + RTE_LOG(ERR, EAL, "Error enabling INTx interrupts for fd %d\n", + intr_handle->fd); + return -1; + } + + /* unmask INTx after enabling */ + memset(irq_set, 0, len); + len = sizeof(struct vfio_irq_set); + irq_set->argsz = len; + irq_set->count = 1; + irq_set->flags = VFIO_IRQ_SET_DATA_NONE | VFIO_IRQ_SET_ACTION_UNMASK; + irq_set->index = VFIO_PCI_INTX_IRQ_INDEX; + irq_set->start = 0; + + ret = ioctl(intr_handle->vfio_dev_fd, VFIO_DEVICE_SET_IRQS, irq_set); + + if (ret) { + RTE_LOG(ERR, EAL, "Error unmasking INTx interrupts for fd %d\n", + intr_handle->fd); + return -1; + } + return 0; +} + +/* disable legacy (INTx) interrupts */ +static int +vfio_disable_intx(struct rte_intr_handle *intr_handle) { + struct vfio_irq_set *irq_set; + char irq_set_buf[IRQ_SET_BUF_LEN]; + int len, ret; + + len = sizeof(struct vfio_irq_set); + + /* mask interrupts before disabling */ + irq_set = (struct vfio_irq_set *) irq_set_buf; + irq_set->argsz = len; + irq_set->count = 1; + irq_set->flags = VFIO_IRQ_SET_DATA_NONE | VFIO_IRQ_SET_ACTION_UNMASK; + irq_set->index = VFIO_PCI_INTX_IRQ_INDEX; + irq_set->start = 0; + + ret = ioctl(intr_handle->vfio_dev_fd, VFIO_DEVICE_SET_IRQS, irq_set); + + if (ret) { + RTE_LOG(ERR, EAL, "Error unmasking INTx interrupts for fd %d\n", + intr_handle->fd); + return -1; + } + + /* disable INTx*/ + memset(irq_set, 0, len); + irq_set->argsz = len; + irq_set->count = 0; + irq_set->flags = VFIO_IRQ_SET_DATA_NONE | VFIO_IRQ_SET_ACTION_TRIGGER; + irq_set->index = VFIO_PCI_INTX_IRQ_INDEX; + irq_set->start = 0; + + ret = ioctl(intr_handle->vfio_dev_fd, VFIO_DEVICE_SET_IRQS, irq_set); + + if (ret) { + RTE_LOG(ERR, EAL, + "Error disabling INTx interrupts for fd %d\n", intr_handle->fd); + return -1; + } + return 0; +} + +/* enable MSI interrupts */ +static int 
+vfio_enable_msi(struct rte_intr_handle *intr_handle) { + int len, ret; + char irq_set_buf[IRQ_SET_BUF_LEN]; + struct vfio_irq_set *irq_set; + int *fd_ptr; + + len = sizeof(irq_set_buf); + + irq_set = (struct vfio_irq_set *) irq_set_buf; + irq_set->argsz = len; + irq_set->count = 1; + irq_set->flags = VFIO_IRQ_SET_DATA_EVENTFD | VFIO_IRQ_SET_ACTION_TRIGGER; + irq_set->index = VFIO_PCI_MSI_IRQ_INDEX; + irq_set->start = 0; + fd_ptr = (int *) &irq_set->data; + *fd_ptr = intr_handle->fd; + + ret = ioctl(intr_handle->vfio_dev_fd, VFIO_DEVICE_SET_IRQS, irq_set); + + if (ret) { + RTE_LOG(ERR, EAL, "Error enabling MSI interrupts for fd %d\n", + intr_handle->fd); + return -1; + } + return 0; +} + +/* disable MSI interrupts */ +static int +vfio_disable_msi(struct rte_intr_handle *intr_handle) { + struct vfio_irq_set *irq_set; + char irq_set_buf[IRQ_SET_BUF_LEN]; + int len, ret; + + len = sizeof(struct vfio_irq_set); + + irq_set = (struct vfio_irq_set *) irq_set_buf; + irq_set->argsz = len; + irq_set->count = 0; + irq_set->flags = VFIO_IRQ_SET_DATA_NONE | VFIO_IRQ_SET_ACTION_TRIGGER; + irq_set->index = VFIO_PCI_MSI_IRQ_INDEX; + irq_set->start = 0; + + ret = ioctl(intr_handle->vfio_dev_fd, VFIO_DEVICE_SET_IRQS, irq_set); + + if (ret) + RTE_LOG(ERR, EAL, + "Error disabling MSI interrupts for fd %d\n", intr_handle->fd); + + return ret; +} + +/* enable MSI-X interrupts */ +static int +vfio_enable_msix(struct rte_intr_handle *intr_handle) { + int len, ret; + char irq_set_buf[MSIX_IRQ_SET_BUF_LEN]; + struct vfio_irq_set *irq_set; + int *fd_ptr; + + len = sizeof(irq_set_buf); + + irq_set = (struct vfio_irq_set *) irq_set_buf; + irq_set->argsz = len; + if (!intr_handle->max_intr) + intr_handle->max_intr = 1; + else if (intr_handle->max_intr > RTE_MAX_RXTX_INTR_VEC_ID) + intr_handle->max_intr = RTE_MAX_RXTX_INTR_VEC_ID + 1; + + irq_set->count = intr_handle->max_intr; + irq_set->flags = VFIO_IRQ_SET_DATA_EVENTFD | VFIO_IRQ_SET_ACTION_TRIGGER; + irq_set->index = VFIO_PCI_MSIX_IRQ_INDEX; 
+ irq_set->start = 0; + fd_ptr = (int *) &irq_set->data; + /* INTR vector offset 0 reserve for non-efds mapping */ + fd_ptr[RTE_INTR_VEC_ZERO_OFFSET] = intr_handle->fd; + memcpy(&fd_ptr[RTE_INTR_VEC_RXTX_OFFSET], intr_handle->efds, + sizeof(*intr_handle->efds) * intr_handle->nb_efd); + + ret = ioctl(intr_handle->vfio_dev_fd, VFIO_DEVICE_SET_IRQS, irq_set); + + if (ret) { + RTE_LOG(ERR, EAL, "Error enabling MSI-X interrupts for fd %d\n", + intr_handle->fd); + return -1; + } + + return 0; +} + +/* disable MSI-X interrupts */ +static int +vfio_disable_msix(struct rte_intr_handle *intr_handle) { + struct vfio_irq_set *irq_set; + char irq_set_buf[MSIX_IRQ_SET_BUF_LEN]; + int len, ret; + + len = sizeof(struct vfio_irq_set); + + irq_set = (struct vfio_irq_set *) irq_set_buf; + irq_set->argsz = len; + irq_set->count = 0; + irq_set->flags = VFIO_IRQ_SET_DATA_NONE | VFIO_IRQ_SET_ACTION_TRIGGER; + irq_set->index = VFIO_PCI_MSIX_IRQ_INDEX; + irq_set->start = 0; + + ret = ioctl(intr_handle->vfio_dev_fd, VFIO_DEVICE_SET_IRQS, irq_set); + + if (ret) + RTE_LOG(ERR, EAL, + "Error disabling MSI-X interrupts for fd %d\n", intr_handle->fd); + + return ret; +} +#endif + +static int +uio_intx_intr_disable(struct rte_intr_handle *intr_handle) +{ + unsigned char command_high; + + /* use UIO config file descriptor for uio_pci_generic */ + if (pread(intr_handle->uio_cfg_fd, &command_high, 1, 5) != 1) { + RTE_LOG(ERR, EAL, + "Error reading interrupts status for fd %d\n", + intr_handle->uio_cfg_fd); + return -1; + } + /* disable interrupts */ + command_high |= 0x4; + if (pwrite(intr_handle->uio_cfg_fd, &command_high, 1, 5) != 1) { + RTE_LOG(ERR, EAL, + "Error disabling interrupts for fd %d\n", + intr_handle->uio_cfg_fd); + return -1; + } + + return 0; +} + +static int +uio_intx_intr_enable(struct rte_intr_handle *intr_handle) +{ + unsigned char command_high; + + /* use UIO config file descriptor for uio_pci_generic */ + if (pread(intr_handle->uio_cfg_fd, &command_high, 1, 5) != 1) { + 
RTE_LOG(ERR, EAL, + "Error reading interrupts status for fd %d\n", + intr_handle->uio_cfg_fd); + return -1; + } + /* enable interrupts */ + command_high &= ~0x4; + if (pwrite(intr_handle->uio_cfg_fd, &command_high, 1, 5) != 1) { + RTE_LOG(ERR, EAL, + "Error enabling interrupts for fd %d\n", + intr_handle->uio_cfg_fd); + return -1; + } + + return 0; +} + +static int +uio_intr_disable(struct rte_intr_handle *intr_handle) +{ + const int value = 0; + + if (write(intr_handle->fd, &value, sizeof(value)) < 0) { + RTE_LOG(ERR, EAL, + "Error disabling interrupts for fd %d (%s)\n", + intr_handle->fd, strerror(errno)); + return -1; + } + return 0; +} + +static int +uio_intr_enable(struct rte_intr_handle *intr_handle) +{ + const int value = 1; + + if (write(intr_handle->fd, &value, sizeof(value)) < 0) { + RTE_LOG(ERR, EAL, + "Error enabling interrupts for fd %d (%s)\n", + intr_handle->fd, strerror(errno)); + return -1; + } + return 0; +} + +int +rte_intr_callback_register(struct rte_intr_handle *intr_handle, + rte_intr_callback_fn cb, void *cb_arg) +{ + int ret, wake_thread; + struct rte_intr_source *src; + struct rte_intr_callback *callback; + + wake_thread = 0; + + /* first do parameter checking */ + if (intr_handle == NULL || intr_handle->fd < 0 || cb == NULL) { + RTE_LOG(ERR, EAL, + "Registering with invalid input parameter\n"); + return -EINVAL; + } + + /* allocate a new interrupt callback entity */ + callback = rte_zmalloc("interrupt callback list", + sizeof(*callback), 0); + if (callback == NULL) { + RTE_LOG(ERR, EAL, "Can not allocate memory\n"); + return -ENOMEM; + } + callback->cb_fn = cb; + callback->cb_arg = cb_arg; + + rte_spinlock_lock(&intr_lock); + + /* check if there is at least one callback registered for the fd */ + TAILQ_FOREACH(src, &intr_sources, next) { + if (src->intr_handle.fd == intr_handle->fd) { + /* we had no interrupts for this */ + if TAILQ_EMPTY(&src->callbacks) + wake_thread = 1; + + TAILQ_INSERT_TAIL(&(src->callbacks), callback, next); + ret = 
0; + break; + } + } + + /* no existing callbacks for this - add new source */ + if (src == NULL) { + if ((src = rte_zmalloc("interrupt source list", + sizeof(*src), 0)) == NULL) { + RTE_LOG(ERR, EAL, "Can not allocate memory\n"); + rte_free(callback); + ret = -ENOMEM; + } else { + src->intr_handle = *intr_handle; + TAILQ_INIT(&src->callbacks); + TAILQ_INSERT_TAIL(&(src->callbacks), callback, next); + TAILQ_INSERT_TAIL(&intr_sources, src, next); + wake_thread = 1; + ret = 0; + } + } + + rte_spinlock_unlock(&intr_lock); + + /** + * check if need to notify the pipe fd waited by epoll_wait to + * rebuild the wait list. + */ + if (wake_thread) + if (write(intr_pipe.writefd, "1", 1) < 0) + return -EPIPE; + + return ret; +} + +int +rte_intr_callback_unregister(struct rte_intr_handle *intr_handle, + rte_intr_callback_fn cb_fn, void *cb_arg) +{ + int ret; + struct rte_intr_source *src; + struct rte_intr_callback *cb, *next; + + /* do parameter checking first */ + if (intr_handle == NULL || intr_handle->fd < 0) { + RTE_LOG(ERR, EAL, + "Unregistering with invalid input parameter\n"); + return -EINVAL; + } + + rte_spinlock_lock(&intr_lock); + + /* check if the insterrupt source for the fd is existent */ + TAILQ_FOREACH(src, &intr_sources, next) + if (src->intr_handle.fd == intr_handle->fd) + break; + + /* No interrupt source registered for the fd */ + if (src == NULL) { + ret = -ENOENT; + + /* interrupt source has some active callbacks right now. */ + } else if (src->active != 0) { + ret = -EAGAIN; + + /* ok to remove. */ + } else { + ret = 0; + + /*walk through the callbacks and remove all that match. */ + for (cb = TAILQ_FIRST(&src->callbacks); cb != NULL; cb = next) { + + next = TAILQ_NEXT(cb, next); + + if (cb->cb_fn == cb_fn && (cb_arg == (void *)-1 || + cb->cb_arg == cb_arg)) { + TAILQ_REMOVE(&src->callbacks, cb, next); + rte_free(cb); + ret++; + } + } + + /* all callbacks for that source are removed. 
*/ + if (TAILQ_EMPTY(&src->callbacks)) { + TAILQ_REMOVE(&intr_sources, src, next); + rte_free(src); + } + } + + rte_spinlock_unlock(&intr_lock); + + /* notify the pipe fd waited by epoll_wait to rebuild the wait list */ + if (ret >= 0 && write(intr_pipe.writefd, "1", 1) < 0) { + ret = -EPIPE; + } + + return ret; +} + +int +rte_intr_enable(struct rte_intr_handle *intr_handle) +{ + if (!intr_handle || intr_handle->fd < 0 || intr_handle->uio_cfg_fd < 0) + return -1; + + switch (intr_handle->type){ + /* write to the uio fd to enable the interrupt */ + case RTE_INTR_HANDLE_UIO: + if (uio_intr_enable(intr_handle)) + return -1; + break; + case RTE_INTR_HANDLE_UIO_INTX: + if (uio_intx_intr_enable(intr_handle)) + return -1; + break; + /* not used at this moment */ + case RTE_INTR_HANDLE_ALARM: + return -1; +#ifdef VFIO_PRESENT + case RTE_INTR_HANDLE_VFIO_MSIX: + if (vfio_enable_msix(intr_handle)) + return -1; + break; + case RTE_INTR_HANDLE_VFIO_MSI: + if (vfio_enable_msi(intr_handle)) + return -1; + break; + case RTE_INTR_HANDLE_VFIO_LEGACY: + if (vfio_enable_intx(intr_handle)) + return -1; + break; +#endif + /* unknown handle type */ + default: + RTE_LOG(ERR, EAL, + "Unknown handle type of fd %d\n", + intr_handle->fd); + return -1; + } + + return 0; +} + +int +rte_intr_disable(struct rte_intr_handle *intr_handle) +{ + if (!intr_handle || intr_handle->fd < 0 || intr_handle->uio_cfg_fd < 0) + return -1; + + switch (intr_handle->type){ + /* write to the uio fd to disable the interrupt */ + case RTE_INTR_HANDLE_UIO: + if (uio_intr_disable(intr_handle)) + return -1; + break; + case RTE_INTR_HANDLE_UIO_INTX: + if (uio_intx_intr_disable(intr_handle)) + return -1; + break; + /* not used at this moment */ + case RTE_INTR_HANDLE_ALARM: + return -1; +#ifdef VFIO_PRESENT + case RTE_INTR_HANDLE_VFIO_MSIX: + if (vfio_disable_msix(intr_handle)) + return -1; + break; + case RTE_INTR_HANDLE_VFIO_MSI: + if (vfio_disable_msi(intr_handle)) + return -1; + break; + case 
RTE_INTR_HANDLE_VFIO_LEGACY: + if (vfio_disable_intx(intr_handle)) + return -1; + break; +#endif + /* unknown handle type */ + default: + RTE_LOG(ERR, EAL, + "Unknown handle type of fd %d\n", + intr_handle->fd); + return -1; + } + + return 0; +} + +static int +eal_intr_process_interrupts(struct epoll_event *events, int nfds) +{ + int n, bytes_read; + struct rte_intr_source *src; + struct rte_intr_callback *cb; + union rte_intr_read_buffer buf; + struct rte_intr_callback active_cb; + + for (n = 0; n < nfds; n++) { + + /** + * if the pipe fd is ready to read, return out to + * rebuild the wait list. + */ + if (events[n].data.fd == intr_pipe.readfd){ + int r = read(intr_pipe.readfd, buf.charbuf, + sizeof(buf.charbuf)); + RTE_SET_USED(r); + return -1; + } + rte_spinlock_lock(&intr_lock); + TAILQ_FOREACH(src, &intr_sources, next) + if (src->intr_handle.fd == + events[n].data.fd) + break; + if (src == NULL){ + rte_spinlock_unlock(&intr_lock); + continue; + } + + /* mark this interrupt source as active and release the lock. */ + src->active = 1; + rte_spinlock_unlock(&intr_lock); + + /* set the length to be read dor different handle type */ + switch (src->intr_handle.type) { + case RTE_INTR_HANDLE_UIO: + case RTE_INTR_HANDLE_UIO_INTX: + bytes_read = sizeof(buf.uio_intr_count); + break; + case RTE_INTR_HANDLE_ALARM: + bytes_read = sizeof(buf.timerfd_num); + break; +#ifdef VFIO_PRESENT + case RTE_INTR_HANDLE_VFIO_MSIX: + case RTE_INTR_HANDLE_VFIO_MSI: + case RTE_INTR_HANDLE_VFIO_LEGACY: + bytes_read = sizeof(buf.vfio_intr_count); + break; +#endif + case RTE_INTR_HANDLE_EXT: + default: + bytes_read = 1; + break; + } + + if (src->intr_handle.type != RTE_INTR_HANDLE_EXT) { + /** + * read out to clear the ready-to-be-read flag + * for epoll_wait. 
+ */ + bytes_read = read(events[n].data.fd, &buf, bytes_read); + if (bytes_read < 0) { + if (errno == EINTR || errno == EWOULDBLOCK) + continue; + + RTE_LOG(ERR, EAL, "Error reading from file " + "descriptor %d: %s\n", + events[n].data.fd, + strerror(errno)); + } else if (bytes_read == 0) + RTE_LOG(ERR, EAL, "Read nothing from file " + "descriptor %d\n", events[n].data.fd); + } + + /* grab a lock, again to call callbacks and update status. */ + rte_spinlock_lock(&intr_lock); + + if (bytes_read > 0) { + + /* Finally, call all callbacks. */ + TAILQ_FOREACH(cb, &src->callbacks, next) { + + /* make a copy and unlock. */ + active_cb = *cb; + rte_spinlock_unlock(&intr_lock); + + /* call the actual callback */ + active_cb.cb_fn(&src->intr_handle, + active_cb.cb_arg); + + /*get the lock back. */ + rte_spinlock_lock(&intr_lock); + } + } + + /* we done with that interrupt source, release it. */ + src->active = 0; + rte_spinlock_unlock(&intr_lock); + } + + return 0; +} + +/** + * It handles all the interrupts. + * + * @param pfd + * epoll file descriptor. + * @param totalfds + * The number of file descriptors added in epoll. + * + * @return + * void + */ +static void +eal_intr_handle_interrupts(int pfd, unsigned totalfds) +{ + struct epoll_event events[totalfds]; + int nfds = 0; + + for(;;) { + nfds = epoll_wait(pfd, events, totalfds, + EAL_INTR_EPOLL_WAIT_FOREVER); + /* epoll_wait fail */ + if (nfds < 0) { + if (errno == EINTR) + continue; + RTE_LOG(ERR, EAL, + "epoll_wait returns with fail\n"); + return; + } + /* epoll_wait timeout, will never happens here */ + else if (nfds == 0) + continue; + /* epoll_wait has at least one fd ready to read */ + if (eal_intr_process_interrupts(events, nfds) < 0) + return; + } +} + +/** + * It builds/rebuilds up the epoll file descriptor with all the + * file descriptors being waited on. Then handles the interrupts. + * + * @param arg + * pointer. 
(unused) + * + * @return + * never return; + */ +static __attribute__((noreturn)) void * +eal_intr_thread_main(__rte_unused void *arg) +{ + struct epoll_event ev; + + /* host thread, never break out */ + for (;;) { + /* build up the epoll fd with all descriptors we are to + * wait on then pass it to the handle_interrupts function + */ + static struct epoll_event pipe_event = { + .events = EPOLLIN | EPOLLPRI, + }; + struct rte_intr_source *src; + unsigned numfds = 0; + + /* create epoll fd */ + int pfd = epoll_create(1); + if (pfd < 0) + rte_panic("Cannot create epoll instance\n"); + + pipe_event.data.fd = intr_pipe.readfd; + /** + * add pipe fd into wait list, this pipe is used to + * rebuild the wait list. + */ + if (epoll_ctl(pfd, EPOLL_CTL_ADD, intr_pipe.readfd, + &pipe_event) < 0) { + rte_panic("Error adding fd to %d epoll_ctl, %s\n", + intr_pipe.readfd, strerror(errno)); + } + numfds++; + + rte_spinlock_lock(&intr_lock); + + TAILQ_FOREACH(src, &intr_sources, next) { + if (src->callbacks.tqh_first == NULL) + continue; /* skip those with no callbacks */ + ev.events = EPOLLIN | EPOLLPRI; + ev.data.fd = src->intr_handle.fd; + + /** + * add all the uio device file descriptor + * into wait list. + */ + if (epoll_ctl(pfd, EPOLL_CTL_ADD, + src->intr_handle.fd, &ev) < 0){ + rte_panic("Error adding fd %d epoll_ctl, %s\n", + src->intr_handle.fd, strerror(errno)); + } + else + numfds++; + } + rte_spinlock_unlock(&intr_lock); + /* serve the interrupt */ + eal_intr_handle_interrupts(pfd, numfds); + + /** + * when we return, we need to rebuild the + * list of fds to monitor. + */ + close(pfd); + } +} + +int +rte_eal_intr_init(void) +{ + int ret = 0, ret_1 = 0; + char thread_name[RTE_MAX_THREAD_NAME_LEN]; + + /* init the global interrupt source head */ + TAILQ_INIT(&intr_sources); + + /** + * create a pipe which will be waited by epoll and notified to + * rebuild the wait list of epoll. 
+ */ + if (pipe(intr_pipe.pipefd) < 0) + return -1; + + /* create the host thread to wait/handle the interrupt */ + ret = pthread_create(&intr_thread, NULL, + eal_intr_thread_main, NULL); + if (ret != 0) { + RTE_LOG(ERR, EAL, + "Failed to create thread for interrupt handling\n"); + } else { + /* Set thread_name for aid in debugging. */ + snprintf(thread_name, RTE_MAX_THREAD_NAME_LEN, + "eal-intr-thread"); + ret_1 = rte_thread_setname(intr_thread, thread_name); + if (ret_1 != 0) + RTE_LOG(ERR, EAL, + "Failed to set thread name for interrupt handling\n"); + } + + return -ret; +} + +static void +eal_intr_proc_rxtx_intr(int fd, const struct rte_intr_handle *intr_handle) +{ + union rte_intr_read_buffer buf; + int bytes_read = 1; + int nbytes; + + switch (intr_handle->type) { + case RTE_INTR_HANDLE_UIO: + case RTE_INTR_HANDLE_UIO_INTX: + bytes_read = sizeof(buf.uio_intr_count); + break; +#ifdef VFIO_PRESENT + case RTE_INTR_HANDLE_VFIO_MSIX: + case RTE_INTR_HANDLE_VFIO_MSI: + case RTE_INTR_HANDLE_VFIO_LEGACY: + bytes_read = sizeof(buf.vfio_intr_count); + break; +#endif + default: + bytes_read = 1; + RTE_LOG(INFO, EAL, "unexpected intr type\n"); + break; + } + + /** + * read out to clear the ready-to-be-read flag + * for epoll_wait. 
+ */ + do { + nbytes = read(fd, &buf, bytes_read); + if (nbytes < 0) { + if (errno == EINTR || errno == EWOULDBLOCK || + errno == EAGAIN) + continue; + RTE_LOG(ERR, EAL, + "Error reading from fd %d: %s\n", + fd, strerror(errno)); + } else if (nbytes == 0) + RTE_LOG(ERR, EAL, "Read nothing from fd %d\n", fd); + return; + } while (1); +} + +static int +eal_epoll_process_event(struct epoll_event *evs, unsigned int n, + struct rte_epoll_event *events) +{ + unsigned int i, count = 0; + struct rte_epoll_event *rev; + + for (i = 0; i < n; i++) { + rev = evs[i].data.ptr; + if (!rev || !rte_atomic32_cmpset(&rev->status, RTE_EPOLL_VALID, + RTE_EPOLL_EXEC)) + continue; + + events[count].status = RTE_EPOLL_VALID; + events[count].fd = rev->fd; + events[count].epfd = rev->epfd; + events[count].epdata.event = rev->epdata.event; + events[count].epdata.data = rev->epdata.data; + if (rev->epdata.cb_fun) + rev->epdata.cb_fun(rev->fd, + rev->epdata.cb_arg); + + rte_compiler_barrier(); + rev->status = RTE_EPOLL_VALID; + count++; + } + return count; +} + +static inline int +eal_init_tls_epfd(void) +{ + int pfd = epoll_create(255); + + if (pfd < 0) { + RTE_LOG(ERR, EAL, + "Cannot create epoll instance\n"); + return -1; + } + return pfd; +} + +int +rte_intr_tls_epfd(void) +{ + if (RTE_PER_LCORE(_epfd) == -1) + RTE_PER_LCORE(_epfd) = eal_init_tls_epfd(); + + return RTE_PER_LCORE(_epfd); +} + +int +rte_epoll_wait(int epfd, struct rte_epoll_event *events, + int maxevents, int timeout) +{ + struct epoll_event evs[maxevents]; + int rc; + + if (!events) { + RTE_LOG(ERR, EAL, "rte_epoll_event can't be NULL\n"); + return -1; + } + + /* using per thread epoll fd */ + if (epfd == RTE_EPOLL_PER_THREAD) + epfd = rte_intr_tls_epfd(); + + while (1) { + rc = epoll_wait(epfd, evs, maxevents, timeout); + if (likely(rc > 0)) { + /* epoll_wait has at least one fd ready to read */ + rc = eal_epoll_process_event(evs, rc, events); + break; + } else if (rc < 0) { + if (errno == EINTR) + continue; + /* 
epoll_wait fail */ + RTE_LOG(ERR, EAL, "epoll_wait returns with fail %s\n", + strerror(errno)); + rc = -1; + break; + } else { + /* rc == 0, epoll_wait timed out */ + break; + } + } + + return rc; +} + +static inline void +eal_epoll_data_safe_free(struct rte_epoll_event *ev) +{ + while (!rte_atomic32_cmpset(&ev->status, RTE_EPOLL_VALID, + RTE_EPOLL_INVALID)) + while (ev->status != RTE_EPOLL_VALID) + rte_pause(); + memset(&ev->epdata, 0, sizeof(ev->epdata)); + ev->fd = -1; + ev->epfd = -1; +} + +int +rte_epoll_ctl(int epfd, int op, int fd, + struct rte_epoll_event *event) +{ + struct epoll_event ev; + + if (!event) { + RTE_LOG(ERR, EAL, "rte_epoll_event can't be NULL\n"); + return -1; + } + + /* using per thread epoll fd */ + if (epfd == RTE_EPOLL_PER_THREAD) + epfd = rte_intr_tls_epfd(); + + if (op == EPOLL_CTL_ADD) { + event->status = RTE_EPOLL_VALID; + event->fd = fd; /* ignore fd in event */ + event->epfd = epfd; + ev.data.ptr = (void *)event; + } + + ev.events = event->epdata.event; + if (epoll_ctl(epfd, op, fd, &ev) < 0) { + RTE_LOG(ERR, EAL, "Error op %d fd %d epoll_ctl, %s\n", + op, fd, strerror(errno)); + if (op == EPOLL_CTL_ADD) + /* rollback status when CTL_ADD fail */ + event->status = RTE_EPOLL_INVALID; + return -1; + } + + if (op == EPOLL_CTL_DEL && event->status != RTE_EPOLL_INVALID) + eal_epoll_data_safe_free(event); + + return 0; +} + +int +rte_intr_rx_ctl(struct rte_intr_handle *intr_handle, int epfd, + int op, unsigned int vec, void *data) +{ + struct rte_epoll_event *rev; + struct rte_epoll_data *epdata; + int epfd_op; + unsigned int efd_idx; + int rc = 0; + + efd_idx = (vec >= RTE_INTR_VEC_RXTX_OFFSET) ? 
+ (vec - RTE_INTR_VEC_RXTX_OFFSET) : vec; + + if (!intr_handle || intr_handle->nb_efd == 0 || + efd_idx >= intr_handle->nb_efd) { + RTE_LOG(ERR, EAL, "Wrong intr vector number.\n"); + return -EPERM; + } + + switch (op) { + case RTE_INTR_EVENT_ADD: + epfd_op = EPOLL_CTL_ADD; + rev = &intr_handle->elist[efd_idx]; + if (rev->status != RTE_EPOLL_INVALID) { + RTE_LOG(INFO, EAL, "Event already been added.\n"); + return -EEXIST; + } + + /* attach to intr vector fd */ + epdata = &rev->epdata; + epdata->event = EPOLLIN | EPOLLPRI | EPOLLET; + epdata->data = data; + epdata->cb_fun = (rte_intr_event_cb_t)eal_intr_proc_rxtx_intr; + epdata->cb_arg = (void *)intr_handle; + rc = rte_epoll_ctl(epfd, epfd_op, + intr_handle->efds[efd_idx], rev); + if (!rc) + RTE_LOG(DEBUG, EAL, + "efd %d associated with vec %d added on epfd %d" + "\n", rev->fd, vec, epfd); + else + rc = -EPERM; + break; + case RTE_INTR_EVENT_DEL: + epfd_op = EPOLL_CTL_DEL; + rev = &intr_handle->elist[efd_idx]; + if (rev->status == RTE_EPOLL_INVALID) { + RTE_LOG(INFO, EAL, "Event does not exist.\n"); + return -EPERM; + } + + rc = rte_epoll_ctl(rev->epfd, epfd_op, rev->fd, rev); + if (rc) + rc = -EPERM; + break; + default: + RTE_LOG(ERR, EAL, "event op type mismatch\n"); + rc = -EPERM; + } + + return rc; +} + +int +rte_intr_efd_enable(struct rte_intr_handle *intr_handle, uint32_t nb_efd) +{ + uint32_t i; + int fd; + uint32_t n = RTE_MIN(nb_efd, (uint32_t)RTE_MAX_RXTX_INTR_VEC_ID); + + assert(nb_efd != 0); + + if (intr_handle->type == RTE_INTR_HANDLE_VFIO_MSIX) { + for (i = 0; i < n; i++) { + fd = eventfd(0, EFD_NONBLOCK | EFD_CLOEXEC); + if (fd < 0) { + RTE_LOG(ERR, EAL, + "can't setup eventfd, error %i (%s)\n", + errno, strerror(errno)); + return -1; + } + intr_handle->efds[i] = fd; + } + intr_handle->nb_efd = n; + intr_handle->max_intr = NB_OTHER_INTR + n; + } else { + intr_handle->efds[0] = intr_handle->fd; + intr_handle->nb_efd = RTE_MIN(nb_efd, 1U); + intr_handle->max_intr = NB_OTHER_INTR; + } + + return 0; +} + 
+void +rte_intr_efd_disable(struct rte_intr_handle *intr_handle) +{ + uint32_t i; + struct rte_epoll_event *rev; + + for (i = 0; i < intr_handle->nb_efd; i++) { + rev = &intr_handle->elist[i]; + if (rev->status == RTE_EPOLL_INVALID) + continue; + if (rte_epoll_ctl(rev->epfd, EPOLL_CTL_DEL, rev->fd, rev)) { + /* force free if the entry valid */ + eal_epoll_data_safe_free(rev); + rev->status = RTE_EPOLL_INVALID; + } + } + + if (intr_handle->max_intr > intr_handle->nb_efd) { + for (i = 0; i < intr_handle->nb_efd; i++) + close(intr_handle->efds[i]); + } + intr_handle->nb_efd = 0; + intr_handle->max_intr = 0; +} + +int +rte_intr_dp_is_en(struct rte_intr_handle *intr_handle) +{ + return !(!intr_handle->nb_efd); +} + +int +rte_intr_allow_others(struct rte_intr_handle *intr_handle) +{ + if (!rte_intr_dp_is_en(intr_handle)) + return 1; + else + return !!(intr_handle->max_intr - intr_handle->nb_efd); +} + +int +rte_intr_cap_multiple(struct rte_intr_handle *intr_handle) +{ + if (intr_handle->type == RTE_INTR_HANDLE_VFIO_MSIX) + return 1; + + return 0; +} diff --git a/src/dpdk22/lib/librte_eal/linuxapp/eal/eal_ivshmem.c b/src/dpdk22/lib/librte_eal/linuxapp/eal/eal_ivshmem.c new file mode 100644 index 00000000..589019b1 --- /dev/null +++ b/src/dpdk22/lib/librte_eal/linuxapp/eal/eal_ivshmem.c @@ -0,0 +1,958 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. 
+ * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#ifdef RTE_LIBRTE_IVSHMEM /* hide it from coverage */ + +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "eal_internal_cfg.h" +#include "eal_private.h" + +#define PCI_VENDOR_ID_IVSHMEM 0x1Af4 +#define PCI_DEVICE_ID_IVSHMEM 0x1110 + +#define IVSHMEM_MAGIC 0x0BADC0DE + +#define IVSHMEM_RESOURCE_PATH "/sys/bus/pci/devices/%04x:%02x:%02x.%x/resource2" +#define IVSHMEM_CONFIG_PATH "/var/run/.%s_ivshmem_config" + +#define PHYS 0x1 +#define VIRT 0x2 +#define IOREMAP 0x4 +#define FULL (PHYS|VIRT|IOREMAP) + +#define METADATA_SIZE_ALIGNED \ + (RTE_ALIGN_CEIL(sizeof(struct rte_ivshmem_metadata),pagesz)) + +#define CONTAINS(x,y)\ + (((y).addr_64 >= (x).addr_64) && ((y).addr_64 < (x).addr_64 + (x).len)) + +#define DIM(x) (sizeof(x)/sizeof(x[0])) + +struct ivshmem_pci_device { + char path[PATH_MAX]; + phys_addr_t ioremap_addr; +}; + +/* data type to store in config */ +struct ivshmem_segment { + struct rte_ivshmem_metadata_entry entry; + uint64_t align; + char path[PATH_MAX]; +}; +struct ivshmem_shared_config { + struct ivshmem_segment segment[RTE_MAX_MEMSEG]; + uint32_t segment_idx; + struct ivshmem_pci_device pci_devs[RTE_LIBRTE_IVSHMEM_MAX_PCI_DEVS]; + uint32_t pci_devs_idx; +}; +static struct ivshmem_shared_config * ivshmem_config; +static int memseg_idx; +static int pagesz; + +/* Tailq heads to add rings to */ +TAILQ_HEAD(rte_ring_list, rte_tailq_entry); + +/* + * Utility functions + */ + +static int +is_ivshmem_device(struct rte_pci_device * dev) +{ + return (dev->id.vendor_id == PCI_VENDOR_ID_IVSHMEM + && dev->id.device_id == PCI_DEVICE_ID_IVSHMEM); +} + +static void * +map_metadata(int fd, uint64_t len) +{ + size_t metadata_len = sizeof(struct rte_ivshmem_metadata); + size_t aligned_len = METADATA_SIZE_ALIGNED; + + return mmap(NULL, metadata_len, PROT_READ | PROT_WRITE, + MAP_SHARED, fd, len - aligned_len); +} 
+ +static void +unmap_metadata(void * ptr) +{ + munmap(ptr, sizeof(struct rte_ivshmem_metadata)); +} + +static int +has_ivshmem_metadata(int fd, uint64_t len) +{ + struct rte_ivshmem_metadata metadata; + void * ptr; + + ptr = map_metadata(fd, len); + + if (ptr == MAP_FAILED) + return -1; + + metadata = *(struct rte_ivshmem_metadata*) (ptr); + + unmap_metadata(ptr); + + return metadata.magic_number == IVSHMEM_MAGIC; +} + +static void +remove_segment(struct ivshmem_segment * ms, int len, int idx) +{ + int i; + + for (i = idx; i < len - 1; i++) + memcpy(&ms[i], &ms[i+1], sizeof(struct ivshmem_segment)); + memset(&ms[len-1], 0, sizeof(struct ivshmem_segment)); +} + +static int +overlap(const struct rte_memzone * mz1, const struct rte_memzone * mz2) +{ + uint64_t start1, end1, start2, end2; + uint64_t p_start1, p_end1, p_start2, p_end2; + uint64_t i_start1, i_end1, i_start2, i_end2; + int result = 0; + + /* gather virtual addresses */ + start1 = mz1->addr_64; + end1 = mz1->addr_64 + mz1->len; + start2 = mz2->addr_64; + end2 = mz2->addr_64 + mz2->len; + + /* gather physical addresses */ + p_start1 = mz1->phys_addr; + p_end1 = mz1->phys_addr + mz1->len; + p_start2 = mz2->phys_addr; + p_end2 = mz2->phys_addr + mz2->len; + + /* gather ioremap addresses */ + i_start1 = mz1->ioremap_addr; + i_end1 = mz1->ioremap_addr + mz1->len; + i_start2 = mz2->ioremap_addr; + i_end2 = mz2->ioremap_addr + mz2->len; + + /* check for overlap in virtual addresses */ + if (start1 >= start2 && start1 < end2) + result |= VIRT; + if (start2 >= start1 && start2 < end1) + result |= VIRT; + + /* check for overlap in physical addresses */ + if (p_start1 >= p_start2 && p_start1 < p_end2) + result |= PHYS; + if (p_start2 >= p_start1 && p_start2 < p_end1) + result |= PHYS; + + /* check for overlap in ioremap addresses */ + if (i_start1 >= i_start2 && i_start1 < i_end2) + result |= IOREMAP; + if (i_start2 >= i_start1 && i_start2 < i_end1) + result |= IOREMAP; + + return result; +} + +static int 
+adjacent(const struct rte_memzone * mz1, const struct rte_memzone * mz2) +{ + uint64_t start1, end1, start2, end2; + uint64_t p_start1, p_end1, p_start2, p_end2; + uint64_t i_start1, i_end1, i_start2, i_end2; + int result = 0; + + /* gather virtual addresses */ + start1 = mz1->addr_64; + end1 = mz1->addr_64 + mz1->len; + start2 = mz2->addr_64; + end2 = mz2->addr_64 + mz2->len; + + /* gather physical addresses */ + p_start1 = mz1->phys_addr; + p_end1 = mz1->phys_addr + mz1->len; + p_start2 = mz2->phys_addr; + p_end2 = mz2->phys_addr + mz2->len; + + /* gather ioremap addresses */ + i_start1 = mz1->ioremap_addr; + i_end1 = mz1->ioremap_addr + mz1->len; + i_start2 = mz2->ioremap_addr; + i_end2 = mz2->ioremap_addr + mz2->len; + + /* check if segments are virtually adjacent */ + if (start1 == end2) + result |= VIRT; + if (start2 == end1) + result |= VIRT; + + /* check if segments are physically adjacent */ + if (p_start1 == p_end2) + result |= PHYS; + if (p_start2 == p_end1) + result |= PHYS; + + /* check if segments are ioremap-adjacent */ + if (i_start1 == i_end2) + result |= IOREMAP; + if (i_start2 == i_end1) + result |= IOREMAP; + + return result; +} + +static int +has_adjacent_segments(struct ivshmem_segment * ms, int len) +{ + int i, j, a; + + for (i = 0; i < len; i++) + for (j = i + 1; j < len; j++) { + a = adjacent(&ms[i].entry.mz, &ms[j].entry.mz); + + /* check if segments are adjacent virtually and/or physically but + * not ioremap (since that would indicate that they are from + * different PCI devices and thus don't need to be concatenated. 
+ */ + if ((a & (VIRT|PHYS)) > 0 && (a & IOREMAP) == 0) + return 1; + } + return 0; +} + +static int +has_overlapping_segments(struct ivshmem_segment * ms, int len) +{ + int i, j; + + for (i = 0; i < len; i++) + for (j = i + 1; j < len; j++) + if (overlap(&ms[i].entry.mz, &ms[j].entry.mz)) + return 1; + return 0; +} + +static int +seg_compare(const void * a, const void * b) +{ + const struct ivshmem_segment * s1 = (const struct ivshmem_segment*) a; + const struct ivshmem_segment * s2 = (const struct ivshmem_segment*) b; + + /* move unallocated zones to the end */ + if (s1->entry.mz.addr == NULL && s2->entry.mz.addr == NULL) + return 0; + if (s1->entry.mz.addr == 0) + return 1; + if (s2->entry.mz.addr == 0) + return -1; + + return s1->entry.mz.phys_addr > s2->entry.mz.phys_addr; +} + +#ifdef RTE_LIBRTE_IVSHMEM_DEBUG +static void +entry_dump(struct rte_ivshmem_metadata_entry *e) +{ + RTE_LOG(DEBUG, EAL, "\tvirt: %p-%p\n", e->mz.addr, + RTE_PTR_ADD(e->mz.addr, e->mz.len)); + RTE_LOG(DEBUG, EAL, "\tphys: 0x%" PRIx64 "-0x%" PRIx64 "\n", + e->mz.phys_addr, + e->mz.phys_addr + e->mz.len); + RTE_LOG(DEBUG, EAL, "\tio: 0x%" PRIx64 "-0x%" PRIx64 "\n", + e->mz.ioremap_addr, + e->mz.ioremap_addr + e->mz.len); + RTE_LOG(DEBUG, EAL, "\tlen: 0x%" PRIx64 "\n", e->mz.len); + RTE_LOG(DEBUG, EAL, "\toff: 0x%" PRIx64 "\n", e->offset); +} +#endif + + + +/* + * Actual useful code + */ + +/* read through metadata mapped from the IVSHMEM device */ +static int +read_metadata(char * path, int path_len, int fd, uint64_t flen) +{ + struct rte_ivshmem_metadata metadata; + struct rte_ivshmem_metadata_entry * entry; + int idx, i; + void * ptr; + + ptr = map_metadata(fd, flen); + + if (ptr == MAP_FAILED) + return -1; + + metadata = *(struct rte_ivshmem_metadata*) (ptr); + + unmap_metadata(ptr); + + RTE_LOG(DEBUG, EAL, "Parsing metadata for \"%s\"\n", metadata.name); + + idx = ivshmem_config->segment_idx; + + for (i = 0; i < RTE_LIBRTE_IVSHMEM_MAX_ENTRIES && + idx <= RTE_MAX_MEMSEG; i++) { + + if 
(idx == RTE_MAX_MEMSEG) { + RTE_LOG(ERR, EAL, "Not enough memory segments!\n"); + return -1; + } + + entry = &metadata.entry[i]; + + /* stop on uninitialized memzone */ + if (entry->mz.len == 0) + break; + + /* copy metadata entry */ + memcpy(&ivshmem_config->segment[idx].entry, entry, + sizeof(struct rte_ivshmem_metadata_entry)); + + /* copy path */ + snprintf(ivshmem_config->segment[idx].path, path_len, "%s", path); + + idx++; + } + ivshmem_config->segment_idx = idx; + + return 0; +} + +/* check through each segment and look for adjacent or overlapping ones. */ +static int +cleanup_segments(struct ivshmem_segment * ms, int tbl_len) +{ + struct ivshmem_segment * s, * tmp; + int i, j, concat, seg_adjacent, seg_overlapping; + uint64_t start1, start2, end1, end2, p_start1, p_start2, i_start1, i_start2; + + qsort(ms, tbl_len, sizeof(struct ivshmem_segment), + seg_compare); + + while (has_overlapping_segments(ms, tbl_len) || + has_adjacent_segments(ms, tbl_len)) { + + for (i = 0; i < tbl_len; i++) { + s = &ms[i]; + + concat = 0; + + for (j = i + 1; j < tbl_len; j++) { + tmp = &ms[j]; + + /* check if this segment is overlapping with existing segment, + * or is adjacent to existing segment */ + seg_overlapping = overlap(&s->entry.mz, &tmp->entry.mz); + seg_adjacent = adjacent(&s->entry.mz, &tmp->entry.mz); + + /* check if segments fully overlap or are fully adjacent */ + if ((seg_adjacent == FULL) || (seg_overlapping == FULL)) { + +#ifdef RTE_LIBRTE_IVSHMEM_DEBUG + RTE_LOG(DEBUG, EAL, "Concatenating segments\n"); + RTE_LOG(DEBUG, EAL, "Segment %i:\n", i); + entry_dump(&s->entry); + RTE_LOG(DEBUG, EAL, "Segment %i:\n", j); + entry_dump(&tmp->entry); +#endif + + start1 = s->entry.mz.addr_64; + start2 = tmp->entry.mz.addr_64; + p_start1 = s->entry.mz.phys_addr; + p_start2 = tmp->entry.mz.phys_addr; + i_start1 = s->entry.mz.ioremap_addr; + i_start2 = tmp->entry.mz.ioremap_addr; + end1 = s->entry.mz.addr_64 + s->entry.mz.len; + end2 = tmp->entry.mz.addr_64 + 
tmp->entry.mz.len; + + /* settle for minimum start address and maximum length */ + s->entry.mz.addr_64 = RTE_MIN(start1, start2); + s->entry.mz.phys_addr = RTE_MIN(p_start1, p_start2); + s->entry.mz.ioremap_addr = RTE_MIN(i_start1, i_start2); + s->entry.offset = RTE_MIN(s->entry.offset, tmp->entry.offset); + s->entry.mz.len = RTE_MAX(end1, end2) - s->entry.mz.addr_64; + concat = 1; + +#ifdef RTE_LIBRTE_IVSHMEM_DEBUG + RTE_LOG(DEBUG, EAL, "Resulting segment:\n"); + entry_dump(&s->entry); + +#endif + } + /* if segments not fully overlap, we have an error condition. + * adjacent segments can coexist. + */ + else if (seg_overlapping > 0) { + RTE_LOG(ERR, EAL, "Segments %i and %i overlap!\n", i, j); +#ifdef RTE_LIBRTE_IVSHMEM_DEBUG + RTE_LOG(DEBUG, EAL, "Segment %i:\n", i); + entry_dump(&s->entry); + RTE_LOG(DEBUG, EAL, "Segment %i:\n", j); + entry_dump(&tmp->entry); +#endif + return -1; + } + if (concat) + break; + } + /* if we concatenated, remove segment at j */ + if (concat) { + remove_segment(ms, tbl_len, j); + tbl_len--; + break; + } + } + } + + return tbl_len; +} + +static int +create_shared_config(void) +{ + char path[PATH_MAX]; + int fd; + + /* build ivshmem config file path */ + snprintf(path, sizeof(path), IVSHMEM_CONFIG_PATH, + internal_config.hugefile_prefix); + + fd = open(path, O_CREAT | O_RDWR, 0600); + + if (fd < 0) { + RTE_LOG(ERR, EAL, "Could not open %s: %s\n", path, strerror(errno)); + return -1; + } + + /* try ex-locking first - if the file is locked, we have a problem */ + if (flock(fd, LOCK_EX | LOCK_NB) == -1) { + RTE_LOG(ERR, EAL, "Locking %s failed: %s\n", path, strerror(errno)); + close(fd); + return -1; + } + + if (ftruncate(fd, sizeof(struct ivshmem_shared_config)) < 0) { + RTE_LOG(ERR, EAL, "ftruncate failed: %s\n", strerror(errno)); + return -1; + } + + ivshmem_config = mmap(NULL, sizeof(struct ivshmem_shared_config), + PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0); + + if (ivshmem_config == MAP_FAILED) + return -1; + + 
memset(ivshmem_config, 0, sizeof(struct ivshmem_shared_config)); + + /* change the exclusive lock we got earlier to a shared lock */ + if (flock(fd, LOCK_SH | LOCK_NB) == -1) { + RTE_LOG(ERR, EAL, "Locking %s failed: %s \n", path, strerror(errno)); + return -1; + } + + close(fd); + + return 0; +} + +/* open shared config file and, if present, map the config. + * having no config file is not an error condition, as we later check if + * ivshmem_config is NULL (if it is, that means nothing was mapped). */ +static int +open_shared_config(void) +{ + char path[PATH_MAX]; + int fd; + + /* build ivshmem config file path */ + snprintf(path, sizeof(path), IVSHMEM_CONFIG_PATH, + internal_config.hugefile_prefix); + + fd = open(path, O_RDONLY); + + /* if the file doesn't exist, just return success */ + if (fd < 0 && errno == ENOENT) + return 0; + /* else we have an error condition */ + else if (fd < 0) { + RTE_LOG(ERR, EAL, "Could not open %s: %s\n", + path, strerror(errno)); + return -1; + } + + /* try ex-locking first - if the lock *does* succeed, this means it's a + * stray config file, so it should be deleted. + */ + if (flock(fd, LOCK_EX | LOCK_NB) != -1) { + + /* if we can't remove the file, something is wrong */ + if (unlink(path) < 0) { + RTE_LOG(ERR, EAL, "Could not remove %s: %s\n", path, + strerror(errno)); + return -1; + } + + /* release the lock */ + flock(fd, LOCK_UN); + close(fd); + + /* return success as having a stray config file is equivalent to not + * having config file at all. 
+ */ + return 0; + } + + ivshmem_config = mmap(NULL, sizeof(struct ivshmem_shared_config), + PROT_READ, MAP_SHARED, fd, 0); + + if (ivshmem_config == MAP_FAILED) + return -1; + + /* place a shared lock on config file */ + if (flock(fd, LOCK_SH | LOCK_NB) == -1) { + RTE_LOG(ERR, EAL, "Locking %s failed: %s \n", path, strerror(errno)); + return -1; + } + + close(fd); + + return 0; +} + +/* + * This function does the following: + * + * 1) Builds a table of ivshmem_segments with proper offset alignment + * 2) Cleans up that table so that we don't have any overlapping or adjacent + * memory segments + * 3) Creates memsegs from this table and maps them into memory. + */ +static inline int +map_all_segments(void) +{ + struct ivshmem_segment ms_tbl[RTE_MAX_MEMSEG]; + struct ivshmem_pci_device * pci_dev; + struct rte_mem_config * mcfg; + struct ivshmem_segment * seg; + int fd, fd_zero; + unsigned i, j; + struct rte_memzone mz; + struct rte_memseg ms; + void * base_addr; + uint64_t align, len; + phys_addr_t ioremap_addr; + + ioremap_addr = 0; + + memset(ms_tbl, 0, sizeof(ms_tbl)); + memset(&mz, 0, sizeof(struct rte_memzone)); + memset(&ms, 0, sizeof(struct rte_memseg)); + + /* first, build a table of memsegs to map, to avoid failed mmaps due to + * overlaps + */ + for (i = 0; i < ivshmem_config->segment_idx && i <= RTE_MAX_MEMSEG; i++) { + if (i == RTE_MAX_MEMSEG) { + RTE_LOG(ERR, EAL, "Too many segments requested!\n"); + return -1; + } + + seg = &ivshmem_config->segment[i]; + + /* copy segment to table */ + memcpy(&ms_tbl[i], seg, sizeof(struct ivshmem_segment)); + + /* find ioremap addr */ + for (j = 0; j < DIM(ivshmem_config->pci_devs); j++) { + pci_dev = &ivshmem_config->pci_devs[j]; + if (!strncmp(pci_dev->path, seg->path, sizeof(pci_dev->path))) { + ioremap_addr = pci_dev->ioremap_addr; + break; + } + } + if (ioremap_addr == 0) { + RTE_LOG(ERR, EAL, "Cannot find ioremap addr!\n"); + return -1; + } + + /* work out alignments */ + align = seg->entry.mz.addr_64 - + 
RTE_ALIGN_FLOOR(seg->entry.mz.addr_64, 0x1000); + len = RTE_ALIGN_CEIL(seg->entry.mz.len + align, 0x1000); + + /* save original alignments */ + ms_tbl[i].align = align; + + /* create a memory zone */ + mz.addr_64 = seg->entry.mz.addr_64 - align; + mz.len = len; + mz.hugepage_sz = seg->entry.mz.hugepage_sz; + mz.phys_addr = seg->entry.mz.phys_addr - align; + + /* find true physical address */ + mz.ioremap_addr = ioremap_addr + seg->entry.offset - align; + + ms_tbl[i].entry.offset = seg->entry.offset - align; + + memcpy(&ms_tbl[i].entry.mz, &mz, sizeof(struct rte_memzone)); + } + + /* clean up the segments */ + memseg_idx = cleanup_segments(ms_tbl, ivshmem_config->segment_idx); + + if (memseg_idx < 0) + return -1; + + mcfg = rte_eal_get_configuration()->mem_config; + + fd_zero = open("/dev/zero", O_RDWR); + + if (fd_zero < 0) { + RTE_LOG(ERR, EAL, "Cannot open /dev/zero: %s\n", strerror(errno)); + return -1; + } + + /* create memsegs and put them into DPDK memory */ + for (i = 0; i < (unsigned) memseg_idx; i++) { + + seg = &ms_tbl[i]; + + ms.addr_64 = seg->entry.mz.addr_64; + ms.hugepage_sz = seg->entry.mz.hugepage_sz; + ms.len = seg->entry.mz.len; + ms.nchannel = rte_memory_get_nchannel(); + ms.nrank = rte_memory_get_nrank(); + ms.phys_addr = seg->entry.mz.phys_addr; + ms.ioremap_addr = seg->entry.mz.ioremap_addr; + ms.socket_id = seg->entry.mz.socket_id; + + base_addr = mmap(ms.addr, ms.len, + PROT_READ | PROT_WRITE, MAP_PRIVATE, fd_zero, 0); + + if (base_addr == MAP_FAILED || base_addr != ms.addr) { + RTE_LOG(ERR, EAL, "Cannot map /dev/zero!\n"); + return -1; + } + + fd = open(seg->path, O_RDWR); + + if (fd < 0) { + RTE_LOG(ERR, EAL, "Cannot open %s: %s\n", seg->path, + strerror(errno)); + return -1; + } + + munmap(ms.addr, ms.len); + + base_addr = mmap(ms.addr, ms.len, + PROT_READ | PROT_WRITE, MAP_SHARED, fd, + seg->entry.offset); + + + if (base_addr == MAP_FAILED || base_addr != ms.addr) { + RTE_LOG(ERR, EAL, "Cannot map segment into memory: " + "expected %p 
got %p (%s)\n", ms.addr, base_addr,
+				strerror(errno));
+			return -1;
+		}
+
+		RTE_LOG(DEBUG, EAL, "Memory segment mapped: %p (len %" PRIx64 ") at "
+				"offset 0x%" PRIx64 "\n",
+				ms.addr, ms.len, seg->entry.offset);
+
+		/* put the pointers back into their real positions using original
+		 * alignment */
+		ms.addr_64 += seg->align;
+		ms.phys_addr += seg->align;
+		ms.ioremap_addr += seg->align;
+		ms.len -= seg->align;
+
+		/* at this point, the rest of DPDK memory is not initialized, so we
+		 * expect memsegs to be empty */
+		memcpy(&mcfg->memseg[i], &ms,
+				sizeof(struct rte_memseg));
+
+		close(fd);
+
+		RTE_LOG(DEBUG, EAL, "IVSHMEM segment found, size: 0x%lx\n",
+				ms.len);
+	}
+
+	return 0;
+}
+
+/*
+ * this happens at a later stage, after general EAL memory initialization.
+ *
+ * Copies every IVSHMEM segment entry into the shared memzone table, fills in
+ * the ioremap address of each new memzone from the memseg that contains it,
+ * and registers every memzone whose name carries the ring prefix in the ring
+ * tailq.
+ *
+ * Returns 0 on success (or when there is nothing to do: non-primary process
+ * or no IVSHMEM config), -1 on error.
+ */
+int
+rte_eal_ivshmem_obj_init(void)
+{
+	struct rte_ring_list* ring_list = NULL;
+	struct rte_mem_config * mcfg;
+	struct ivshmem_segment * seg;
+	struct rte_memzone * mz;
+	struct rte_ring * r;
+	struct rte_tailq_entry *te;
+	unsigned i, ms, idx;
+	uint64_t offset;
+
+	/* secondary process would not need any object discovery - it'll all
+	 * already be in shared config */
+	if (rte_eal_process_type() != RTE_PROC_PRIMARY || ivshmem_config == NULL)
+		return 0;
+
+	/* check that we have an initialised ring tail queue */
+	ring_list = RTE_TAILQ_LOOKUP(RTE_TAILQ_RING_NAME, rte_ring_list);
+	if (ring_list == NULL) {
+		RTE_LOG(ERR, EAL, "No rte_ring tailq found!\n");
+		return -1;
+	}
+
+	mcfg = rte_eal_get_configuration()->mem_config;
+
+	/* create memzones */
+	for (i = 0; i < ivshmem_config->segment_idx && i <= RTE_MAX_MEMZONE; i++) {
+
+		seg = &ivshmem_config->segment[i];
+
+		/* add memzone */
+		if (mcfg->memzone_cnt == RTE_MAX_MEMZONE) {
+			RTE_LOG(ERR, EAL, "No more memory zones available!\n");
+			return -1;
+		}
+
+		idx = mcfg->memzone_cnt;
+
+		RTE_LOG(DEBUG, EAL, "Found memzone: '%s' at %p (len 0x%" PRIx64 ")\n",
+				seg->entry.mz.name, seg->entry.mz.addr, seg->entry.mz.len);
+
+		memcpy(&mcfg->memzone[idx], &seg->entry.mz,
+				sizeof(struct rte_memzone));
+
+		/* find ioremap address */
+		for (ms = 0; ms <= RTE_MAX_MEMSEG; ms++) {
+			/* walked past the last memseg without a match */
+			if (ms == RTE_MAX_MEMSEG) {
+				RTE_LOG(ERR, EAL, "Physical address of segment not found!\n");
+				return -1;
+			}
+			if (CONTAINS(mcfg->memseg[ms], mcfg->memzone[idx])) {
+				offset = mcfg->memzone[idx].addr_64 -
+						mcfg->memseg[ms].addr_64;
+				mcfg->memzone[idx].ioremap_addr = mcfg->memseg[ms].ioremap_addr +
+						offset;
+				break;
+			}
+		}
+
+		mcfg->memzone_cnt++;
+	}
+
+	rte_rwlock_write_lock(RTE_EAL_TAILQ_RWLOCK);
+
+	/* find rings */
+	for (i = 0; i < mcfg->memzone_cnt; i++) {
+		mz = &mcfg->memzone[i];
+
+		/* check if memzone has a ring prefix */
+		if (strncmp(mz->name, RTE_RING_MZ_PREFIX,
+				sizeof(RTE_RING_MZ_PREFIX) - 1) != 0)
+			continue;
+
+		r = (struct rte_ring*) (mz->addr_64);
+
+		te = rte_zmalloc("RING_TAILQ_ENTRY", sizeof(*te), 0);
+		if (te == NULL) {
+			RTE_LOG(ERR, EAL, "Cannot allocate ring tailq entry!\n");
+			/* do not leave the tailq write lock held on the error path */
+			rte_rwlock_write_unlock(RTE_EAL_TAILQ_RWLOCK);
+			return -1;
+		}
+
+		te->data = (void *) r;
+
+		TAILQ_INSERT_TAIL(ring_list, te, next);
+
+		RTE_LOG(DEBUG, EAL, "Found ring: '%s' at %p\n", r->name, mz->addr);
+	}
+	rte_rwlock_write_unlock(RTE_EAL_TAILQ_RWLOCK);
+
+#ifdef RTE_LIBRTE_IVSHMEM_DEBUG
+	rte_memzone_dump(stdout);
+	rte_ring_list_dump(stdout);
+#endif
+
+	return 0;
+}
+
+/*
+ * initialize ivshmem structures.
+ *
+ * A secondary process opens the existing shared config. Otherwise the PCI
+ * device list is scanned for IVSHMEM devices; for each device with a BAR2
+ * that carries DPDK metadata the metadata is read and the device is recorded
+ * in ivshmem_config. Finally, if a config exists, all segments are mapped.
+ *
+ * Returns 0 on success (including "no IVSHMEM configuration found"),
+ * -1 on error.
+ */
+int rte_eal_ivshmem_init(void)
+{
+	struct rte_pci_device * dev;
+	struct rte_pci_resource * res;
+	int fd, ret;
+	char path[PATH_MAX];
+
+	/* initialize everything to 0 */
+	memset(path, 0, sizeof(path));
+	ivshmem_config = NULL;
+
+	pagesz = getpagesize();
+
+	RTE_LOG(DEBUG, EAL, "Searching for IVSHMEM devices...\n");
+
+	if (rte_eal_process_type() == RTE_PROC_SECONDARY) {
+
+		if (open_shared_config() < 0) {
+			RTE_LOG(ERR, EAL, "Could not open IVSHMEM config!\n");
+			return -1;
+		}
+	}
+	else {
+
+		TAILQ_FOREACH(dev, &pci_device_list, next) {
+
+			if (is_ivshmem_device(dev)) {
+
+				/* IVSHMEM memory is always on BAR2 */
+				res = &dev->mem_resource[2];
+
+				/* if we don't have a BAR2 */
+				if (res->len == 0)
+					continue;
+
+				/* construct pci device path */
+				snprintf(path, sizeof(path), IVSHMEM_RESOURCE_PATH,
+						dev->addr.domain, dev->addr.bus, dev->addr.devid,
+						dev->addr.function);
+
+				/* try to find memseg */
+				fd = open(path, O_RDWR);
+				if (fd < 0) {
+					RTE_LOG(ERR, EAL, "Could not open %s\n", path);
+					return -1;
+				}
+
+				/* check if it's a DPDK IVSHMEM device */
+				ret = has_ivshmem_metadata(fd, res->len);
+
+				/* is DPDK device */
+				if (ret == 1) {
+
+					/* config file creation is deferred until the first
+					 * DPDK device is found. then, it has to be created
+					 * only once. */
+					if (ivshmem_config == NULL &&
+							create_shared_config() < 0) {
+						RTE_LOG(ERR, EAL, "Could not create IVSHMEM config!\n");
+						close(fd);
+						return -1;
+					}
+
+					if (read_metadata(path, sizeof(path), fd, res->len) < 0) {
+						RTE_LOG(ERR, EAL, "Could not read metadata from"
+								" device %02x:%02x.%x!\n", dev->addr.bus,
+								dev->addr.devid, dev->addr.function);
+						close(fd);
+						return -1;
+					}
+
+					if (ivshmem_config->pci_devs_idx == RTE_LIBRTE_IVSHMEM_MAX_PCI_DEVS) {
+						RTE_LOG(WARNING, EAL,
+								"IVSHMEM PCI device limit exceeded. Increase "
+								"CONFIG_RTE_LIBRTE_IVSHMEM_MAX_PCI_DEVS in "
+								"your config file.\n");
+						/* break skips the close() at the bottom of the
+						 * loop body, so release the BAR fd here */
+						close(fd);
+						break;
+					}
+
+					RTE_LOG(INFO, EAL, "Found IVSHMEM device %02x:%02x.%x\n",
+							dev->addr.bus, dev->addr.devid, dev->addr.function);
+
+					ivshmem_config->pci_devs[ivshmem_config->pci_devs_idx].ioremap_addr = res->phys_addr;
+					snprintf(ivshmem_config->pci_devs[ivshmem_config->pci_devs_idx].path,
+							sizeof(ivshmem_config->pci_devs[ivshmem_config->pci_devs_idx].path),
+							"%s", path);
+
+					ivshmem_config->pci_devs_idx++;
+				}
+				/* failed to read */
+				else if (ret < 0) {
+					RTE_LOG(ERR, EAL, "Could not read IVSHMEM device: %s\n",
+							strerror(errno));
+					close(fd);
+					return -1;
+				}
+				/* not a DPDK device */
+				else
+					RTE_LOG(DEBUG, EAL, "Skipping non-DPDK IVSHMEM device\n");
+
+				/* close the BAR fd */
+				close(fd);
+			}
+		}
+	}
+
+	/* ivshmem_config is not NULL only if config was created and/or mapped */
+	if (ivshmem_config) {
+		if (map_all_segments() < 0) {
+			RTE_LOG(ERR, EAL, "Mapping IVSHMEM segments failed!\n");
+			return -1;
+		}
+	}
+	else {
+		RTE_LOG(DEBUG, EAL, "No IVSHMEM configuration found! \n");
+	}
+
+	return 0;
+}
+
+#endif
diff --git a/src/dpdk22/lib/librte_eal/linuxapp/eal/eal_lcore.c b/src/dpdk22/lib/librte_eal/linuxapp/eal/eal_lcore.c
new file mode 100644
index 00000000..de5b4260
--- /dev/null
+++ b/src/dpdk22/lib/librte_eal/linuxapp/eal/eal_lcore.c
@@ -0,0 +1,110 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include
+#include
+#include
+#include
+
+#include
+#include
+#include
+#include
+#include
+#include
+
+#include "eal_private.h"
+#include "eal_filesystem.h"
+#include "eal_thread.h"
+
+#define SYS_CPU_DIR "/sys/devices/system/cpu/cpu%u"
+#define CORE_ID_FILE "topology/core_id"
+#define NUMA_NODE_PATH "/sys/devices/system/node"
+
+/*
+ * Report whether a cpu is present: returns 1 when the core_id file of
+ * lcore_id exists under sysfs, 0 when it does not or when the path could
+ * not be built.
+ */
+int
+eal_cpu_detected(unsigned lcore_id)
+{
+	char core_id_path[PATH_MAX];
+	int written;
+
+	written = snprintf(core_id_path, sizeof(core_id_path), SYS_CPU_DIR
+		"/"CORE_ID_FILE, lcore_id);
+	if (written <= 0 || (unsigned)written >= sizeof(core_id_path))
+		return 0;
+
+	return access(core_id_path, F_OK) == 0;
+}
+
+/*
+ * Look up the NUMA node (socket id) of a logical core.
+ *
+ * Probes /sys/devices/system/node/nodeX/cpuY for every node X below
+ * RTE_MAX_NUMA_NODES and returns the first X for which the entry exists.
+ * Returns zero when the lcore is not found under any node.
+ */
+unsigned
+eal_cpu_socket_id(unsigned lcore_id)
+{
+	unsigned node = 0;
+
+	while (node < RTE_MAX_NUMA_NODES) {
+		char node_path[PATH_MAX];
+
+		snprintf(node_path, sizeof(node_path), "%s/node%u/cpu%u",
+				NUMA_NODE_PATH, node, lcore_id);
+		if (access(node_path, F_OK) == 0)
+			return node;
+		node++;
+	}
+	return 0;
+}
+
+/*
+ * Read the core id of lcore_id from its sysfs topology/core_id file.
+ * On any failure an error is logged and core 0 is assumed.
+ */
+unsigned
+eal_cpu_core_id(unsigned lcore_id)
+{
+	char core_id_path[PATH_MAX];
+	unsigned long core;
+	int written;
+
+	written = snprintf(core_id_path, sizeof(core_id_path), SYS_CPU_DIR "/%s",
+			lcore_id, CORE_ID_FILE);
+
+	/* only parse the file when the path was built without truncation */
+	if (written > 0 && (unsigned)written < sizeof(core_id_path) &&
+			eal_parse_sysfs_value(core_id_path, &core) == 0)
+		return (unsigned)core;
+
+	RTE_LOG(ERR, EAL, "Error reading core id value from %s "
+			"for lcore %u - assuming core 0\n", SYS_CPU_DIR, lcore_id);
+	return 0;
+}
diff --git a/src/dpdk22/lib/librte_eal/linuxapp/eal/eal_log.c b/src/dpdk22/lib/librte_eal/linuxapp/eal/eal_log.c
new file mode 100644
index 00000000..0b133c3e
--- /dev/null
+++ b/src/dpdk22/lib/librte_eal/linuxapp/eal/eal_log.c
@@ -0,0 +1,146 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */
+
+#include
+#include
+#include
+#include
+#include
+#include
+
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+#include "eal_private.h"
+
+/*
+ * Default log sink, installed once the mempool (and therefore the log
+ * history) is available. Each message is recorded in the history, written
+ * to stdout, and forwarded to syslog (truncated to BUFSIZ bytes).
+ * Returns what fwrite() reported for the stdout write.
+ */
+static ssize_t
+console_log_write(__attribute__((unused)) void *c, const char *buf, size_t size)
+{
+	char msg[BUFSIZ + 1];
+	ssize_t written;
+	uint32_t priority;
+	size_t msg_len;
+
+	/* add this log in history */
+	rte_log_add_in_history(buf, size);
+
+	/* write on stdout */
+	written = fwrite(buf, 1, size, stdout);
+	fflush(stdout);
+
+	/* truncate message if too big (should not happen) */
+	msg_len = (size > BUFSIZ) ? BUFSIZ : size;
+
+	/* Syslog error levels are from 0 to 7, so subtract 1 to convert */
+	priority = rte_log_cur_msg_loglevel() - 1;
+
+	/* syslog needs a NUL-terminated string, buf is not guaranteed to be one */
+	memcpy(msg, buf, msg_len);
+	msg[msg_len] = '\0';
+
+	/* write on syslog too */
+	syslog(priority, "%s", msg);
+
+	return written;
+}
+
+static cookie_io_functions_t console_log_func = {
+	.write = console_log_write,
+};
+
+/*
+ * set the log to default function, called during eal init process,
+ * once memzones are available.
+ */
+/*
+ * Creates a cookie stream backed by console_log_func, opens syslog with the
+ * given identifier and facility, and hands the stream to
+ * rte_eal_common_log_init(). Returns 0 on success, -1 on failure.
+ */
+int
+rte_eal_log_init(const char *id, int facility)
+{
+	FILE *log_stream;
+
+	log_stream = fopencookie(NULL, "w+", console_log_func);
+	if (log_stream == NULL)
+		return -1;
+
+	openlog(id, LOG_NDELAY | LOG_PID, facility);
+
+	if (rte_eal_common_log_init(log_stream) < 0)
+		return -1;
+
+	return 0;
+}
+
+/* early logs */
+
+/*
+ * early log function, used during boot when mempool (hence log
+ * history) is not available
+ *
+ * Returns the number of bytes written to stdout, or 0 on error. A cookie
+ * write function has to report bytes consumed and must not return a
+ * negative value; reporting the fwrite() item count (1) made stdio treat
+ * every message longer than one byte as a short write.
+ */
+static ssize_t
+early_log_write(__attribute__((unused)) void *c, const char *buf, size_t size)
+{
+	ssize_t ret;
+
+	/* element size 1 so that the return value is a byte count */
+	ret = fwrite(buf, 1, size, stdout);
+	fflush(stdout);
+	return ret;
+}
+
+static cookie_io_functions_t early_log_func = {
+	.write = early_log_write,
+};
+static FILE *early_log_stream;
+
+/*
+ * init the log library, called by rte_eal_init() to enable early
+ * logs
+ *
+ * Returns 0 on success, -1 if the early log stream cannot be created.
+ */
+int
+rte_eal_log_early_init(void)
+{
+	early_log_stream = fopencookie(NULL, "w+", early_log_func);
+	if (early_log_stream == NULL) {
+		printf("Cannot configure early_log_stream\n");
+		return -1;
+	}
+	rte_openlog_stream(early_log_stream);
+	return 0;
+}
diff --git a/src/dpdk22/lib/librte_eal/linuxapp/eal/eal_memory.c b/src/dpdk22/lib/librte_eal/linuxapp/eal/eal_memory.c
new file mode 100644
index 00000000..846fd31f
--- /dev/null
+++ b/src/dpdk22/lib/librte_eal/linuxapp/eal/eal_memory.c
@@ -0,0 +1,1599 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ +/* BSD LICENSE + * + * Copyright(c) 2013 6WIND. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of 6WIND S.A. nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. 
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#define _FILE_OFFSET_BITS 64
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+#include "eal_private.h"
+#include "eal_internal_cfg.h"
+#include "eal_filesystem.h"
+#include "eal_hugepages.h"
+
+#ifdef RTE_LIBRTE_XEN_DOM0
+/* Accessor: returns the xen_dom0_support field of internal_config. */
+int rte_xen_dom0_supported(void)
+{
+	return internal_config.xen_dom0_support;
+}
+#endif
+
+/**
+ * @file
+ * Huge page mapping under linux
+ *
+ * To reserve a big contiguous amount of memory, we use the hugepage
+ * feature of linux. For that, we need to have hugetlbfs mounted. This
+ * code will create many files in this directory (one per page) and
+ * map them in virtual memory. For each page, we will retrieve its
+ * physical address and remap it in order to have a virtual contiguous
+ * zone as well as a physical contiguous zone.
+ */
+
+static uint64_t baseaddr_offset;
+
+static unsigned proc_pagemap_readable;
+
+#define RANDOMIZE_VA_SPACE_FILE "/proc/sys/kernel/randomize_va_space"
+
+/*
+ * Probe whether /proc/self/pagemap can be opened and record the result in
+ * proc_pagemap_readable; logs an error when it cannot.
+ */
+static void
+test_proc_pagemap_readable(void)
+{
+	int fd = open("/proc/self/pagemap", O_RDONLY);
+
+	if (fd < 0) {
+		RTE_LOG(ERR, EAL,
+			"Cannot open /proc/self/pagemap: %s. "
+			"virt2phys address translation will not work\n",
+			strerror(errno));
+		return;
+	}
+
+	/* Is readable */
+	close(fd);
+	proc_pagemap_readable = 1;
+}
+
+/* Lock page in physical memory and prevent from swapping. */
+int
+rte_mem_lock_page(const void *virt)
+{
+	unsigned long virtual = (unsigned long)virt;
+	int page_size = getpagesize();
+	/* round the address down to the start of its page */
+	unsigned long aligned = (virtual & ~ (page_size - 1));
+	return mlock((void*)aligned, page_size);
+}
+
+/*
+ * Get physical address of any mapped virtual address in the current process.
+ *
+ * Returns RTE_BAD_PHYS_ADDR when /proc/self/pagemap is not usable or when
+ * the pagemap entry cannot be read in full.
+ */
+phys_addr_t
+rte_mem_virt2phy(const void *virtaddr)
+{
+	int fd;
+	uint64_t page, physaddr;
+	unsigned long virt_pfn;
+	int page_size;
+	off_t offset;
+	ssize_t nread;
+
+	/* Cannot parse /proc/self/pagemap, no need to log errors everywhere */
+	if (!proc_pagemap_readable)
+		return RTE_BAD_PHYS_ADDR;
+
+	/* standard page size */
+	page_size = getpagesize();
+
+	fd = open("/proc/self/pagemap", O_RDONLY);
+	if (fd < 0) {
+		RTE_LOG(ERR, EAL, "%s(): cannot open /proc/self/pagemap: %s\n",
+			__func__, strerror(errno));
+		return RTE_BAD_PHYS_ADDR;
+	}
+
+	/* pagemap holds one 64-bit entry per virtual page */
+	virt_pfn = (unsigned long)virtaddr / page_size;
+	offset = sizeof(uint64_t) * virt_pfn;
+	if (lseek(fd, offset, SEEK_SET) == (off_t) -1) {
+		RTE_LOG(ERR, EAL, "%s(): seek error in /proc/self/pagemap: %s\n",
+				__func__, strerror(errno));
+		close(fd);
+		return RTE_BAD_PHYS_ADDR;
+	}
+	nread = read(fd, &page, sizeof(uint64_t));
+	if (nread < 0) {
+		RTE_LOG(ERR, EAL, "%s(): cannot read /proc/self/pagemap: %s\n",
+				__func__, strerror(errno));
+		close(fd);
+		return RTE_BAD_PHYS_ADDR;
+	}
+	close(fd);
+
+	/* a short read (or EOF) leaves 'page' partly or wholly unset */
+	if ((size_t)nread != sizeof(uint64_t)) {
+		RTE_LOG(ERR, EAL, "%s(): read %zd bytes from /proc/self/pagemap "
+				"but expected %zu\n",
+				__func__, nread, sizeof(uint64_t));
+		return RTE_BAD_PHYS_ADDR;
+	}
+
+	/*
+	 * the pfn (page frame number) are bits 0-54 (see
+	 * pagemap.txt in linux Documentation)
+	 */
+	physaddr = ((page & 0x7fffffffffffffULL) * page_size)
+		+ ((unsigned long)virtaddr % page_size);
+	return physaddr;
+}
+
+/*
+ * For each hugepage in hugepg_tbl, fill the physaddr value. We find
+ * it by browsing the /proc/self/pagemap special file.
+ * Returns 0 on success, -1 as soon as one translation fails.
+ */
+static int
+find_physaddrs(struct hugepage_file *hugepg_tbl, struct hugepage_info *hpi)
+{
+	unsigned i;
+	phys_addr_t addr;
+
+	for (i = 0; i < hpi->num_pages[0]; i++) {
+		addr = rte_mem_virt2phy(hugepg_tbl[i].orig_va);
+		if (addr == RTE_BAD_PHYS_ADDR)
+			return -1;
+		hugepg_tbl[i].physaddr = addr;
+	}
+	return 0;
+}
+
+/*
+ * Check whether address-space layout randomization is enabled in
+ * the kernel. This is important for multi-process as it can prevent
+ * two processes mapping data to the same virtual address
+ * Returns:
+ * 0 - address space randomization disabled
+ * 1/2 - address space randomization enabled
+ * negative error code on error
+ */
+static int
+aslr_enabled(void)
+{
+	char c;
+	int retval, fd = open(RANDOMIZE_VA_SPACE_FILE, O_RDONLY);
+	if (fd < 0)
+		return -errno;
+	retval = read(fd, &c, 1);
+	if (retval < 0) {
+		/* capture errno before close() gets a chance to overwrite it */
+		int read_errno = errno;
+
+		close(fd);
+		return -read_errno;
+	}
+	close(fd);
+	if (retval == 0)
+		return -EIO;
+	switch (c) {
+		case '0' : return 0;
+		case '1' : return 1;
+		case '2' : return 2;
+		default: return -EINVAL;
+	}
+}
+
+/*
+ * Try to mmap *size bytes in /dev/zero. If it is successful, return the
+ * pointer to the mmap'd area and keep *size unmodified. Else, retry
+ * with a smaller zone: decrease *size by hugepage_sz until it reaches
+ * 0. In this case, return NULL. Note: this function returns an address
+ * which is a multiple of hugepage size.
+ */
+/*
+ * Implementation notes: the probe mapping is made hugepage_sz larger than
+ * requested so that the returned address can be rounded up to a hugepage
+ * boundary, and it is unmapped again before returning - the caller only
+ * receives an address hint. On success baseaddr_offset is advanced by the
+ * final *size.
+ */
+static void *
+get_virtual_area(size_t *size, size_t hugepage_sz)
+{
+	void *hint;
+	void *addr;
+	int fd;
+	int map_errno = 0;
+	uintptr_t aligned_addr;
+
+	if (internal_config.base_virtaddr != 0) {
+		hint = (void*) (uintptr_t) (internal_config.base_virtaddr +
+				baseaddr_offset);
+	}
+	else hint = NULL;
+
+	RTE_LOG(DEBUG, EAL, "Ask a virtual area of 0x%zx bytes\n", *size);
+
+	fd = open("/dev/zero", O_RDONLY);
+	if (fd < 0){
+		RTE_LOG(ERR, EAL, "Cannot open /dev/zero\n");
+		return NULL;
+	}
+	do {
+		/* always retry with the original hint, never with MAP_FAILED */
+		addr = mmap(hint,
+				(*size) + hugepage_sz, PROT_READ, MAP_PRIVATE, fd, 0);
+		if (addr == MAP_FAILED) {
+			map_errno = errno;
+			/* shrink by one hugepage without wrapping below zero */
+			*size = (*size > hugepage_sz) ? (*size - hugepage_sz) : 0;
+		}
+	} while (addr == MAP_FAILED && *size > 0);
+
+	if (addr == MAP_FAILED) {
+		close(fd);
+		/* report the mmap() failure, not whatever close() left in errno */
+		RTE_LOG(ERR, EAL, "Cannot get a virtual area: %s\n",
+			strerror(map_errno));
+		return NULL;
+	}
+
+	munmap(addr, (*size) + hugepage_sz);
+	close(fd);
+
+	/* align addr to a huge page size boundary */
+	aligned_addr = (uintptr_t)addr;
+	aligned_addr += (hugepage_sz - 1);
+	aligned_addr &= (~(uintptr_t)(hugepage_sz - 1));
+	addr = (void *)(aligned_addr);
+
+	RTE_LOG(DEBUG, EAL, "Virtual area found at %p (size = 0x%zx)\n",
+		addr, *size);
+
+	/* increment offset */
+	baseaddr_offset += *size;
+
+	return addr;
+}
+
+/*
+ * Mmap all hugepages of hugepage table: it first open a file in
+ * hugetlbfs, then mmap() hugepage_sz data in it. If orig is set, the
+ * virtual address is stored in hugepg_tbl[i].orig_va, else it is stored
+ * in hugepg_tbl[i].final_va. The second mapping (when orig is 0) tries to
+ * map continguous physical blocks in contiguous virtual blocks.
+ */
+static int
+map_all_hugepages(struct hugepage_file *hugepg_tbl,
+		struct hugepage_info *hpi, int orig)
+{
+	int fd;
+	unsigned i;
+	void *virtaddr;
+	void *vma_addr = NULL;
+	size_t vma_len = 0;
+
+#ifdef RTE_EAL_SINGLE_FILE_SEGMENTS
+	RTE_SET_USED(vma_len);
+#endif
+
+	for (i = 0; i < hpi->num_pages[0]; i++) {
+		uint64_t hugepage_sz = hpi->hugepage_sz;
+
+		if (orig) {
+			hugepg_tbl[i].file_id = i;
+			hugepg_tbl[i].size = hugepage_sz;
+#ifdef RTE_EAL_SINGLE_FILE_SEGMENTS
+			eal_get_hugefile_temp_path(hugepg_tbl[i].filepath,
+					sizeof(hugepg_tbl[i].filepath), hpi->hugedir,
+					hugepg_tbl[i].file_id);
+#else
+			eal_get_hugefile_path(hugepg_tbl[i].filepath,
+					sizeof(hugepg_tbl[i].filepath), hpi->hugedir,
+					hugepg_tbl[i].file_id);
+#endif
+			hugepg_tbl[i].filepath[sizeof(hugepg_tbl[i].filepath) - 1] = '\0';
+		}
+#ifndef RTE_ARCH_64
+		/* for 32-bit systems, don't remap 1G and 16G pages, just reuse
+		 * original map address as final map address.
+		 */
+		else if ((hugepage_sz == RTE_PGSIZE_1G)
+			|| (hugepage_sz == RTE_PGSIZE_16G)) {
+			hugepg_tbl[i].final_va = hugepg_tbl[i].orig_va;
+			hugepg_tbl[i].orig_va = NULL;
+			continue;
+		}
+#endif
+
+#ifndef RTE_EAL_SINGLE_FILE_SEGMENTS
+		else if (vma_len == 0) {
+			unsigned j, num_pages;
+
+			/* reserve a virtual area for next contiguous
+			 * physical block: count the number of
+			 * contiguous physical pages. */
+			for (j = i+1; j < hpi->num_pages[0] ; j++) {
+#ifdef RTE_ARCH_PPC_64
+				/* The physical addresses are sorted in
+				 * descending order on PPC64 */
+				if (hugepg_tbl[j].physaddr !=
+				    hugepg_tbl[j-1].physaddr - hugepage_sz)
+					break;
+#else
+				if (hugepg_tbl[j].physaddr !=
+				    hugepg_tbl[j-1].physaddr + hugepage_sz)
+					break;
+#endif
+			}
+			num_pages = j - i;
+			vma_len = num_pages * hugepage_sz;
+
+			/* get the biggest virtual memory area up to
+			 * vma_len. If it fails, vma_addr is NULL, so
+			 * let the kernel provide the address. */
+			vma_addr = get_virtual_area(&vma_len, hpi->hugepage_sz);
+			if (vma_addr == NULL)
+				vma_len = hugepage_sz;
+		}
+#endif
+
+		/* try to create hugepage file */
+		fd = open(hugepg_tbl[i].filepath, O_CREAT | O_RDWR, 0755);
+		if (fd < 0) {
+			RTE_LOG(ERR, EAL, "%s(): open failed: %s\n", __func__,
+					strerror(errno));
+			return -1;
+		}
+
+		virtaddr = mmap(vma_addr, hugepage_sz, PROT_READ | PROT_WRITE,
+				MAP_SHARED, fd, 0);
+		if (virtaddr == MAP_FAILED) {
+			RTE_LOG(ERR, EAL, "%s(): mmap failed: %s\n", __func__,
+					strerror(errno));
+			close(fd);
+			return -1;
+		}
+
+		if (orig) {
+			hugepg_tbl[i].orig_va = virtaddr;
+			memset(virtaddr, 0, hugepage_sz);
+		}
+		else {
+			hugepg_tbl[i].final_va = virtaddr;
+		}
+
+		/* set shared flock on the file. */
+		if (flock(fd, LOCK_SH | LOCK_NB) == -1) {
+			RTE_LOG(ERR, EAL, "%s(): Locking file failed:%s \n",
+				__func__, strerror(errno));
+			close(fd);
+			return -1;
+		}
+
+		close(fd);
+
+		vma_addr = (char *)vma_addr + hugepage_sz;
+		vma_len -= hugepage_sz;
+	}
+	return 0;
+}
+
+#ifdef RTE_EAL_SINGLE_FILE_SEGMENTS
+
+/*
+ * Remaps all hugepages into single file segments
+ *
+ * Each run of physically contiguous pages is remapped through one hugetlbfs
+ * file and described by one entry at the front of hugepg_tbl; the unused
+ * tail of the table is zeroed. Returns the number of segments created, or
+ * -1 on error.
+ */
+static int
+remap_all_hugepages(struct hugepage_file *hugepg_tbl, struct hugepage_info *hpi)
+{
+	int fd;
+	unsigned i = 0, j, num_pages, page_idx = 0;
+	void *vma_addr = NULL, *old_addr = NULL, *page_addr = NULL;
+	size_t vma_len = 0;
+	size_t hugepage_sz = hpi->hugepage_sz;
+	size_t total_size, offset;
+	char filepath[MAX_HUGEPAGE_PATH];
+	phys_addr_t physaddr;
+	int socket;
+
+	while (i < hpi->num_pages[0]) {
+
+#ifndef RTE_ARCH_64
+		/* for 32-bit systems, don't remap 1G pages and 16G pages,
+		 * just reuse original map address as final map address.
+		 */
+		if ((hugepage_sz == RTE_PGSIZE_1G)
+			|| (hugepage_sz == RTE_PGSIZE_16G)) {
+			hugepg_tbl[i].final_va = hugepg_tbl[i].orig_va;
+			hugepg_tbl[i].orig_va = NULL;
+			i++;
+			continue;
+		}
+#endif
+
+		/* reserve a virtual area for next contiguous
+		 * physical block: count the number of
+		 * contiguous physical pages. */
+		for (j = i+1; j < hpi->num_pages[0] ; j++) {
+#ifdef RTE_ARCH_PPC_64
+			/* The physical addresses are sorted in descending
+			 * order on PPC64 */
+			if (hugepg_tbl[j].physaddr !=
+				hugepg_tbl[j-1].physaddr - hugepage_sz)
+				break;
+#else
+			if (hugepg_tbl[j].physaddr !=
+				hugepg_tbl[j-1].physaddr + hugepage_sz)
+				break;
+#endif
+		}
+		num_pages = j - i;
+		vma_len = num_pages * hugepage_sz;
+
+		socket = hugepg_tbl[i].socket_id;
+
+		/* get the biggest virtual memory area up to
+		 * vma_len. If it fails, vma_addr is NULL, so
+		 * let the kernel provide the address. */
+		vma_addr = get_virtual_area(&vma_len, hpi->hugepage_sz);
+
+		/* If we can't find a big enough virtual area, work out how many pages
+		 * we are going to get */
+		if (vma_addr == NULL)
+			j = i + 1;
+		else if (vma_len != num_pages * hugepage_sz) {
+			num_pages = vma_len / hugepage_sz;
+			j = i + num_pages;
+
+		}
+
+		hugepg_tbl[page_idx].file_id = page_idx;
+		eal_get_hugefile_path(filepath,
+				sizeof(filepath),
+				hpi->hugedir,
+				hugepg_tbl[page_idx].file_id);
+
+		/* try to create hugepage file */
+		fd = open(filepath, O_CREAT | O_RDWR, 0755);
+		if (fd < 0) {
+			RTE_LOG(ERR, EAL, "%s(): open failed: %s\n", __func__, strerror(errno));
+			return -1;
+		}
+
+		total_size = 0;
+		for (;i < j; i++) {
+
+			/* unmap current segment */
+			if (total_size > 0)
+				munmap(vma_addr, total_size);
+
+			/* unmap original page */
+			munmap(hugepg_tbl[i].orig_va, hugepage_sz);
+			unlink(hugepg_tbl[i].filepath);
+
+			total_size += hugepage_sz;
+
+			old_addr = vma_addr;
+
+			/* map new, bigger segment */
+			vma_addr = mmap(vma_addr, total_size,
+					PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
+
+			if (vma_addr == MAP_FAILED || vma_addr != old_addr) {
+				RTE_LOG(ERR, EAL, "%s(): mmap failed: %s\n", __func__, strerror(errno));
+				close(fd);
+				return -1;
+			}
+
+			/* touch the page. this is needed because kernel postpones mapping
+			 * creation until the first page fault. with this, we pin down
+			 * the page and it is marked as used and gets into process' pagemap.
+			 */
+			for (offset = 0; offset < total_size; offset += hugepage_sz)
+				*((volatile uint8_t*) RTE_PTR_ADD(vma_addr, offset));
+		}
+
+		/* set shared flock on the file. */
+		if (flock(fd, LOCK_SH | LOCK_NB) == -1) {
+			RTE_LOG(ERR, EAL, "%s(): Locking file failed:%s \n",
+				__func__, strerror(errno));
+			close(fd);
+			return -1;
+		}
+
+		snprintf(hugepg_tbl[page_idx].filepath, MAX_HUGEPAGE_PATH, "%s",
+				filepath);
+
+		physaddr = rte_mem_virt2phy(vma_addr);
+
+		if (physaddr == RTE_BAD_PHYS_ADDR) {
+			/* the segment file is still open here - do not leak it */
+			close(fd);
+			return -1;
+		}
+
+		hugepg_tbl[page_idx].final_va = vma_addr;
+
+		hugepg_tbl[page_idx].physaddr = physaddr;
+
+		hugepg_tbl[page_idx].repeated = num_pages;
+
+		hugepg_tbl[page_idx].socket_id = socket;
+
+		close(fd);
+
+		/* verify the memory segment - that is, check that every VA corresponds
+		 * to the physical address we expect to see
+		 */
+		for (offset = 0; offset < vma_len; offset += hugepage_sz) {
+			uint64_t expected_physaddr;
+
+			expected_physaddr = hugepg_tbl[page_idx].physaddr + offset;
+			page_addr = RTE_PTR_ADD(vma_addr, offset);
+			physaddr = rte_mem_virt2phy(page_addr);
+
+			if (physaddr != expected_physaddr) {
+				RTE_LOG(ERR, EAL, "Segment sanity check failed: wrong physaddr "
+						"at %p (offset 0x%" PRIx64 ": 0x%" PRIx64
+						" (expected 0x%" PRIx64 ")\n",
+						page_addr, offset, physaddr, expected_physaddr);
+				return -1;
+			}
+		}
+
+		/* zero out the whole segment */
+		memset(hugepg_tbl[page_idx].final_va, 0, total_size);
+
+		page_idx++;
+	}
+
+	/* zero out the rest */
+	memset(&hugepg_tbl[page_idx], 0, (hpi->num_pages[0] - page_idx) * sizeof(struct hugepage_file));
+	return page_idx;
+}
+#else/* RTE_EAL_SINGLE_FILE_SEGMENTS=n */
+
+/* Unmap all hugepages from original mapping */
+static int
+unmap_all_hugepages_orig(struct hugepage_file *hugepg_tbl, struct hugepage_info *hpi)
+{
+	unsigned i;
+	for (i = 0; i < hpi->num_pages[0]; i++) {
+		if (hugepg_tbl[i].orig_va) {
+			munmap(hugepg_tbl[i].orig_va, hpi->hugepage_sz);
+			hugepg_tbl[i].orig_va = NULL;
+		}
+	}
+	return 0;
+}
+#endif /* RTE_EAL_SINGLE_FILE_SEGMENTS */
+
+/*
+ * Parse /proc/self/numa_maps to get the NUMA socket ID for each huge
+ * page.
+ *
+ * Returns 0 on success or when numa_maps cannot be opened, -1 on a parse
+ * error or when fewer pages than hpi->num_pages[0] were matched.
+ */
+static int
+find_numasocket(struct hugepage_file *hugepg_tbl, struct hugepage_info *hpi)
+{
+	int socket_id;
+	char *end, *nodestr;
+	unsigned i, hp_count = 0;
+	uint64_t virt_addr;
+	char buf[BUFSIZ];
+	char hugedir_str[PATH_MAX];
+	FILE *f;
+
+	f = fopen("/proc/self/numa_maps", "r");
+	if (f == NULL) {
+		RTE_LOG(NOTICE, EAL, "cannot open /proc/self/numa_maps,"
+				" consider that all memory is in socket_id 0\n");
+		return 0;
+	}
+
+	snprintf(hugedir_str, sizeof(hugedir_str),
+			"%s/%s", hpi->hugedir, internal_config.hugefile_prefix);
+
+	/* parse numa map */
+	while (fgets(buf, sizeof(buf), f) != NULL) {
+
+		/* ignore non huge page */
+		if (strstr(buf, " huge ") == NULL &&
+				strstr(buf, hugedir_str) == NULL)
+			continue;
+
+		/* get zone addr */
+		virt_addr = strtoull(buf, &end, 16);
+		if (virt_addr == 0 || end == buf) {
+			RTE_LOG(ERR, EAL, "%s(): error in numa_maps parsing\n", __func__);
+			goto error;
+		}
+
+		/* get node id (socket id) */
+		nodestr = strstr(buf, " N");
+		if (nodestr == NULL) {
+			RTE_LOG(ERR, EAL, "%s(): error in numa_maps parsing\n", __func__);
+			goto error;
+		}
+		nodestr += 2;
+		end = strstr(nodestr, "=");
+		if (end == NULL) {
+			RTE_LOG(ERR, EAL, "%s(): error in numa_maps parsing\n", __func__);
+			goto error;
+		}
+		end[0] = '\0';
+		end = NULL;
+
+		socket_id = strtoul(nodestr, &end, 0);
+		if ((nodestr[0] == '\0') || (end == NULL) || (*end != '\0')) {
+			RTE_LOG(ERR, EAL, "%s(): error in numa_maps parsing\n", __func__);
+			goto error;
+		}
+
+		/* if we find this page in our mappings, set socket_id */
+		for (i = 0; i < hpi->num_pages[0]; i++) {
+			void *va = (void *)(unsigned long)virt_addr;
+			if (hugepg_tbl[i].orig_va == va) {
+				hugepg_tbl[i].socket_id = socket_id;
+				hp_count++;
+			}
+		}
+	}
+
+	if (hp_count < hpi->num_pages[0])
+		goto error;
+
+	fclose(f);
+	return 0;
+
+error:
+	fclose(f);
+	return -1;
+}
+
+/*
+ * Sort the hugepg_tbl by physical address (lower addresses first on x86,
+ * higher address first on powerpc). We use a slow algorithm, but we won't
+ * have millions of pages, and this is only done at init time.
+ */
+static int
+sort_by_physaddr(struct hugepage_file *hugepg_tbl, struct hugepage_info *hpi)
+{
+	unsigned i, j;
+	int compare_idx;
+	uint64_t compare_addr;
+	struct hugepage_file tmp;
+
+	for (i = 0; i < hpi->num_pages[0]; i++) {
+		compare_addr = 0;
+		compare_idx = -1;
+
+		/*
+		 * browse all entries starting at 'i', and find the
+		 * entry with the smallest addr
+		 */
+		for (j=i; j< hpi->num_pages[0]; j++) {
+
+			if (compare_addr == 0 ||
+#ifdef RTE_ARCH_PPC_64
+				hugepg_tbl[j].physaddr > compare_addr) {
+#else
+				hugepg_tbl[j].physaddr < compare_addr) {
+#endif
+				compare_addr = hugepg_tbl[j].physaddr;
+				compare_idx = j;
+			}
+		}
+
+		/* should not happen */
+		if (compare_idx == -1) {
+			RTE_LOG(ERR, EAL, "%s(): error in physaddr sorting\n", __func__);
+			return -1;
+		}
+
+		/* swap the 2 entries in the table */
+		memcpy(&tmp, &hugepg_tbl[compare_idx],
+			sizeof(struct hugepage_file));
+		memcpy(&hugepg_tbl[compare_idx], &hugepg_tbl[i],
+			sizeof(struct hugepage_file));
+		memcpy(&hugepg_tbl[i], &tmp, sizeof(struct hugepage_file));
+	}
+	return 0;
+}
+
+/*
+ * Uses mmap to create a shared memory area for storage of data
+ * Used in this file to store the hugepage file map on disk
+ *
+ * Returns the mapped address, or NULL on any failure (open, ftruncate
+ * or mmap).
+ */
+static void *
+create_shared_memory(const char *filename, const size_t mem_size)
+{
+	void *retval;
+	int fd = open(filename, O_CREAT | O_RDWR, 0666);
+	if (fd < 0)
+		return NULL;
+	if (ftruncate(fd, mem_size) < 0) {
+		close(fd);
+		return NULL;
+	}
+	retval = mmap(NULL, mem_size, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
+	close(fd);
+	/* mmap() signals failure with MAP_FAILED; report it as NULL like the
+	 * other failure paths of this function */
+	if (retval == MAP_FAILED)
+		return NULL;
+	return retval;
+}
+
+/*
+ * this copies *active* hugepages from one hugepage table to another.
+ * destination is typically the shared memory.
+ */ +static int +copy_hugepages_to_shared_mem(struct hugepage_file * dst, int dest_size, + const struct hugepage_file * src, int src_size) +{ + int src_pos, dst_pos = 0; + + for (src_pos = 0; src_pos < src_size; src_pos++) { + if (src[src_pos].final_va != NULL) { + /* error on overflow attempt */ + if (dst_pos == dest_size) + return -1; + memcpy(&dst[dst_pos], &src[src_pos], sizeof(struct hugepage_file)); + dst_pos++; + } + } + return 0; +} + +static int +unlink_hugepage_files(struct hugepage_file *hugepg_tbl, + unsigned num_hp_info) +{ + unsigned socket, size; + int page, nrpages = 0; + + /* get total number of hugepages */ + for (size = 0; size < num_hp_info; size++) + for (socket = 0; socket < RTE_MAX_NUMA_NODES; socket++) + nrpages += + internal_config.hugepage_info[size].num_pages[socket]; + + for (page = 0; page < nrpages; page++) { + struct hugepage_file *hp = &hugepg_tbl[page]; + + if (hp->final_va != NULL && unlink(hp->filepath)) { + RTE_LOG(WARNING, EAL, "%s(): Removing %s failed: %s\n", + __func__, hp->filepath, strerror(errno)); + } + } + return 0; +} + +/* + * unmaps hugepages that are not going to be used. since we originally allocate + * ALL hugepages (not just those we need), additional unmapping needs to be done. 
+ */ +static int +unmap_unneeded_hugepages(struct hugepage_file *hugepg_tbl, + struct hugepage_info *hpi, + unsigned num_hp_info) +{ + unsigned socket, size; + int page, nrpages = 0; + + /* get total number of hugepages */ + for (size = 0; size < num_hp_info; size++) + for (socket = 0; socket < RTE_MAX_NUMA_NODES; socket++) + nrpages += internal_config.hugepage_info[size].num_pages[socket]; + + for (size = 0; size < num_hp_info; size++) { + for (socket = 0; socket < RTE_MAX_NUMA_NODES; socket++) { + unsigned pages_found = 0; + + /* traverse until we have unmapped all the unused pages */ + for (page = 0; page < nrpages; page++) { + struct hugepage_file *hp = &hugepg_tbl[page]; + +#ifdef RTE_EAL_SINGLE_FILE_SEGMENTS + /* if this page was already cleared */ + if (hp->final_va == NULL) + continue; +#endif + + /* find a page that matches the criteria */ + if ((hp->size == hpi[size].hugepage_sz) && + (hp->socket_id == (int) socket)) { + + /* if we skipped enough pages, unmap the rest */ + if (pages_found == hpi[size].num_pages[socket]) { + uint64_t unmap_len; + +#ifdef RTE_EAL_SINGLE_FILE_SEGMENTS + unmap_len = hp->size * hp->repeated; +#else + unmap_len = hp->size; +#endif + + /* get start addr and len of the remaining segment */ + munmap(hp->final_va, (size_t) unmap_len); + + hp->final_va = NULL; + if (unlink(hp->filepath) == -1) { + RTE_LOG(ERR, EAL, "%s(): Removing %s failed: %s\n", + __func__, hp->filepath, strerror(errno)); + return -1; + } + } +#ifdef RTE_EAL_SINGLE_FILE_SEGMENTS + /* else, check how much do we need to map */ + else { + int nr_pg_left = + hpi[size].num_pages[socket] - pages_found; + + /* if we need enough memory to fit into the segment */ + if (hp->repeated <= nr_pg_left) { + pages_found += hp->repeated; + } + /* truncate the segment */ + else { + uint64_t final_size = nr_pg_left * hp->size; + uint64_t seg_size = hp->repeated * hp->size; + + void * unmap_va = RTE_PTR_ADD(hp->final_va, + final_size); + int fd; + + munmap(unmap_va, seg_size - 
final_size); + + fd = open(hp->filepath, O_RDWR); + if (fd < 0) { + RTE_LOG(ERR, EAL, "Cannot open %s: %s\n", + hp->filepath, strerror(errno)); + return -1; + } + if (ftruncate(fd, final_size) < 0) { + RTE_LOG(ERR, EAL, "Cannot truncate %s: %s\n", + hp->filepath, strerror(errno)); + return -1; + } + close(fd); + + pages_found += nr_pg_left; + hp->repeated = nr_pg_left; + } + } +#else + /* else, lock the page and skip */ + else + pages_found++; +#endif + + } /* match page */ + } /* foreach page */ + } /* foreach socket */ + } /* foreach pagesize */ + + return 0; +} + +static inline uint64_t +get_socket_mem_size(int socket) +{ + uint64_t size = 0; + unsigned i; + + for (i = 0; i < internal_config.num_hugepage_sizes; i++){ + struct hugepage_info *hpi = &internal_config.hugepage_info[i]; + if (hpi->hugedir != NULL) + size += hpi->hugepage_sz * hpi->num_pages[socket]; + } + + return size; +} + +/* + * This function is a NUMA-aware equivalent of calc_num_pages. + * It takes in the list of hugepage sizes and the + * number of pages thereof, and calculates the best number of + * pages of each size to fulfill the request for ram + */ +static int +calc_num_pages_per_socket(uint64_t * memory, + struct hugepage_info *hp_info, + struct hugepage_info *hp_used, + unsigned num_hp_info) +{ + unsigned socket, j, i = 0; + unsigned requested, available; + int total_num_pages = 0; + uint64_t remaining_mem, cur_mem; + uint64_t total_mem = internal_config.memory; + + if (num_hp_info == 0) + return -1; + + /* if specific memory amounts per socket weren't requested */ + if (internal_config.force_sockets == 0) { + int cpu_per_socket[RTE_MAX_NUMA_NODES]; + size_t default_size, total_size; + unsigned lcore_id; + + /* Compute number of cores per socket */ + memset(cpu_per_socket, 0, sizeof(cpu_per_socket)); + RTE_LCORE_FOREACH(lcore_id) { + cpu_per_socket[rte_lcore_to_socket_id(lcore_id)]++; + } + + /* + * Automatically spread requested memory amongst detected sockets according + * to number 
of cores from cpu mask present on each socket + */ + total_size = internal_config.memory; + for (socket = 0; socket < RTE_MAX_NUMA_NODES && total_size != 0; socket++) { + + /* Set memory amount per socket */ + default_size = (internal_config.memory * cpu_per_socket[socket]) + / rte_lcore_count(); + + /* Limit to maximum available memory on socket */ + default_size = RTE_MIN(default_size, get_socket_mem_size(socket)); + + /* Update sizes */ + memory[socket] = default_size; + total_size -= default_size; + } + + /* + * If some memory is remaining, try to allocate it by getting all + * available memory from sockets, one after the other + */ + for (socket = 0; socket < RTE_MAX_NUMA_NODES && total_size != 0; socket++) { + /* take whatever is available */ + default_size = RTE_MIN(get_socket_mem_size(socket) - memory[socket], + total_size); + + /* Update sizes */ + memory[socket] += default_size; + total_size -= default_size; + } + } + + for (socket = 0; socket < RTE_MAX_NUMA_NODES && total_mem != 0; socket++) { + /* skips if the memory on specific socket wasn't requested */ + for (i = 0; i < num_hp_info && memory[socket] != 0; i++){ + hp_used[i].hugedir = hp_info[i].hugedir; + hp_used[i].num_pages[socket] = RTE_MIN( + memory[socket] / hp_info[i].hugepage_sz, + hp_info[i].num_pages[socket]); + + cur_mem = hp_used[i].num_pages[socket] * + hp_used[i].hugepage_sz; + + memory[socket] -= cur_mem; + total_mem -= cur_mem; + + total_num_pages += hp_used[i].num_pages[socket]; + + /* check if we have met all memory requests */ + if (memory[socket] == 0) + break; + + /* check if we have any more pages left at this size, if so + * move on to next size */ + if (hp_used[i].num_pages[socket] == hp_info[i].num_pages[socket]) + continue; + /* At this point we know that there are more pages available that are + * bigger than the memory we want, so lets see if we can get enough + * from other page sizes. 
+ */ + remaining_mem = 0; + for (j = i+1; j < num_hp_info; j++) + remaining_mem += hp_info[j].hugepage_sz * + hp_info[j].num_pages[socket]; + + /* is there enough other memory, if not allocate another page and quit */ + if (remaining_mem < memory[socket]){ + cur_mem = RTE_MIN(memory[socket], + hp_info[i].hugepage_sz); + memory[socket] -= cur_mem; + total_mem -= cur_mem; + hp_used[i].num_pages[socket]++; + total_num_pages++; + break; /* we are done with this socket*/ + } + } + /* if we didn't satisfy all memory requirements per socket */ + if (memory[socket] > 0) { + /* to prevent icc errors */ + requested = (unsigned) (internal_config.socket_mem[socket] / + 0x100000); + available = requested - + ((unsigned) (memory[socket] / 0x100000)); + RTE_LOG(ERR, EAL, "Not enough memory available on socket %u! " + "Requested: %uMB, available: %uMB\n", socket, + requested, available); + return -1; + } + } + + /* if we didn't satisfy total memory requirements */ + if (total_mem > 0) { + requested = (unsigned) (internal_config.memory / 0x100000); + available = requested - (unsigned) (total_mem / 0x100000); + RTE_LOG(ERR, EAL, "Not enough memory available! Requested: %uMB," + " available: %uMB\n", requested, available); + return -1; + } + return total_num_pages; +} + +/* + * Prepare physical memory mapping: fill configuration structure with + * these infos, return 0 on success. + * 1. map N huge pages in separate files in hugetlbfs + * 2. find associated physical addr + * 3. find associated NUMA socket ID + * 4. sort all huge pages by physical address + * 5. remap these N huge pages in the correct order + * 6. unmap the first mapping + * 7. 
fill memsegs in configuration with contiguous zones + */ +int +rte_eal_hugepage_init(void) +{ + struct rte_mem_config *mcfg; + struct hugepage_file *hugepage, *tmp_hp = NULL; + struct hugepage_info used_hp[MAX_HUGEPAGE_SIZES]; + + uint64_t memory[RTE_MAX_NUMA_NODES]; + + unsigned hp_offset; + int i, j, new_memseg; + int nr_hugefiles, nr_hugepages = 0; + void *addr; +#ifdef RTE_EAL_SINGLE_FILE_SEGMENTS + int new_pages_count[MAX_HUGEPAGE_SIZES]; +#endif + + test_proc_pagemap_readable(); + + memset(used_hp, 0, sizeof(used_hp)); + + /* get pointer to global configuration */ + mcfg = rte_eal_get_configuration()->mem_config; + + /* hugetlbfs can be disabled */ + if (internal_config.no_hugetlbfs) { + addr = mmap(NULL, internal_config.memory, PROT_READ | PROT_WRITE, + MAP_PRIVATE | MAP_ANONYMOUS, 0, 0); + if (addr == MAP_FAILED) { + RTE_LOG(ERR, EAL, "%s: mmap() failed: %s\n", __func__, + strerror(errno)); + return -1; + } + mcfg->memseg[0].phys_addr = (phys_addr_t)(uintptr_t)addr; + mcfg->memseg[0].addr = addr; + mcfg->memseg[0].hugepage_sz = RTE_PGSIZE_4K; + mcfg->memseg[0].len = internal_config.memory; + mcfg->memseg[0].socket_id = 0; + return 0; + } + +/* check if app runs on Xen Dom0 */ + if (internal_config.xen_dom0_support) { +#ifdef RTE_LIBRTE_XEN_DOM0 + /* use dom0_mm kernel driver to init memory */ + if (rte_xen_dom0_memory_init() < 0) + return -1; + else + return 0; +#endif + } + + /* calculate total number of hugepages available. at this point we haven't + * yet started sorting them so they all are on socket 0 */ + for (i = 0; i < (int) internal_config.num_hugepage_sizes; i++) { + /* meanwhile, also initialize used_hp hugepage sizes in used_hp */ + used_hp[i].hugepage_sz = internal_config.hugepage_info[i].hugepage_sz; + + nr_hugepages += internal_config.hugepage_info[i].num_pages[0]; + } + + /* + * allocate a memory area for hugepage table. + * this isn't shared memory yet. 
due to the fact that we need some + * processing done on these pages, shared memory will be created + * at a later stage. + */ + tmp_hp = malloc(nr_hugepages * sizeof(struct hugepage_file)); + if (tmp_hp == NULL) + goto fail; + + memset(tmp_hp, 0, nr_hugepages * sizeof(struct hugepage_file)); + + hp_offset = 0; /* where we start the current page size entries */ + + /* map all hugepages and sort them */ + for (i = 0; i < (int)internal_config.num_hugepage_sizes; i ++){ + struct hugepage_info *hpi; + + /* + * we don't yet mark hugepages as used at this stage, so + * we just map all hugepages available to the system + * all hugepages are still located on socket 0 + */ + hpi = &internal_config.hugepage_info[i]; + + if (hpi->num_pages[0] == 0) + continue; + + /* map all hugepages available */ + if (map_all_hugepages(&tmp_hp[hp_offset], hpi, 1) < 0){ + RTE_LOG(DEBUG, EAL, "Failed to mmap %u MB hugepages\n", + (unsigned)(hpi->hugepage_sz / 0x100000)); + goto fail; + } + + /* find physical addresses and sockets for each hugepage */ + if (find_physaddrs(&tmp_hp[hp_offset], hpi) < 0){ + RTE_LOG(DEBUG, EAL, "Failed to find phys addr for %u MB pages\n", + (unsigned)(hpi->hugepage_sz / 0x100000)); + goto fail; + } + + if (find_numasocket(&tmp_hp[hp_offset], hpi) < 0){ + RTE_LOG(DEBUG, EAL, "Failed to find NUMA socket for %u MB pages\n", + (unsigned)(hpi->hugepage_sz / 0x100000)); + goto fail; + } + + if (sort_by_physaddr(&tmp_hp[hp_offset], hpi) < 0) + goto fail; + +#ifdef RTE_EAL_SINGLE_FILE_SEGMENTS + /* remap all hugepages into single file segments */ + new_pages_count[i] = remap_all_hugepages(&tmp_hp[hp_offset], hpi); + if (new_pages_count[i] < 0){ + RTE_LOG(DEBUG, EAL, "Failed to remap %u MB pages\n", + (unsigned)(hpi->hugepage_sz / 0x100000)); + goto fail; + } + + /* we have processed a num of hugepages of this size, so inc offset */ + hp_offset += new_pages_count[i]; +#else + /* remap all hugepages */ + if (map_all_hugepages(&tmp_hp[hp_offset], hpi, 0) < 0){ + 
RTE_LOG(DEBUG, EAL, "Failed to remap %u MB pages\n", + (unsigned)(hpi->hugepage_sz / 0x100000)); + goto fail; + } + + /* unmap original mappings */ + if (unmap_all_hugepages_orig(&tmp_hp[hp_offset], hpi) < 0) + goto fail; + + /* we have processed a num of hugepages of this size, so inc offset */ + hp_offset += hpi->num_pages[0]; +#endif + } + +#ifdef RTE_EAL_SINGLE_FILE_SEGMENTS + nr_hugefiles = 0; + for (i = 0; i < (int) internal_config.num_hugepage_sizes; i++) { + nr_hugefiles += new_pages_count[i]; + } +#else + nr_hugefiles = nr_hugepages; +#endif + + + /* clean out the numbers of pages */ + for (i = 0; i < (int) internal_config.num_hugepage_sizes; i++) + for (j = 0; j < RTE_MAX_NUMA_NODES; j++) + internal_config.hugepage_info[i].num_pages[j] = 0; + + /* get hugepages for each socket */ + for (i = 0; i < nr_hugefiles; i++) { + int socket = tmp_hp[i].socket_id; + + /* find a hugepage info with right size and increment num_pages */ + const int nb_hpsizes = RTE_MIN(MAX_HUGEPAGE_SIZES, + (int)internal_config.num_hugepage_sizes); + for (j = 0; j < nb_hpsizes; j++) { + if (tmp_hp[i].size == + internal_config.hugepage_info[j].hugepage_sz) { +#ifdef RTE_EAL_SINGLE_FILE_SEGMENTS + internal_config.hugepage_info[j].num_pages[socket] += + tmp_hp[i].repeated; +#else + internal_config.hugepage_info[j].num_pages[socket]++; +#endif + } + } + } + + /* make a copy of socket_mem, needed for number of pages calculation */ + for (i = 0; i < RTE_MAX_NUMA_NODES; i++) + memory[i] = internal_config.socket_mem[i]; + + /* calculate final number of pages */ + nr_hugepages = calc_num_pages_per_socket(memory, + internal_config.hugepage_info, used_hp, + internal_config.num_hugepage_sizes); + + /* error if not enough memory available */ + if (nr_hugepages < 0) + goto fail; + + /* reporting in! 
*/ + for (i = 0; i < (int) internal_config.num_hugepage_sizes; i++) { + for (j = 0; j < RTE_MAX_NUMA_NODES; j++) { + if (used_hp[i].num_pages[j] > 0) { + RTE_LOG(DEBUG, EAL, + "Requesting %u pages of size %uMB" + " from socket %i\n", + used_hp[i].num_pages[j], + (unsigned) + (used_hp[i].hugepage_sz / 0x100000), + j); + } + } + } + + /* create shared memory */ + hugepage = create_shared_memory(eal_hugepage_info_path(), + nr_hugefiles * sizeof(struct hugepage_file)); + + if (hugepage == NULL) { + RTE_LOG(ERR, EAL, "Failed to create shared memory!\n"); + goto fail; + } + memset(hugepage, 0, nr_hugefiles * sizeof(struct hugepage_file)); + + /* + * unmap pages that we won't need (looks at used_hp). + * also, sets final_va to NULL on pages that were unmapped. + */ + if (unmap_unneeded_hugepages(tmp_hp, used_hp, + internal_config.num_hugepage_sizes) < 0) { + RTE_LOG(ERR, EAL, "Unmapping and locking hugepages failed!\n"); + goto fail; + } + + /* + * copy stuff from malloc'd hugepage* to the actual shared memory. + * this procedure only copies those hugepages that have final_va + * not NULL. has overflow protection. 
+ */ + if (copy_hugepages_to_shared_mem(hugepage, nr_hugefiles, + tmp_hp, nr_hugefiles) < 0) { + RTE_LOG(ERR, EAL, "Copying tables to shared memory failed!\n"); + goto fail; + } + + /* free the hugepage backing files */ + if (internal_config.hugepage_unlink && + unlink_hugepage_files(tmp_hp, internal_config.num_hugepage_sizes) < 0) { + RTE_LOG(ERR, EAL, "Unlinking hugepage files failed!\n"); + goto fail; + } + + /* free the temporary hugepage table */ + free(tmp_hp); + tmp_hp = NULL; + + /* find earliest free memseg - this is needed because in case of IVSHMEM, + * segments might have already been initialized */ + for (j = 0; j < RTE_MAX_MEMSEG; j++) + if (mcfg->memseg[j].addr == NULL) { + /* move to previous segment and exit loop */ + j--; + break; + } + + for (i = 0; i < nr_hugefiles; i++) { + new_memseg = 0; + + /* if this is a new section, create a new memseg */ + if (i == 0) + new_memseg = 1; + else if (hugepage[i].socket_id != hugepage[i-1].socket_id) + new_memseg = 1; + else if (hugepage[i].size != hugepage[i-1].size) + new_memseg = 1; + +#ifdef RTE_ARCH_PPC_64 + /* On PPC64 architecture, the mmap always start from higher + * virtual address to lower address. 
Here, both the physical + * address and virtual address are in descending order */ + else if ((hugepage[i-1].physaddr - hugepage[i].physaddr) != + hugepage[i].size) + new_memseg = 1; + else if (((unsigned long)hugepage[i-1].final_va - + (unsigned long)hugepage[i].final_va) != hugepage[i].size) + new_memseg = 1; +#else + else if ((hugepage[i].physaddr - hugepage[i-1].physaddr) != + hugepage[i].size) + new_memseg = 1; + else if (((unsigned long)hugepage[i].final_va - + (unsigned long)hugepage[i-1].final_va) != hugepage[i].size) + new_memseg = 1; +#endif + + if (new_memseg) { + j += 1; + if (j == RTE_MAX_MEMSEG) + break; + + mcfg->memseg[j].phys_addr = hugepage[i].physaddr; + mcfg->memseg[j].addr = hugepage[i].final_va; +#ifdef RTE_EAL_SINGLE_FILE_SEGMENTS + mcfg->memseg[j].len = hugepage[i].size * hugepage[i].repeated; +#else + mcfg->memseg[j].len = hugepage[i].size; +#endif + mcfg->memseg[j].socket_id = hugepage[i].socket_id; + mcfg->memseg[j].hugepage_sz = hugepage[i].size; + } + /* continuation of previous memseg */ + else { +#ifdef RTE_ARCH_PPC_64 + /* Use the phy and virt address of the last page as segment + * address for IBM Power architecture */ + mcfg->memseg[j].phys_addr = hugepage[i].physaddr; + mcfg->memseg[j].addr = hugepage[i].final_va; +#endif + mcfg->memseg[j].len += mcfg->memseg[j].hugepage_sz; + } + hugepage[i].memseg_id = j; + } + + if (i < nr_hugefiles) { + RTE_LOG(ERR, EAL, "Can only reserve %d pages " + "from %d requested\n" + "Current %s=%d is not enough\n" + "Please either increase it or request less amount " + "of memory.\n", + i, nr_hugefiles, RTE_STR(CONFIG_RTE_MAX_MEMSEG), + RTE_MAX_MEMSEG); + return -ENOMEM; + } + + return 0; + +fail: + if (tmp_hp) + free(tmp_hp); + return -1; +} + +/* + * uses fstat to report the size of a file on disk + */ +static off_t +getFileSize(int fd) +{ + struct stat st; + if (fstat(fd, &st) < 0) + return 0; + return st.st_size; +} + +/* + * This creates the memory mappings in the secondary process to match that 
of + * the server process. It goes through each memory segment in the DPDK runtime + * configuration and finds the hugepages which form that segment, mapping them + * in order to form a contiguous block in the virtual memory space + */ +int +rte_eal_hugepage_attach(void) +{ + const struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config; + const struct hugepage_file *hp = NULL; + unsigned num_hp = 0; + unsigned i, s = 0; /* s used to track the segment number */ + off_t size; + int fd, fd_zero = -1, fd_hugepage = -1; + + if (aslr_enabled() > 0) { + RTE_LOG(WARNING, EAL, "WARNING: Address Space Layout Randomization " + "(ASLR) is enabled in the kernel.\n"); + RTE_LOG(WARNING, EAL, " This may cause issues with mapping memory " + "into secondary processes\n"); + } + + test_proc_pagemap_readable(); + + if (internal_config.xen_dom0_support) { +#ifdef RTE_LIBRTE_XEN_DOM0 + if (rte_xen_dom0_memory_attach() < 0) { + RTE_LOG(ERR, EAL,"Failed to attach memory setments of primay " + "process\n"); + return -1; + } + return 0; +#endif + } + + fd_zero = open("/dev/zero", O_RDONLY); + if (fd_zero < 0) { + RTE_LOG(ERR, EAL, "Could not open /dev/zero\n"); + goto error; + } + fd_hugepage = open(eal_hugepage_info_path(), O_RDONLY); + if (fd_hugepage < 0) { + RTE_LOG(ERR, EAL, "Could not open %s\n", eal_hugepage_info_path()); + goto error; + } + + /* map all segments into memory to make sure we get the addrs */ + for (s = 0; s < RTE_MAX_MEMSEG; ++s) { + void *base_addr; + + /* + * the first memory segment with len==0 is the one that + * follows the last valid segment. + */ + if (mcfg->memseg[s].len == 0) + break; + +#ifdef RTE_LIBRTE_IVSHMEM + /* + * if segment has ioremap address set, it's an IVSHMEM segment and + * doesn't need mapping as it was already mapped earlier + */ + if (mcfg->memseg[s].ioremap_addr != 0) + continue; +#endif + + /* + * fdzero is mmapped to get a contiguous block of virtual + * addresses of the appropriate memseg size. 
+ * use mmap to get identical addresses as the primary process. + */ + base_addr = mmap(mcfg->memseg[s].addr, mcfg->memseg[s].len, + PROT_READ, MAP_PRIVATE, fd_zero, 0); + if (base_addr == MAP_FAILED || + base_addr != mcfg->memseg[s].addr) { + RTE_LOG(ERR, EAL, "Could not mmap %llu bytes " + "in /dev/zero to requested address [%p]: '%s'\n", + (unsigned long long)mcfg->memseg[s].len, + mcfg->memseg[s].addr, strerror(errno)); + if (aslr_enabled() > 0) { + RTE_LOG(ERR, EAL, "It is recommended to " + "disable ASLR in the kernel " + "and retry running both primary " + "and secondary processes\n"); + } + goto error; + } + } + + size = getFileSize(fd_hugepage); + hp = mmap(NULL, size, PROT_READ, MAP_PRIVATE, fd_hugepage, 0); + if (hp == NULL) { + RTE_LOG(ERR, EAL, "Could not mmap %s\n", eal_hugepage_info_path()); + goto error; + } + + num_hp = size / sizeof(struct hugepage_file); + RTE_LOG(DEBUG, EAL, "Analysing %u files\n", num_hp); + + s = 0; + while (s < RTE_MAX_MEMSEG && mcfg->memseg[s].len > 0){ + void *addr, *base_addr; + uintptr_t offset = 0; + size_t mapping_size; +#ifdef RTE_LIBRTE_IVSHMEM + /* + * if segment has ioremap address set, it's an IVSHMEM segment and + * doesn't need mapping as it was already mapped earlier + */ + if (mcfg->memseg[s].ioremap_addr != 0) { + s++; + continue; + } +#endif + /* + * free previously mapped memory so we can map the + * hugepages into the space + */ + base_addr = mcfg->memseg[s].addr; + munmap(base_addr, mcfg->memseg[s].len); + + /* find the hugepages for this segment and map them + * we don't need to worry about order, as the server sorted the + * entries before it did the second mmap of them */ + for (i = 0; i < num_hp && offset < mcfg->memseg[s].len; i++){ + if (hp[i].memseg_id == (int)s){ + fd = open(hp[i].filepath, O_RDWR); + if (fd < 0) { + RTE_LOG(ERR, EAL, "Could not open %s\n", + hp[i].filepath); + goto error; + } +#ifdef RTE_EAL_SINGLE_FILE_SEGMENTS + mapping_size = hp[i].size * hp[i].repeated; +#else + mapping_size = 
hp[i].size; +#endif + addr = mmap(RTE_PTR_ADD(base_addr, offset), + mapping_size, PROT_READ | PROT_WRITE, + MAP_SHARED, fd, 0); + close(fd); /* close file both on success and on failure */ + if (addr == MAP_FAILED || + addr != RTE_PTR_ADD(base_addr, offset)) { + RTE_LOG(ERR, EAL, "Could not mmap %s\n", + hp[i].filepath); + goto error; + } + offset+=mapping_size; + } + } + RTE_LOG(DEBUG, EAL, "Mapped segment %u of size 0x%llx\n", s, + (unsigned long long)mcfg->memseg[s].len); + s++; + } + /* unmap the hugepage config file, since we are done using it */ + munmap((void *)(uintptr_t)hp, size); + close(fd_zero); + close(fd_hugepage); + return 0; + +error: + if (fd_zero >= 0) + close(fd_zero); + if (fd_hugepage >= 0) + close(fd_hugepage); + return -1; +} diff --git a/src/dpdk22/lib/librte_eal/linuxapp/eal/eal_pci.c b/src/dpdk22/lib/librte_eal/linuxapp/eal/eal_pci.c new file mode 100644 index 00000000..bc5b5bee --- /dev/null +++ b/src/dpdk22/lib/librte_eal/linuxapp/eal/eal_pci.c @@ -0,0 +1,656 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include +#include + +#include +#include +#include +#include +#include +#include + +#include "eal_filesystem.h" +#include "eal_private.h" +#include "eal_pci_init.h" + +/** + * @file + * PCI probing under linux + * + * This code is used to simulate a PCI probe by parsing information in sysfs. + * When a registered device matches a driver, it is then initialized with + * IGB_UIO driver (or doesn't initialize, if the device wasn't bound to it). 
+ */ + +/* unbind kernel driver for this device */ +int +pci_unbind_kernel_driver(struct rte_pci_device *dev) +{ + int n; + FILE *f; + char filename[PATH_MAX]; + char buf[BUFSIZ]; + struct rte_pci_addr *loc = &dev->addr; + + /* open /sys/bus/pci/devices/AAAA:BB:CC.D/driver */ + snprintf(filename, sizeof(filename), + SYSFS_PCI_DEVICES "/" PCI_PRI_FMT "/driver/unbind", + loc->domain, loc->bus, loc->devid, loc->function); + + f = fopen(filename, "w"); + if (f == NULL) /* device was not bound */ + return 0; + + n = snprintf(buf, sizeof(buf), PCI_PRI_FMT "\n", + loc->domain, loc->bus, loc->devid, loc->function); + if ((n < 0) || (n >= (int)sizeof(buf))) { + RTE_LOG(ERR, EAL, "%s(): snprintf failed\n", __func__); + goto error; + } + if (fwrite(buf, n, 1, f) == 0) { + RTE_LOG(ERR, EAL, "%s(): could not write to %s\n", __func__, + filename); + goto error; + } + + fclose(f); + return 0; + +error: + fclose(f); + return -1; +} + +static int +pci_get_kernel_driver_by_path(const char *filename, char *dri_name) +{ + int count; + char path[PATH_MAX]; + char *name; + + if (!filename || !dri_name) + return -1; + + count = readlink(filename, path, PATH_MAX); + if (count >= PATH_MAX) + return -1; + + /* For device does not have a driver */ + if (count < 0) + return 1; + + path[count] = '\0'; + + name = strrchr(path, '/'); + if (name) { + strncpy(dri_name, name + 1, strlen(name + 1) + 1); + return 0; + } + + return -1; +} + +/* Map pci device */ +int +pci_map_device(struct rte_pci_device *dev) +{ + int ret = -1; + + /* try mapping the NIC resources using VFIO if it exists */ + switch (dev->kdrv) { + case RTE_KDRV_VFIO: +#ifdef VFIO_PRESENT + if (pci_vfio_is_enabled()) + ret = pci_vfio_map_resource(dev); +#endif + break; + case RTE_KDRV_IGB_UIO: + case RTE_KDRV_UIO_GENERIC: + /* map resources for devices that use uio */ + ret = pci_uio_map_resource(dev); + break; + default: + RTE_LOG(DEBUG, EAL, + " Not managed by a supported kernel driver, skipped\n"); + ret = 1; + break; + } + + 
return ret; +} + +/* Unmap pci device */ +void +pci_unmap_device(struct rte_pci_device *dev) +{ + /* try unmapping the NIC resources using VFIO if it exists */ + switch (dev->kdrv) { + case RTE_KDRV_VFIO: + RTE_LOG(ERR, EAL, "Hotplug doesn't support vfio yet\n"); + break; + case RTE_KDRV_IGB_UIO: + case RTE_KDRV_UIO_GENERIC: + /* unmap resources for devices that use uio */ + pci_uio_unmap_resource(dev); + break; + default: + RTE_LOG(DEBUG, EAL, + " Not managed by a supported kernel driver, skipped\n"); + break; + } +} + +void * +pci_find_max_end_va(void) +{ + const struct rte_memseg *seg = rte_eal_get_physmem_layout(); + const struct rte_memseg *last = seg; + unsigned i = 0; + + for (i = 0; i < RTE_MAX_MEMSEG; i++, seg++) { + if (seg->addr == NULL) + break; + + if (seg->addr > last->addr) + last = seg; + + } + return RTE_PTR_ADD(last->addr, last->len); +} + +/* parse the "resource" sysfs file */ +static int +pci_parse_sysfs_resource(const char *filename, struct rte_pci_device *dev) +{ + FILE *f; + char buf[BUFSIZ]; + union pci_resource_info { + struct { + char *phys_addr; + char *end_addr; + char *flags; + }; + char *ptrs[PCI_RESOURCE_FMT_NVAL]; + } res_info; + int i; + uint64_t phys_addr, end_addr, flags; + + f = fopen(filename, "r"); + if (f == NULL) { + RTE_LOG(ERR, EAL, "Cannot open sysfs resource\n"); + return -1; + } + + for (i = 0; imem_resource[i].phys_addr = phys_addr; + dev->mem_resource[i].len = end_addr - phys_addr + 1; + /* not mapped for now */ + dev->mem_resource[i].addr = NULL; + } + } + fclose(f); + return 0; + +error: + fclose(f); + return -1; +} + +/* Scan one pci sysfs entry, and fill the devices list from it. 
*/ +static int +pci_scan_one(const char *dirname, uint16_t domain, uint8_t bus, + uint8_t devid, uint8_t function) +{ + char filename[PATH_MAX]; + unsigned long tmp; + struct rte_pci_device *dev; + char driver[PATH_MAX]; + int ret; + + dev = malloc(sizeof(*dev)); + if (dev == NULL) + return -1; + + memset(dev, 0, sizeof(*dev)); + dev->addr.domain = domain; + dev->addr.bus = bus; + dev->addr.devid = devid; + dev->addr.function = function; + + /* get vendor id */ + snprintf(filename, sizeof(filename), "%s/vendor", dirname); + if (eal_parse_sysfs_value(filename, &tmp) < 0) { + free(dev); + return -1; + } + dev->id.vendor_id = (uint16_t)tmp; + + /* get device id */ + snprintf(filename, sizeof(filename), "%s/device", dirname); + if (eal_parse_sysfs_value(filename, &tmp) < 0) { + free(dev); + return -1; + } + dev->id.device_id = (uint16_t)tmp; + + /* get subsystem_vendor id */ + snprintf(filename, sizeof(filename), "%s/subsystem_vendor", + dirname); + if (eal_parse_sysfs_value(filename, &tmp) < 0) { + free(dev); + return -1; + } + dev->id.subsystem_vendor_id = (uint16_t)tmp; + + /* get subsystem_device id */ + snprintf(filename, sizeof(filename), "%s/subsystem_device", + dirname); + if (eal_parse_sysfs_value(filename, &tmp) < 0) { + free(dev); + return -1; + } + dev->id.subsystem_device_id = (uint16_t)tmp; + + /* get max_vfs */ + dev->max_vfs = 0; + snprintf(filename, sizeof(filename), "%s/max_vfs", dirname); + if (!access(filename, F_OK) && + eal_parse_sysfs_value(filename, &tmp) == 0) + dev->max_vfs = (uint16_t)tmp; + else { + /* for non igb_uio driver, need kernel version >= 3.8 */ + snprintf(filename, sizeof(filename), + "%s/sriov_numvfs", dirname); + if (!access(filename, F_OK) && + eal_parse_sysfs_value(filename, &tmp) == 0) + dev->max_vfs = (uint16_t)tmp; + } + + /* get numa node */ + snprintf(filename, sizeof(filename), "%s/numa_node", + dirname); + if (access(filename, R_OK) != 0) { + /* if no NUMA support, set default to 0 */ + dev->numa_node = 0; + } else { + 
if (eal_parse_sysfs_value(filename, &tmp) < 0) { + free(dev); + return -1; + } + dev->numa_node = tmp; + } + + /* parse resources */ + snprintf(filename, sizeof(filename), "%s/resource", dirname); + if (pci_parse_sysfs_resource(filename, dev) < 0) { + RTE_LOG(ERR, EAL, "%s(): cannot parse resource\n", __func__); + free(dev); + return -1; + } + + /* parse driver */ + snprintf(filename, sizeof(filename), "%s/driver", dirname); + ret = pci_get_kernel_driver_by_path(filename, driver); + if (ret < 0) { + RTE_LOG(ERR, EAL, "Fail to get kernel driver\n"); + free(dev); + return -1; + } + + if (!ret) { + if (!strcmp(driver, "vfio-pci")) + dev->kdrv = RTE_KDRV_VFIO; + else if (!strcmp(driver, "igb_uio")) + dev->kdrv = RTE_KDRV_IGB_UIO; + else if (!strcmp(driver, "uio_pci_generic")) + dev->kdrv = RTE_KDRV_UIO_GENERIC; + else + dev->kdrv = RTE_KDRV_UNKNOWN; + } else + dev->kdrv = RTE_KDRV_UNKNOWN; + + /* device is valid, add in list (sorted) */ + if (TAILQ_EMPTY(&pci_device_list)) { + TAILQ_INSERT_TAIL(&pci_device_list, dev, next); + } else { + struct rte_pci_device *dev2; + int ret; + + TAILQ_FOREACH(dev2, &pci_device_list, next) { + ret = rte_eal_compare_pci_addr(&dev->addr, &dev2->addr); + if (ret > 0) + continue; + + if (ret < 0) { + TAILQ_INSERT_BEFORE(dev2, dev, next); + } else { /* already registered */ + dev2->kdrv = dev->kdrv; + dev2->max_vfs = dev->max_vfs; + memmove(dev2->mem_resource, dev->mem_resource, + sizeof(dev->mem_resource)); + free(dev); + } + return 0; + } + TAILQ_INSERT_TAIL(&pci_device_list, dev, next); + } + + return 0; +} + +/* + * split up a pci address into its constituent parts. + */ +static int +parse_pci_addr_format(const char *buf, int bufsize, uint16_t *domain, + uint8_t *bus, uint8_t *devid, uint8_t *function) +{ + /* first split on ':' */ + union splitaddr { + struct { + char *domain; + char *bus; + char *devid; + char *function; + }; + char *str[PCI_FMT_NVAL]; /* last element-separator is "." 
not ":" */ + } splitaddr; + + char *buf_copy = strndup(buf, bufsize); + if (buf_copy == NULL) + return -1; + + if (rte_strsplit(buf_copy, bufsize, splitaddr.str, PCI_FMT_NVAL, ':') + != PCI_FMT_NVAL - 1) + goto error; + /* final split is on '.' between devid and function */ + splitaddr.function = strchr(splitaddr.devid,'.'); + if (splitaddr.function == NULL) + goto error; + *splitaddr.function++ = '\0'; + + /* now convert to int values */ + errno = 0; + *domain = (uint16_t)strtoul(splitaddr.domain, NULL, 16); + *bus = (uint8_t)strtoul(splitaddr.bus, NULL, 16); + *devid = (uint8_t)strtoul(splitaddr.devid, NULL, 16); + *function = (uint8_t)strtoul(splitaddr.function, NULL, 10); + if (errno != 0) + goto error; + + free(buf_copy); /* free the copy made with strdup */ + return 0; +error: + free(buf_copy); + return -1; +} + +/* + * Scan the content of the PCI bus, and the devices in the devices + * list + */ +int +rte_eal_pci_scan(void) +{ + struct dirent *e; + DIR *dir; + char dirname[PATH_MAX]; + uint16_t domain; + uint8_t bus, devid, function; + + dir = opendir(SYSFS_PCI_DEVICES); + if (dir == NULL) { + RTE_LOG(ERR, EAL, "%s(): opendir failed: %s\n", + __func__, strerror(errno)); + return -1; + } + + while ((e = readdir(dir)) != NULL) { + if (e->d_name[0] == '.') + continue; + + if (parse_pci_addr_format(e->d_name, sizeof(e->d_name), &domain, + &bus, &devid, &function) != 0) + continue; + + snprintf(dirname, sizeof(dirname), "%s/%s", SYSFS_PCI_DEVICES, + e->d_name); + if (pci_scan_one(dirname, domain, bus, devid, function) < 0) + goto error; + } + closedir(dir); + return 0; + +error: + closedir(dir); + return -1; +} + +#ifdef RTE_PCI_CONFIG +static int +pci_config_extended_tag(struct rte_pci_device *dev) +{ + struct rte_pci_addr *loc = &dev->addr; + char filename[PATH_MAX]; + char buf[BUFSIZ]; + FILE *f; + + /* not configured, let it as is */ + if (strncmp(RTE_PCI_EXTENDED_TAG, "on", 2) != 0 && + strncmp(RTE_PCI_EXTENDED_TAG, "off", 3) != 0) + return 0; + + 
snprintf(filename, sizeof(filename), + SYSFS_PCI_DEVICES "/" PCI_PRI_FMT "/" "extended_tag", + loc->domain, loc->bus, loc->devid, loc->function); + f = fopen(filename, "rw+"); + if (!f) + return -1; + + fgets(buf, sizeof(buf), f); + if (strncmp(RTE_PCI_EXTENDED_TAG, "on", 2) == 0) { + /* enable Extended Tag*/ + if (strncmp(buf, "on", 2) != 0) { + fseek(f, 0, SEEK_SET); + fputs("on", f); + } + } else { + /* disable Extended Tag */ + if (strncmp(buf, "off", 3) != 0) { + fseek(f, 0, SEEK_SET); + fputs("off", f); + } + } + fclose(f); + + return 0; +} + +static int +pci_config_max_read_request_size(struct rte_pci_device *dev) +{ + struct rte_pci_addr *loc = &dev->addr; + char filename[PATH_MAX]; + char buf[BUFSIZ], param[BUFSIZ]; + FILE *f; + /* size can be 128, 256, 512, 1024, 2048, 4096 */ + uint32_t max_size = RTE_PCI_MAX_READ_REQUEST_SIZE; + + /* not configured, let it as is */ + if (!max_size) + return 0; + + snprintf(filename, sizeof(filename), + SYSFS_PCI_DEVICES "/" PCI_PRI_FMT "/" "max_read_request_size", + loc->domain, loc->bus, loc->devid, loc->function); + f = fopen(filename, "rw+"); + if (!f) + return -1; + + fgets(buf, sizeof(buf), f); + snprintf(param, sizeof(param), "%d", max_size); + + /* check if the size to be set is the same as current */ + if (strcmp(buf, param) == 0) { + fclose(f); + return 0; + } + fseek(f, 0, SEEK_SET); + fputs(param, f); + fclose(f); + + return 0; +} + +void +pci_config_space_set(struct rte_pci_device *dev) +{ + if (rte_eal_process_type() != RTE_PROC_PRIMARY) + return; + + /* configure extended tag */ + pci_config_extended_tag(dev); + + /* configure max read request size */ + pci_config_max_read_request_size(dev); +} +#endif + +/* Read PCI config space. 
*/ +int rte_eal_pci_read_config(const struct rte_pci_device *device, + void *buf, size_t len, off_t offset) +{ + const struct rte_intr_handle *intr_handle = &device->intr_handle; + + switch (intr_handle->type) { + case RTE_INTR_HANDLE_UIO: + case RTE_INTR_HANDLE_UIO_INTX: + return pci_uio_read_config(intr_handle, buf, len, offset); + +#ifdef VFIO_PRESENT + case RTE_INTR_HANDLE_VFIO_MSIX: + case RTE_INTR_HANDLE_VFIO_MSI: + case RTE_INTR_HANDLE_VFIO_LEGACY: + return pci_vfio_read_config(intr_handle, buf, len, offset); +#endif + default: + RTE_LOG(ERR, EAL, + "Unknown handle type of fd %d\n", + intr_handle->fd); + return -1; + } +} + +/* Write PCI config space. */ +int rte_eal_pci_write_config(const struct rte_pci_device *device, + const void *buf, size_t len, off_t offset) +{ + const struct rte_intr_handle *intr_handle = &device->intr_handle; + + switch (intr_handle->type) { + case RTE_INTR_HANDLE_UIO: + case RTE_INTR_HANDLE_UIO_INTX: + return pci_uio_write_config(intr_handle, buf, len, offset); + +#ifdef VFIO_PRESENT + case RTE_INTR_HANDLE_VFIO_MSIX: + case RTE_INTR_HANDLE_VFIO_MSI: + case RTE_INTR_HANDLE_VFIO_LEGACY: + return pci_vfio_write_config(intr_handle, buf, len, offset); +#endif + default: + RTE_LOG(ERR, EAL, + "Unknown handle type of fd %d\n", + intr_handle->fd); + return -1; + } +} + +/* Init the PCI EAL subsystem */ +int +rte_eal_pci_init(void) +{ + TAILQ_INIT(&pci_driver_list); + TAILQ_INIT(&pci_device_list); + + /* for debug purposes, PCI can be disabled */ + if (internal_config.no_pci) + return 0; + + if (rte_eal_pci_scan() < 0) { + RTE_LOG(ERR, EAL, "%s(): Cannot scan PCI bus\n", __func__); + return -1; + } +#ifdef VFIO_PRESENT + pci_vfio_enable(); + + if (pci_vfio_is_enabled()) { + + /* if we are primary process, create a thread to communicate with + * secondary processes. 
the thread will use a socket to wait for + * requests from secondary process to send open file descriptors, + * because VFIO does not allow multiple open descriptors on a group or + * VFIO container. + */ + if (internal_config.process_type == RTE_PROC_PRIMARY && + pci_vfio_mp_sync_setup() < 0) + return -1; + } +#endif + return 0; +} diff --git a/src/dpdk22/lib/librte_eal/linuxapp/eal/eal_pci_init.h b/src/dpdk22/lib/librte_eal/linuxapp/eal/eal_pci_init.h new file mode 100644 index 00000000..a17c7083 --- /dev/null +++ b/src/dpdk22/lib/librte_eal/linuxapp/eal/eal_pci_init.h @@ -0,0 +1,111 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef EAL_PCI_INIT_H_ +#define EAL_PCI_INIT_H_ + +#include "eal_vfio.h" + +/* + * Helper function to map PCI resources right after hugepages in virtual memory + */ +extern void *pci_map_addr; +void *pci_find_max_end_va(void); + +int pci_uio_alloc_resource(struct rte_pci_device *dev, + struct mapped_pci_resource **uio_res); +void pci_uio_free_resource(struct rte_pci_device *dev, + struct mapped_pci_resource *uio_res); +int pci_uio_map_resource_by_index(struct rte_pci_device *dev, int res_idx, + struct mapped_pci_resource *uio_res, int map_idx); + +int pci_uio_read_config(const struct rte_intr_handle *intr_handle, + void *buf, size_t len, off_t offs); +int pci_uio_write_config(const struct rte_intr_handle *intr_handle, + const void *buf, size_t len, off_t offs); + +#ifdef VFIO_PRESENT + +#define VFIO_MAX_GROUPS 64 + +int pci_vfio_enable(void); +int pci_vfio_is_enabled(void); +int pci_vfio_mp_sync_setup(void); + +/* access config space */ +int pci_vfio_read_config(const struct rte_intr_handle *intr_handle, + void *buf, size_t len, off_t offs); +int pci_vfio_write_config(const struct rte_intr_handle *intr_handle, + const void *buf, size_t len, off_t offs); + +/* map VFIO resource prototype */ +int pci_vfio_map_resource(struct rte_pci_device *dev); +int pci_vfio_get_group_fd(int iommu_group_fd); +int pci_vfio_get_container_fd(void); + +/* + * Function prototypes for VFIO multiprocess sync functions + */ +int 
vfio_mp_sync_send_request(int socket, int req); +int vfio_mp_sync_receive_request(int socket); +int vfio_mp_sync_send_fd(int socket, int fd); +int vfio_mp_sync_receive_fd(int socket); +int vfio_mp_sync_connect_to_primary(void); + +/* socket comm protocol definitions */ +#define SOCKET_REQ_CONTAINER 0x100 +#define SOCKET_REQ_GROUP 0x200 +#define SOCKET_OK 0x0 +#define SOCKET_NO_FD 0x1 +#define SOCKET_ERR 0xFF + +/* + * we don't need to store device fd's anywhere since they can be obtained from + * the group fd via an ioctl() call. + */ +struct vfio_group { + int group_no; + int fd; +}; + +struct vfio_config { + int vfio_enabled; + int vfio_container_fd; + int vfio_container_has_dma; + int vfio_group_idx; + struct vfio_group vfio_groups[VFIO_MAX_GROUPS]; +}; + +#endif + +#endif /* EAL_PCI_INIT_H_ */ diff --git a/src/dpdk22/lib/librte_eal/linuxapp/eal/eal_pci_uio.c b/src/dpdk22/lib/librte_eal/linuxapp/eal/eal_pci_uio.c new file mode 100644 index 00000000..ac50e13f --- /dev/null +++ b/src/dpdk22/lib/librte_eal/linuxapp/eal/eal_pci_uio.c @@ -0,0 +1,365 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include + +#include "eal_filesystem.h" +#include "eal_pci_init.h" + +void *pci_map_addr = NULL; + +#define OFF_MAX ((uint64_t)(off_t)-1) + +int +pci_uio_read_config(const struct rte_intr_handle *intr_handle, + void *buf, size_t len, off_t offset) +{ + return pread(intr_handle->uio_cfg_fd, buf, len, offset); +} + +int +pci_uio_write_config(const struct rte_intr_handle *intr_handle, + const void *buf, size_t len, off_t offset) +{ + return pwrite(intr_handle->uio_cfg_fd, buf, len, offset); +} + +static int +pci_uio_set_bus_master(int dev_fd) +{ + uint16_t reg; + int ret; + + ret = pread(dev_fd, &reg, sizeof(reg), PCI_COMMAND); + if (ret != sizeof(reg)) { + RTE_LOG(ERR, EAL, + "Cannot read command from PCI config space!\n"); + return -1; + } + + /* return if bus mastering is already on */ + if (reg & PCI_COMMAND_MASTER) + return 0; + + reg |= PCI_COMMAND_MASTER; + + ret = pwrite(dev_fd, &reg, sizeof(reg), PCI_COMMAND); + if (ret != sizeof(reg)) { + RTE_LOG(ERR, EAL, + "Cannot write command to PCI config space!\n"); + return -1; + } + + return 0; +} + +static int
+pci_mknod_uio_dev(const char *sysfs_uio_path, unsigned uio_num) +{ + FILE *f; + char filename[PATH_MAX]; + int ret; + unsigned major, minor; + dev_t dev; + + /* get the name of the sysfs file that contains the major and minor + * of the uio device and read its content */ + snprintf(filename, sizeof(filename), "%s/dev", sysfs_uio_path); + + f = fopen(filename, "r"); + if (f == NULL) { + RTE_LOG(ERR, EAL, "%s(): cannot open sysfs to get major:minor\n", + __func__); + return -1; + } + + ret = fscanf(f, "%u:%u", &major, &minor); + if (ret != 2) { + RTE_LOG(ERR, EAL, "%s(): cannot parse sysfs to get major:minor\n", + __func__); + fclose(f); + return -1; + } + fclose(f); + + /* create the char device "mknod /dev/uioX c major minor" */ + snprintf(filename, sizeof(filename), "/dev/uio%u", uio_num); + dev = makedev(major, minor); + ret = mknod(filename, S_IFCHR | S_IRUSR | S_IWUSR, dev); + if (f == NULL) { + RTE_LOG(ERR, EAL, "%s(): mknod() failed %s\n", + __func__, strerror(errno)); + return -1; + } + + return ret; +} + +/* + * Return the uioX char device used for a pci device. On success, return + * the UIO number and fill dstbuf string with the path of the device in + * sysfs. On error, return a negative value. In this case dstbuf is + * invalid. 
+ */ +static int +pci_get_uio_dev(struct rte_pci_device *dev, char *dstbuf, + unsigned int buflen) +{ + struct rte_pci_addr *loc = &dev->addr; + unsigned int uio_num; + struct dirent *e; + DIR *dir; + char dirname[PATH_MAX]; + + /* depending on kernel version, uio can be located in uio/uioX + * or uio:uioX */ + + snprintf(dirname, sizeof(dirname), + SYSFS_PCI_DEVICES "/" PCI_PRI_FMT "/uio", + loc->domain, loc->bus, loc->devid, loc->function); + + dir = opendir(dirname); + if (dir == NULL) { + /* retry with the parent directory */ + snprintf(dirname, sizeof(dirname), + SYSFS_PCI_DEVICES "/" PCI_PRI_FMT, + loc->domain, loc->bus, loc->devid, loc->function); + dir = opendir(dirname); + + if (dir == NULL) { + RTE_LOG(ERR, EAL, "Cannot opendir %s\n", dirname); + return -1; + } + } + + /* take the first file starting with "uio" */ + while ((e = readdir(dir)) != NULL) { + /* format could be uio%d ...*/ + int shortprefix_len = sizeof("uio") - 1; + /* ... or uio:uio%d */ + int longprefix_len = sizeof("uio:uio") - 1; + char *endptr; + + if (strncmp(e->d_name, "uio", 3) != 0) + continue; + + /* first try uio%d */ + errno = 0; + uio_num = strtoull(e->d_name + shortprefix_len, &endptr, 10); + if (errno == 0 && endptr != (e->d_name + shortprefix_len)) { + snprintf(dstbuf, buflen, "%s/uio%u", dirname, uio_num); + break; + } + + /* then try uio:uio%d */ + errno = 0; + uio_num = strtoull(e->d_name + longprefix_len, &endptr, 10); + if (errno == 0 && endptr != (e->d_name + longprefix_len)) { + snprintf(dstbuf, buflen, "%s/uio:uio%u", dirname, uio_num); + break; + } + } + closedir(dir); + + /* No uio resource found */ + if (e == NULL) + return -1; + + /* create uio device if we've been asked to */ + if (internal_config.create_uio_dev && + pci_mknod_uio_dev(dstbuf, uio_num) < 0) + RTE_LOG(WARNING, EAL, "Cannot create /dev/uio%u\n", uio_num); + + return uio_num; +} + +void +pci_uio_free_resource(struct rte_pci_device *dev, + struct mapped_pci_resource *uio_res) +{ + rte_free(uio_res); + 
+ if (dev->intr_handle.uio_cfg_fd >= 0) { + close(dev->intr_handle.uio_cfg_fd); + dev->intr_handle.uio_cfg_fd = -1; + } + if (dev->intr_handle.fd) { + close(dev->intr_handle.fd); + dev->intr_handle.fd = -1; + dev->intr_handle.type = RTE_INTR_HANDLE_UNKNOWN; + } +} + +int +pci_uio_alloc_resource(struct rte_pci_device *dev, + struct mapped_pci_resource **uio_res) +{ + char dirname[PATH_MAX]; + char cfgname[PATH_MAX]; + char devname[PATH_MAX]; /* contains the /dev/uioX */ + int uio_num; + struct rte_pci_addr *loc; + + loc = &dev->addr; + + /* find uio resource */ + uio_num = pci_get_uio_dev(dev, dirname, sizeof(dirname)); + if (uio_num < 0) { + RTE_LOG(WARNING, EAL, " "PCI_PRI_FMT" not managed by UIO driver, " + "skipping\n", loc->domain, loc->bus, loc->devid, loc->function); + return 1; + } + snprintf(devname, sizeof(devname), "/dev/uio%u", uio_num); + + /* save fd if in primary process */ + dev->intr_handle.fd = open(devname, O_RDWR); + if (dev->intr_handle.fd < 0) { + RTE_LOG(ERR, EAL, "Cannot open %s: %s\n", + devname, strerror(errno)); + goto error; + } + + snprintf(cfgname, sizeof(cfgname), + "/sys/class/uio/uio%u/device/config", uio_num); + dev->intr_handle.uio_cfg_fd = open(cfgname, O_RDWR); + if (dev->intr_handle.uio_cfg_fd < 0) { + RTE_LOG(ERR, EAL, "Cannot open %s: %s\n", + cfgname, strerror(errno)); + goto error; + } + + if (dev->kdrv == RTE_KDRV_IGB_UIO) + dev->intr_handle.type = RTE_INTR_HANDLE_UIO; + else { + dev->intr_handle.type = RTE_INTR_HANDLE_UIO_INTX; + + /* set bus master that is not done by uio_pci_generic */ + if (pci_uio_set_bus_master(dev->intr_handle.uio_cfg_fd)) { + RTE_LOG(ERR, EAL, "Cannot set up bus mastering!\n"); + goto error; + } + } + + /* allocate the mapping details for secondary processes*/ + *uio_res = rte_zmalloc("UIO_RES", sizeof(**uio_res), 0); + if (*uio_res == NULL) { + RTE_LOG(ERR, EAL, + "%s(): cannot store uio mmap details\n", __func__); + goto error; + } + + snprintf((*uio_res)->path, sizeof((*uio_res)->path), "%s", 
devname); + memcpy(&(*uio_res)->pci_addr, &dev->addr, sizeof((*uio_res)->pci_addr)); + + return 0; + +error: + pci_uio_free_resource(dev, *uio_res); + return -1; +} + +int +pci_uio_map_resource_by_index(struct rte_pci_device *dev, int res_idx, + struct mapped_pci_resource *uio_res, int map_idx) +{ + int fd; + char devname[PATH_MAX]; /* contains the /dev/uioX */ + void *mapaddr; + struct rte_pci_addr *loc; + struct pci_map *maps; + + loc = &dev->addr; + maps = uio_res->maps; + + /* update devname for mmap */ + snprintf(devname, sizeof(devname), + SYSFS_PCI_DEVICES "/" PCI_PRI_FMT "/resource%d", + loc->domain, loc->bus, loc->devid, + loc->function, res_idx); + + /* allocate memory to keep path */ + maps[map_idx].path = rte_malloc(NULL, strlen(devname) + 1, 0); + if (maps[map_idx].path == NULL) { + RTE_LOG(ERR, EAL, "Cannot allocate memory for path: %s\n", + strerror(errno)); + return -1; + } + + /* + * open resource file, to mmap it + */ + fd = open(devname, O_RDWR); + if (fd < 0) { + RTE_LOG(ERR, EAL, "Cannot open %s: %s\n", + devname, strerror(errno)); + goto error; + } + + /* try mapping somewhere close to the end of hugepages */ + if (pci_map_addr == NULL) + pci_map_addr = pci_find_max_end_va(); + + mapaddr = pci_map_resource(pci_map_addr, fd, 0, + (size_t)dev->mem_resource[res_idx].len, 0); + close(fd); + if (mapaddr == MAP_FAILED) + goto error; + + pci_map_addr = RTE_PTR_ADD(mapaddr, + (size_t)dev->mem_resource[res_idx].len); + + maps[map_idx].phaddr = dev->mem_resource[res_idx].phys_addr; + maps[map_idx].size = dev->mem_resource[res_idx].len; + maps[map_idx].addr = mapaddr; + maps[map_idx].offset = 0; + strcpy(maps[map_idx].path, devname); + dev->mem_resource[res_idx].addr = mapaddr; + + return 0; + +error: + rte_free(maps[map_idx].path); + return -1; +} diff --git a/src/dpdk22/lib/librte_eal/linuxapp/eal/eal_pci_vfio.c b/src/dpdk22/lib/librte_eal/linuxapp/eal/eal_pci_vfio.c new file mode 100644 index 00000000..74f91bad --- /dev/null +++ 
b/src/dpdk22/lib/librte_eal/linuxapp/eal/eal_pci_vfio.c @@ -0,0 +1,928 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include + +#include "eal_filesystem.h" +#include "eal_pci_init.h" +#include "eal_vfio.h" + +/** + * @file + * PCI probing under linux (VFIO version) + * + * This code tries to determine if the PCI device is bound to VFIO driver, + * and initialize it (map BARs, set up interrupts) if that's the case. + * + * This file is only compiled if CONFIG_RTE_EAL_VFIO is set to "y". + */ + +#ifdef VFIO_PRESENT + +#define PAGE_SIZE (sysconf(_SC_PAGESIZE)) +#define PAGE_MASK (~(PAGE_SIZE - 1)) + +static struct rte_tailq_elem rte_vfio_tailq = { + .name = "VFIO_RESOURCE_LIST", +}; +EAL_REGISTER_TAILQ(rte_vfio_tailq) + +#define VFIO_DIR "/dev/vfio" +#define VFIO_CONTAINER_PATH "/dev/vfio/vfio" +#define VFIO_GROUP_FMT "/dev/vfio/%u" +#define VFIO_GET_REGION_ADDR(x) ((uint64_t)(x) << 40ULL) /* argument parenthesized so an expression is cast and shifted as a whole */ + +/* per-process VFIO config */ +static struct vfio_config vfio_cfg; + +int +pci_vfio_read_config(const struct rte_intr_handle *intr_handle, + void *buf, size_t len, off_t offs) +{ + return pread64(intr_handle->vfio_dev_fd, buf, len, + VFIO_GET_REGION_ADDR(VFIO_PCI_CONFIG_REGION_INDEX) + offs); +} + +int +pci_vfio_write_config(const struct rte_intr_handle *intr_handle, + const void *buf, size_t len, off_t offs) +{ + return pwrite64(intr_handle->vfio_dev_fd, buf, len, + VFIO_GET_REGION_ADDR(VFIO_PCI_CONFIG_REGION_INDEX) + offs); +} + +/* get PCI BAR number where MSI-X interrupts are */ +static int +pci_vfio_get_msix_bar(int fd, int *msix_bar, uint32_t *msix_table_offset, + uint32_t *msix_table_size) +{ + int ret; + uint32_t reg; + uint16_t flags; + uint8_t cap_id, cap_offset; + + /* read PCI capability pointer from config space */ + ret = pread64(fd, &reg, sizeof(reg), + VFIO_GET_REGION_ADDR(VFIO_PCI_CONFIG_REGION_INDEX) + + PCI_CAPABILITY_LIST); + if (ret != sizeof(reg)) { + RTE_LOG(ERR, EAL, "Cannot read capability pointer from PCI " + "config space!\n"); + return -1; + } + + /*
we need first byte */ + cap_offset = reg & 0xFF; + + while (cap_offset) { + + /* read PCI capability ID */ + ret = pread64(fd, &reg, sizeof(reg), + VFIO_GET_REGION_ADDR(VFIO_PCI_CONFIG_REGION_INDEX) + + cap_offset); + if (ret != sizeof(reg)) { + RTE_LOG(ERR, EAL, "Cannot read capability ID from PCI " + "config space!\n"); + return -1; + } + + /* we need first byte */ + cap_id = reg & 0xFF; + + /* if we haven't reached MSI-X, check next capability */ + if (cap_id != PCI_CAP_ID_MSIX) { + ret = pread64(fd, &reg, sizeof(reg), + VFIO_GET_REGION_ADDR(VFIO_PCI_CONFIG_REGION_INDEX) + + cap_offset); + if (ret != sizeof(reg)) { + RTE_LOG(ERR, EAL, "Cannot read capability pointer from PCI " + "config space!\n"); + return -1; + } + + /* we need second byte */ + cap_offset = (reg & 0xFF00) >> 8; + + continue; + } + /* else, read table offset */ + else { + /* table offset resides in the next 4 bytes */ + ret = pread64(fd, &reg, sizeof(reg), + VFIO_GET_REGION_ADDR(VFIO_PCI_CONFIG_REGION_INDEX) + + cap_offset + 4); + if (ret != sizeof(reg)) { + RTE_LOG(ERR, EAL, "Cannot read table offset from PCI config " + "space!\n"); + return -1; + } + + ret = pread64(fd, &flags, sizeof(flags), + VFIO_GET_REGION_ADDR(VFIO_PCI_CONFIG_REGION_INDEX) + + cap_offset + 2); + if (ret != sizeof(flags)) { + RTE_LOG(ERR, EAL, "Cannot read table flags from PCI config " + "space!\n"); + return -1; + } + + *msix_bar = reg & RTE_PCI_MSIX_TABLE_BIR; + *msix_table_offset = reg & RTE_PCI_MSIX_TABLE_OFFSET; + *msix_table_size = 16 * (1 + (flags & RTE_PCI_MSIX_FLAGS_QSIZE)); + + return 0; + } + } + return 0; +} + +/* set PCI bus mastering */ +static int +pci_vfio_set_bus_master(int dev_fd) +{ + uint16_t reg; + int ret; + + ret = pread64(dev_fd, &reg, sizeof(reg), + VFIO_GET_REGION_ADDR(VFIO_PCI_CONFIG_REGION_INDEX) + + PCI_COMMAND); + if (ret != sizeof(reg)) { + RTE_LOG(ERR, EAL, "Cannot read command from PCI config space!\n"); + return -1; + } + + /* set the master bit */ + reg |= PCI_COMMAND_MASTER; + + ret =
pwrite64(dev_fd, ®, sizeof(reg), + VFIO_GET_REGION_ADDR(VFIO_PCI_CONFIG_REGION_INDEX) + + PCI_COMMAND); + + if (ret != sizeof(reg)) { + RTE_LOG(ERR, EAL, "Cannot write command to PCI config space!\n"); + return -1; + } + + return 0; +} + +/* set up DMA mappings */ +static int +pci_vfio_setup_dma_maps(int vfio_container_fd) +{ + const struct rte_memseg *ms = rte_eal_get_physmem_layout(); + int i, ret; + + ret = ioctl(vfio_container_fd, VFIO_SET_IOMMU, + VFIO_TYPE1_IOMMU); + if (ret) { + RTE_LOG(ERR, EAL, " cannot set IOMMU type, " + "error %i (%s)\n", errno, strerror(errno)); + return -1; + } + + /* map all DPDK segments for DMA. use 1:1 PA to IOVA mapping */ + for (i = 0; i < RTE_MAX_MEMSEG; i++) { + struct vfio_iommu_type1_dma_map dma_map; + + if (ms[i].addr == NULL) + break; + + memset(&dma_map, 0, sizeof(dma_map)); + dma_map.argsz = sizeof(struct vfio_iommu_type1_dma_map); + dma_map.vaddr = ms[i].addr_64; + dma_map.size = ms[i].len; + dma_map.iova = ms[i].phys_addr; + dma_map.flags = VFIO_DMA_MAP_FLAG_READ | VFIO_DMA_MAP_FLAG_WRITE; + + ret = ioctl(vfio_container_fd, VFIO_IOMMU_MAP_DMA, &dma_map); + + if (ret) { + RTE_LOG(ERR, EAL, " cannot set up DMA remapping, " + "error %i (%s)\n", errno, strerror(errno)); + return -1; + } + } + + return 0; +} + +/* set up interrupt support (but not enable interrupts) */ +static int +pci_vfio_setup_interrupts(struct rte_pci_device *dev, int vfio_dev_fd) +{ + int i, ret, intr_idx; + + /* default to invalid index */ + intr_idx = VFIO_PCI_NUM_IRQS; + + /* get interrupt type from internal config (MSI-X by default, can be + * overriden from the command line + */ + switch (internal_config.vfio_intr_mode) { + case RTE_INTR_MODE_MSIX: + intr_idx = VFIO_PCI_MSIX_IRQ_INDEX; + break; + case RTE_INTR_MODE_MSI: + intr_idx = VFIO_PCI_MSI_IRQ_INDEX; + break; + case RTE_INTR_MODE_LEGACY: + intr_idx = VFIO_PCI_INTX_IRQ_INDEX; + break; + /* don't do anything if we want to automatically determine interrupt type */ + case RTE_INTR_MODE_NONE: + 
break; + default: + RTE_LOG(ERR, EAL, " unknown default interrupt type!\n"); + return -1; + } + + /* start from MSI-X interrupt type */ + for (i = VFIO_PCI_MSIX_IRQ_INDEX; i >= 0; i--) { + struct vfio_irq_info irq = { .argsz = sizeof(irq) }; + int fd = -1; + + /* skip interrupt modes we don't want */ + if (internal_config.vfio_intr_mode != RTE_INTR_MODE_NONE && + i != intr_idx) + continue; + + irq.index = i; + + ret = ioctl(vfio_dev_fd, VFIO_DEVICE_GET_IRQ_INFO, &irq); + if (ret < 0) { + RTE_LOG(ERR, EAL, " cannot get IRQ info, " + "error %i (%s)\n", errno, strerror(errno)); + return -1; + } + + /* if this vector cannot be used with eventfd, fail if we explicitly + * specified interrupt type, otherwise continue */ + if ((irq.flags & VFIO_IRQ_INFO_EVENTFD) == 0) { + if (internal_config.vfio_intr_mode != RTE_INTR_MODE_NONE) { + RTE_LOG(ERR, EAL, + " interrupt vector does not support eventfd!\n"); + return -1; + } else + continue; + } + + /* set up an eventfd for interrupts */ + fd = eventfd(0, EFD_NONBLOCK | EFD_CLOEXEC); + if (fd < 0) { + RTE_LOG(ERR, EAL, " cannot set up eventfd, " + "error %i (%s)\n", errno, strerror(errno)); + return -1; + } + + dev->intr_handle.fd = fd; + dev->intr_handle.vfio_dev_fd = vfio_dev_fd; + + switch (i) { + case VFIO_PCI_MSIX_IRQ_INDEX: + internal_config.vfio_intr_mode = RTE_INTR_MODE_MSIX; + dev->intr_handle.type = RTE_INTR_HANDLE_VFIO_MSIX; + break; + case VFIO_PCI_MSI_IRQ_INDEX: + internal_config.vfio_intr_mode = RTE_INTR_MODE_MSI; + dev->intr_handle.type = RTE_INTR_HANDLE_VFIO_MSI; + break; + case VFIO_PCI_INTX_IRQ_INDEX: + internal_config.vfio_intr_mode = RTE_INTR_MODE_LEGACY; + dev->intr_handle.type = RTE_INTR_HANDLE_VFIO_LEGACY; + break; + default: + RTE_LOG(ERR, EAL, " unknown interrupt type!\n"); + return -1; + } + + return 0; + } + + /* if we're here, we haven't found a suitable interrupt vector */ + return -1; +} + +/* open container fd or get an existing one */ +int +pci_vfio_get_container_fd(void) +{ + int ret, 
vfio_container_fd; + + /* if we're in a primary process, try to open the container */ + if (internal_config.process_type == RTE_PROC_PRIMARY) { + vfio_container_fd = open(VFIO_CONTAINER_PATH, O_RDWR); + if (vfio_container_fd < 0) { + RTE_LOG(ERR, EAL, " cannot open VFIO container, " + "error %i (%s)\n", errno, strerror(errno)); + return -1; + } + + /* check VFIO API version */ + ret = ioctl(vfio_container_fd, VFIO_GET_API_VERSION); + if (ret != VFIO_API_VERSION) { + if (ret < 0) + RTE_LOG(ERR, EAL, " could not get VFIO API version, " + "error %i (%s)\n", errno, strerror(errno)); + else + RTE_LOG(ERR, EAL, " unsupported VFIO API version!\n"); + close(vfio_container_fd); + return -1; + } + + /* check if we support IOMMU type 1 */ + ret = ioctl(vfio_container_fd, VFIO_CHECK_EXTENSION, VFIO_TYPE1_IOMMU); + if (ret != 1) { + if (ret < 0) + RTE_LOG(ERR, EAL, " could not get IOMMU type, " + "error %i (%s)\n", errno, + strerror(errno)); + else + RTE_LOG(ERR, EAL, " unsupported IOMMU type " + "detected in VFIO\n"); + close(vfio_container_fd); + return -1; + } + + return vfio_container_fd; + } else { + /* + * if we're in a secondary process, request container fd from the + * primary process via our socket + */ + int socket_fd; + + socket_fd = vfio_mp_sync_connect_to_primary(); + if (socket_fd < 0) { + RTE_LOG(ERR, EAL, " cannot connect to primary process!\n"); + return -1; + } + if (vfio_mp_sync_send_request(socket_fd, SOCKET_REQ_CONTAINER) < 0) { + RTE_LOG(ERR, EAL, " cannot request container fd!\n"); + close(socket_fd); + return -1; + } + vfio_container_fd = vfio_mp_sync_receive_fd(socket_fd); + if (vfio_container_fd < 0) { + RTE_LOG(ERR, EAL, " cannot get container fd!\n"); + close(socket_fd); + return -1; + } + close(socket_fd); + return vfio_container_fd; + } + + return -1; +} + +/* open group fd or get an existing one */ +int +pci_vfio_get_group_fd(int iommu_group_no) +{ + int i; + int vfio_group_fd; + char filename[PATH_MAX]; + + /* check if we already have the group 
descriptor open */ + for (i = 0; i < vfio_cfg.vfio_group_idx; i++) + if (vfio_cfg.vfio_groups[i].group_no == iommu_group_no) + return vfio_cfg.vfio_groups[i].fd; + + /* if primary, try to open the group */ + if (internal_config.process_type == RTE_PROC_PRIMARY) { + snprintf(filename, sizeof(filename), + VFIO_GROUP_FMT, iommu_group_no); + vfio_group_fd = open(filename, O_RDWR); + if (vfio_group_fd < 0) { + /* if file not found, it's not an error */ + if (errno != ENOENT) { + RTE_LOG(ERR, EAL, "Cannot open %s: %s\n", filename, + strerror(errno)); + return -1; + } + return 0; + } + + /* if the fd is valid, create a new group for it */ + if (vfio_cfg.vfio_group_idx == VFIO_MAX_GROUPS) { + RTE_LOG(ERR, EAL, "Maximum number of VFIO groups reached!\n"); + return -1; + } + vfio_cfg.vfio_groups[vfio_cfg.vfio_group_idx].group_no = iommu_group_no; + vfio_cfg.vfio_groups[vfio_cfg.vfio_group_idx].fd = vfio_group_fd; + return vfio_group_fd; + } + /* if we're in a secondary process, request group fd from the primary + * process via our socket + */ + else { + int socket_fd, ret; + + socket_fd = vfio_mp_sync_connect_to_primary(); + + if (socket_fd < 0) { + RTE_LOG(ERR, EAL, " cannot connect to primary process!\n"); + return -1; + } + if (vfio_mp_sync_send_request(socket_fd, SOCKET_REQ_GROUP) < 0) { + RTE_LOG(ERR, EAL, " cannot request container fd!\n"); + close(socket_fd); + return -1; + } + if (vfio_mp_sync_send_request(socket_fd, iommu_group_no) < 0) { + RTE_LOG(ERR, EAL, " cannot send group number!\n"); + close(socket_fd); + return -1; + } + ret = vfio_mp_sync_receive_request(socket_fd); + switch (ret) { + case SOCKET_NO_FD: + close(socket_fd); + return 0; + case SOCKET_OK: + vfio_group_fd = vfio_mp_sync_receive_fd(socket_fd); + /* if we got the fd, return it */ + if (vfio_group_fd > 0) { + close(socket_fd); + return vfio_group_fd; + } + /* fall-through on error */ + default: + RTE_LOG(ERR, EAL, " cannot get container fd!\n"); + close(socket_fd); + return -1; + } + } + return 
-1; +} + +/* parse IOMMU group number for a PCI device + * returns 1 on success, -1 for errors, 0 for non-existent group + */ +static int +pci_vfio_get_group_no(const char *pci_addr, int *iommu_group_no) +{ + char linkname[PATH_MAX]; + char filename[PATH_MAX]; + char *tok[16], *group_tok, *end; + int ret; + + memset(linkname, 0, sizeof(linkname)); + memset(filename, 0, sizeof(filename)); + + /* try to find out IOMMU group for this device */ + snprintf(linkname, sizeof(linkname), + SYSFS_PCI_DEVICES "/%s/iommu_group", pci_addr); + + ret = readlink(linkname, filename, sizeof(filename)); + + /* if the link doesn't exist, no VFIO for us */ + if (ret < 0) + return 0; + + ret = rte_strsplit(filename, sizeof(filename), + tok, RTE_DIM(tok), '/'); + + if (ret <= 0) { + RTE_LOG(ERR, EAL, " %s cannot get IOMMU group\n", pci_addr); + return -1; + } + + /* IOMMU group is always the last token */ + errno = 0; + group_tok = tok[ret - 1]; + end = group_tok; + *iommu_group_no = strtol(group_tok, &end, 10); + if ((end != group_tok && *end != '\0') || errno != 0) { + RTE_LOG(ERR, EAL, " %s error parsing IOMMU number!\n", pci_addr); + return -1; + } + + return 1; +} + +static void +clear_current_group(void) +{ + vfio_cfg.vfio_groups[vfio_cfg.vfio_group_idx].group_no = 0; + vfio_cfg.vfio_groups[vfio_cfg.vfio_group_idx].fd = -1; +} + + +/* + * map the PCI resources of a PCI device in virtual memory (VFIO version). 
+ * primary and secondary processes follow almost exactly the same path + */ +int +pci_vfio_map_resource(struct rte_pci_device *dev) +{ + struct vfio_group_status group_status = { + .argsz = sizeof(group_status) + }; + struct vfio_device_info device_info = { .argsz = sizeof(device_info) }; + int vfio_group_fd, vfio_dev_fd; + int iommu_group_no; + char pci_addr[PATH_MAX] = {0}; + struct rte_pci_addr *loc = &dev->addr; + int i, ret, msix_bar; + struct mapped_pci_resource *vfio_res = NULL; + struct mapped_pci_res_list *vfio_res_list = RTE_TAILQ_CAST(rte_vfio_tailq.head, mapped_pci_res_list); + + struct pci_map *maps; + uint32_t msix_table_offset = 0; + uint32_t msix_table_size = 0; + + dev->intr_handle.fd = -1; + dev->intr_handle.type = RTE_INTR_HANDLE_UNKNOWN; + + /* store PCI address string */ + snprintf(pci_addr, sizeof(pci_addr), PCI_PRI_FMT, + loc->domain, loc->bus, loc->devid, loc->function); + + /* get group number */ + ret = pci_vfio_get_group_no(pci_addr, &iommu_group_no); + if (ret == 0) { + RTE_LOG(WARNING, EAL, " %s not managed by VFIO driver, skipping\n", + pci_addr); + return 1; + } + + /* if negative, something failed */ + if (ret < 0) + return -1; + + /* get the actual group fd */ + vfio_group_fd = pci_vfio_get_group_fd(iommu_group_no); + if (vfio_group_fd < 0) + return -1; + + /* store group fd */ + vfio_cfg.vfio_groups[vfio_cfg.vfio_group_idx].group_no = iommu_group_no; + vfio_cfg.vfio_groups[vfio_cfg.vfio_group_idx].fd = vfio_group_fd; + + /* if group_fd == 0, that means the device isn't managed by VFIO */ + if (vfio_group_fd == 0) { + RTE_LOG(WARNING, EAL, " %s not managed by VFIO driver, skipping\n", + pci_addr); + /* we store 0 as group fd to distinguish between existing but + * unbound VFIO groups, and groups that don't exist at all. 
+ */ + vfio_cfg.vfio_group_idx++; + return 1; + } + + /* + * at this point, we know at least one port on this device is bound to VFIO, + * so we can proceed to try and set this particular port up + */ + + /* check if the group is viable */ + ret = ioctl(vfio_group_fd, VFIO_GROUP_GET_STATUS, &group_status); + if (ret) { + RTE_LOG(ERR, EAL, " %s cannot get group status, " + "error %i (%s)\n", pci_addr, errno, strerror(errno)); + close(vfio_group_fd); + clear_current_group(); + return -1; + } else if (!(group_status.flags & VFIO_GROUP_FLAGS_VIABLE)) { + RTE_LOG(ERR, EAL, " %s VFIO group is not viable!\n", pci_addr); + close(vfio_group_fd); + clear_current_group(); + return -1; + } + + /* + * at this point, we know that this group is viable (meaning, all devices + * are either bound to VFIO or not bound to anything) + */ + + /* check if group does not have a container yet */ + if (!(group_status.flags & VFIO_GROUP_FLAGS_CONTAINER_SET)) { + + /* add group to a container */ + ret = ioctl(vfio_group_fd, VFIO_GROUP_SET_CONTAINER, + &vfio_cfg.vfio_container_fd); + if (ret) { + RTE_LOG(ERR, EAL, " %s cannot add VFIO group to container, " + "error %i (%s)\n", pci_addr, errno, strerror(errno)); + close(vfio_group_fd); + clear_current_group(); + return -1; + } + /* + * at this point we know that this group has been successfully + * initialized, so we increment vfio_group_idx to indicate that we can + * add new groups. 
+ */ + vfio_cfg.vfio_group_idx++; + } + + /* + * set up DMA mappings for container + * + * needs to be done only once, only when at least one group is assigned to + * a container and only in primary process + */ + if (internal_config.process_type == RTE_PROC_PRIMARY && + vfio_cfg.vfio_container_has_dma == 0) { + ret = pci_vfio_setup_dma_maps(vfio_cfg.vfio_container_fd); + if (ret) { + RTE_LOG(ERR, EAL, " %s DMA remapping failed, " + "error %i (%s)\n", pci_addr, errno, strerror(errno)); + return -1; + } + vfio_cfg.vfio_container_has_dma = 1; + } + + /* get a file descriptor for the device */ + vfio_dev_fd = ioctl(vfio_group_fd, VFIO_GROUP_GET_DEVICE_FD, pci_addr); + if (vfio_dev_fd < 0) { + /* if we cannot get a device fd, this simply means that this + * particular port is not bound to VFIO + */ + RTE_LOG(WARNING, EAL, " %s not managed by VFIO driver, skipping\n", + pci_addr); + return 1; + } + + /* test and setup the device */ + ret = ioctl(vfio_dev_fd, VFIO_DEVICE_GET_INFO, &device_info); + if (ret) { + RTE_LOG(ERR, EAL, " %s cannot get device info, " + "error %i (%s)\n", pci_addr, errno, strerror(errno)); + close(vfio_dev_fd); + return -1; + } + + /* get MSI-X BAR, if any (we have to know where it is because we can't + * easily mmap it when using VFIO) */ + msix_bar = -1; + ret = pci_vfio_get_msix_bar(vfio_dev_fd, &msix_bar, + &msix_table_offset, &msix_table_size); + if (ret < 0) { + RTE_LOG(ERR, EAL, " %s cannot get MSI-X BAR number!\n", pci_addr); + close(vfio_dev_fd); + return -1; + } + + /* if we're in a primary process, allocate vfio_res and get region info */ + if (internal_config.process_type == RTE_PROC_PRIMARY) { + vfio_res = rte_zmalloc("VFIO_RES", sizeof(*vfio_res), 0); + if (vfio_res == NULL) { + RTE_LOG(ERR, EAL, + "%s(): cannot store uio mmap details\n", __func__); + close(vfio_dev_fd); + return -1; + } + memcpy(&vfio_res->pci_addr, &dev->addr, sizeof(vfio_res->pci_addr)); + + /* get number of registers (up to BAR5) */ + vfio_res->nb_maps = 
RTE_MIN((int) device_info.num_regions, + VFIO_PCI_BAR5_REGION_INDEX + 1); + } else { + /* if we're in a secondary process, just find our tailq entry */ + TAILQ_FOREACH(vfio_res, vfio_res_list, next) { + if (memcmp(&vfio_res->pci_addr, &dev->addr, sizeof(dev->addr))) + continue; + break; + } + /* if we haven't found our tailq entry, something's wrong */ + if (vfio_res == NULL) { + RTE_LOG(ERR, EAL, " %s cannot find TAILQ entry for PCI device!\n", + pci_addr); + close(vfio_dev_fd); + return -1; + } + } + + /* map BARs */ + maps = vfio_res->maps; + + for (i = 0; i < (int) vfio_res->nb_maps; i++) { + struct vfio_region_info reg = { .argsz = sizeof(reg) }; + void *bar_addr; + struct memreg { + unsigned long offset, size; + } memreg[2] = {}; + + reg.index = i; + + ret = ioctl(vfio_dev_fd, VFIO_DEVICE_GET_REGION_INFO, ®); + + if (ret) { + RTE_LOG(ERR, EAL, " %s cannot get device region info " + "error %i (%s)\n", pci_addr, errno, strerror(errno)); + close(vfio_dev_fd); + if (internal_config.process_type == RTE_PROC_PRIMARY) + rte_free(vfio_res); + return -1; + } + + /* skip non-mmapable BARs */ + if ((reg.flags & VFIO_REGION_INFO_FLAG_MMAP) == 0) + continue; + + if (i == msix_bar) { + /* + * VFIO will not let us map the MSI-X table, + * but we can map around it. + */ + uint32_t table_start = msix_table_offset; + uint32_t table_end = table_start + msix_table_size; + table_end = (table_end + ~PAGE_MASK) & PAGE_MASK; + table_start &= PAGE_MASK; + + if (table_start == 0 && table_end >= reg.size) { + /* Cannot map this BAR */ + RTE_LOG(DEBUG, EAL, "Skipping BAR %d\n", i); + continue; + } else { + memreg[0].offset = reg.offset; + memreg[0].size = table_start; + memreg[1].offset = table_end; + memreg[1].size = reg.size - table_end; + + RTE_LOG(DEBUG, EAL, + "Trying to map BAR %d that contains the MSI-X " + "table. 
Trying offsets: " + "0x%04lx:0x%04lx, 0x%04lx:0x%04lx\n", i, + memreg[0].offset, memreg[0].size, + memreg[1].offset, memreg[1].size); + } + } else { + memreg[0].offset = reg.offset; + memreg[0].size = reg.size; + } + + /* try to figure out an address */ + if (internal_config.process_type == RTE_PROC_PRIMARY) { + /* try mapping somewhere close to the end of hugepages */ + if (pci_map_addr == NULL) + pci_map_addr = pci_find_max_end_va(); + + bar_addr = pci_map_addr; + pci_map_addr = RTE_PTR_ADD(bar_addr, (size_t) reg.size); + } else { + bar_addr = maps[i].addr; + } + + /* reserve the address using an inaccessible mapping */ + bar_addr = mmap(bar_addr, reg.size, 0, MAP_PRIVATE | + MAP_ANONYMOUS, -1, 0); + if (bar_addr != MAP_FAILED) { + void *map_addr = NULL; + if (memreg[0].size) { + /* actual map of first part */ + map_addr = pci_map_resource(bar_addr, vfio_dev_fd, + memreg[0].offset, + memreg[0].size, + MAP_FIXED); + } + + /* if there's a second part, try to map it */ + if (map_addr != MAP_FAILED + && memreg[1].offset && memreg[1].size) { + void *second_addr = RTE_PTR_ADD(bar_addr, memreg[1].offset); + map_addr = pci_map_resource(second_addr, + vfio_dev_fd, memreg[1].offset, + memreg[1].size, + MAP_FIXED); + } + + if (map_addr == MAP_FAILED || !map_addr) { + munmap(bar_addr, reg.size); + bar_addr = MAP_FAILED; + } + } + + if (bar_addr == MAP_FAILED || + (internal_config.process_type == RTE_PROC_SECONDARY && + bar_addr != maps[i].addr)) { + RTE_LOG(ERR, EAL, " %s mapping BAR%i failed: %s\n", pci_addr, i, + strerror(errno)); + close(vfio_dev_fd); + if (internal_config.process_type == RTE_PROC_PRIMARY) + rte_free(vfio_res); + return -1; + } + + maps[i].addr = bar_addr; + maps[i].offset = reg.offset; + maps[i].size = reg.size; + maps[i].path = NULL; /* vfio doesn't have per-resource paths */ + dev->mem_resource[i].addr = bar_addr; + } + + /* if secondary process, do not set up interrupts */ + if (internal_config.process_type == RTE_PROC_PRIMARY) { + if 
(pci_vfio_setup_interrupts(dev, vfio_dev_fd) != 0) { + RTE_LOG(ERR, EAL, " %s error setting up interrupts!\n", pci_addr); + close(vfio_dev_fd); + rte_free(vfio_res); + return -1; + } + + /* set bus mastering for the device */ + if (pci_vfio_set_bus_master(vfio_dev_fd)) { + RTE_LOG(ERR, EAL, " %s cannot set up bus mastering!\n", pci_addr); + close(vfio_dev_fd); + rte_free(vfio_res); + return -1; + } + + /* Reset the device */ + ioctl(vfio_dev_fd, VFIO_DEVICE_RESET); + } + + if (internal_config.process_type == RTE_PROC_PRIMARY) + TAILQ_INSERT_TAIL(vfio_res_list, vfio_res, next); + + return 0; +} + +int +pci_vfio_enable(void) +{ + /* initialize group list */ + int i; + int module_vfio_type1; + + for (i = 0; i < VFIO_MAX_GROUPS; i++) { + vfio_cfg.vfio_groups[i].fd = -1; + vfio_cfg.vfio_groups[i].group_no = -1; + } + + module_vfio_type1 = rte_eal_check_module("vfio_iommu_type1"); + + /* return error directly */ + if (module_vfio_type1 == -1) { + RTE_LOG(INFO, EAL, "Could not get loaded module details!\n"); + return -1; + } + + /* return 0 if VFIO modules not loaded */ + if (module_vfio_type1 == 0) { + RTE_LOG(INFO, EAL, "VFIO modules not all loaded, " + "skip VFIO support...\n"); + return 0; + } + + vfio_cfg.vfio_container_fd = pci_vfio_get_container_fd(); + + /* check if we have VFIO driver enabled */ + if (vfio_cfg.vfio_container_fd != -1) + vfio_cfg.vfio_enabled = 1; + else + RTE_LOG(NOTICE, EAL, "VFIO support could not be initialized\n"); + + return 0; +} + +int +pci_vfio_is_enabled(void) +{ + return vfio_cfg.vfio_enabled; +} +#endif diff --git a/src/dpdk22/lib/librte_eal/linuxapp/eal/eal_pci_vfio_mp_sync.c b/src/dpdk22/lib/librte_eal/linuxapp/eal/eal_pci_vfio_mp_sync.c new file mode 100644 index 00000000..d9188fde --- /dev/null +++ b/src/dpdk22/lib/librte_eal/linuxapp/eal/eal_pci_vfio_mp_sync.c @@ -0,0 +1,405 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. + * All rights reserved. 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#include +#include +#include +#include + +/* sys/un.h with __USE_MISC uses strlen, which is unsafe */ +#ifdef __USE_MISC +#define REMOVED_USE_MISC +#undef __USE_MISC +#endif +#include +/* make sure we redefine __USE_MISC only if it was previously undefined */ +#ifdef REMOVED_USE_MISC +#define __USE_MISC +#undef REMOVED_USE_MISC +#endif + +#include +#include +#include +#include + +#include "eal_filesystem.h" +#include "eal_pci_init.h" +#include "eal_thread.h" + +/** + * @file + * VFIO socket for communication between primary and secondary processes. + * + * This file is only compiled if CONFIG_RTE_EAL_VFIO is set to "y". + */ + +#ifdef VFIO_PRESENT + +#define SOCKET_PATH_FMT "%s/.%s_mp_socket" +#define CMSGLEN (CMSG_LEN(sizeof(int))) +#define FD_TO_CMSGHDR(fd, chdr) \ + do {\ + (chdr).cmsg_len = CMSGLEN;\ + (chdr).cmsg_level = SOL_SOCKET;\ + (chdr).cmsg_type = SCM_RIGHTS;\ + memcpy((chdr).__cmsg_data, &(fd), sizeof(fd));\ + } while (0) +#define CMSGHDR_TO_FD(chdr, fd) \ + memcpy(&(fd), (chdr).__cmsg_data, sizeof(fd)) + +static pthread_t socket_thread; +static int mp_socket_fd; + + +/* get socket path (/var/run if root, $HOME otherwise) */ +static void +get_socket_path(char *buffer, int bufsz) +{ + const char *dir = "/var/run"; + const char *home_dir = getenv("HOME"); + + if (getuid() != 0 && home_dir != NULL) + dir = home_dir; + + /* use current prefix as file path */ + snprintf(buffer, bufsz, SOCKET_PATH_FMT, dir, + internal_config.hugefile_prefix); +} + + + +/* + * data flow for socket comm protocol: + * 1. client sends SOCKET_REQ_CONTAINER or SOCKET_REQ_GROUP + * 1a. in case of SOCKET_REQ_GROUP, client also then sends group number + * 2. server receives message + * 2a. in case of invalid group, SOCKET_ERR is sent back to client + * 2b. in case of unbound group, SOCKET_NO_FD is sent back to client + * 2c. in case of valid group, SOCKET_OK is sent and immediately followed by fd + * + * in case of any error, socket is closed. 
+ */ + +/* send a request, return -1 on error */ +int +vfio_mp_sync_send_request(int socket, int req) +{ + struct msghdr hdr; + struct iovec iov; + int buf; + int ret; + + memset(&hdr, 0, sizeof(hdr)); + + buf = req; + + hdr.msg_iov = &iov; + hdr.msg_iovlen = 1; + iov.iov_base = (char *) &buf; + iov.iov_len = sizeof(buf); + + ret = sendmsg(socket, &hdr, 0); + if (ret < 0) + return -1; + return 0; +} + +/* receive a request and return it */ +int +vfio_mp_sync_receive_request(int socket) +{ + int buf; + struct msghdr hdr; + struct iovec iov; + int ret, req; + + memset(&hdr, 0, sizeof(hdr)); + + buf = SOCKET_ERR; + + hdr.msg_iov = &iov; + hdr.msg_iovlen = 1; + iov.iov_base = (char *) &buf; + iov.iov_len = sizeof(buf); + + ret = recvmsg(socket, &hdr, 0); + if (ret < 0) + return -1; + + req = buf; + + return req; +} + +/* send OK in message, fd in control message */ +int +vfio_mp_sync_send_fd(int socket, int fd) +{ + int buf; + struct msghdr hdr; + struct cmsghdr *chdr; + char chdr_buf[CMSGLEN]; + struct iovec iov; + int ret; + + chdr = (struct cmsghdr *) chdr_buf; + memset(chdr, 0, sizeof(chdr_buf)); + memset(&hdr, 0, sizeof(hdr)); + + hdr.msg_iov = &iov; + hdr.msg_iovlen = 1; + iov.iov_base = (char *) &buf; + iov.iov_len = sizeof(buf); + hdr.msg_control = chdr; + hdr.msg_controllen = CMSGLEN; + + buf = SOCKET_OK; + FD_TO_CMSGHDR(fd, *chdr); + + ret = sendmsg(socket, &hdr, 0); + if (ret < 0) + return -1; + return 0; +} + +/* receive OK in message, fd in control message */ +int +vfio_mp_sync_receive_fd(int socket) +{ + int buf; + struct msghdr hdr; + struct cmsghdr *chdr; + char chdr_buf[CMSGLEN]; + struct iovec iov; + int ret, req, fd; + + buf = SOCKET_ERR; + + chdr = (struct cmsghdr *) chdr_buf; + memset(chdr, 0, sizeof(chdr_buf)); + memset(&hdr, 0, sizeof(hdr)); + + hdr.msg_iov = &iov; + hdr.msg_iovlen = 1; + iov.iov_base = (char *) &buf; + iov.iov_len = sizeof(buf); + hdr.msg_control = chdr; + hdr.msg_controllen = CMSGLEN; + + ret = recvmsg(socket, &hdr, 0); + if 
(ret < 0) + return -1; + + req = buf; + + if (req != SOCKET_OK) + return -1; + + CMSGHDR_TO_FD(*chdr, fd); + + return fd; +} + +/* connect socket_fd in secondary process to the primary process's socket */ +int +vfio_mp_sync_connect_to_primary(void) +{ + struct sockaddr_un addr; + socklen_t sockaddr_len; + int socket_fd; + + /* set up a socket */ + socket_fd = socket(AF_UNIX, SOCK_SEQPACKET, 0); + if (socket_fd < 0) { + RTE_LOG(ERR, EAL, "Failed to create socket!\n"); + return -1; + } + + get_socket_path(addr.sun_path, sizeof(addr.sun_path)); + addr.sun_family = AF_UNIX; + + sockaddr_len = sizeof(struct sockaddr_un); + + if (connect(socket_fd, (struct sockaddr *) &addr, sockaddr_len) == 0) + return socket_fd; + + /* if connect failed */ + close(socket_fd); + return -1; +} + + + +/* + * socket listening thread for primary process + */ +static __attribute__((noreturn)) void * +pci_vfio_mp_sync_thread(void __rte_unused * arg) +{ + int ret, fd, vfio_group_no; + + /* wait for requests on the socket */ + for (;;) { + int conn_sock; + struct sockaddr_un addr; + socklen_t sockaddr_len = sizeof(addr); + + /* this is a blocking call */ + conn_sock = accept(mp_socket_fd, (struct sockaddr *) &addr, + &sockaddr_len); + + /* just restart on error */ + if (conn_sock == -1) + continue; + + /* set socket to linger after close */ + struct linger l; + l.l_onoff = 1; + l.l_linger = 60; + setsockopt(conn_sock, SOL_SOCKET, SO_LINGER, &l, sizeof(l)); + + ret = vfio_mp_sync_receive_request(conn_sock); + + switch (ret) { + case SOCKET_REQ_CONTAINER: + fd = pci_vfio_get_container_fd(); + if (fd < 0) + vfio_mp_sync_send_request(conn_sock, SOCKET_ERR); + else + vfio_mp_sync_send_fd(conn_sock, fd); + break; + case SOCKET_REQ_GROUP: + /* wait for group number */ + vfio_group_no = vfio_mp_sync_receive_request(conn_sock); + if (vfio_group_no < 0) { + close(conn_sock); + continue; + } + + fd = pci_vfio_get_group_fd(vfio_group_no); + + if (fd < 0) + vfio_mp_sync_send_request(conn_sock, SOCKET_ERR); 
+ /* if VFIO group exists but isn't bound to VFIO driver */ + else if (fd == 0) + vfio_mp_sync_send_request(conn_sock, SOCKET_NO_FD); + /* if group exists and is bound to VFIO driver */ + else { + vfio_mp_sync_send_request(conn_sock, SOCKET_OK); + vfio_mp_sync_send_fd(conn_sock, fd); + } + break; + default: + vfio_mp_sync_send_request(conn_sock, SOCKET_ERR); + break; + } + close(conn_sock); + } +} + +static int +vfio_mp_sync_socket_setup(void) +{ + int ret, socket_fd; + struct sockaddr_un addr; + socklen_t sockaddr_len; + + /* set up a socket */ + socket_fd = socket(AF_UNIX, SOCK_SEQPACKET, 0); + if (socket_fd < 0) { + RTE_LOG(ERR, EAL, "Failed to create socket!\n"); + return -1; + } + + get_socket_path(addr.sun_path, sizeof(addr.sun_path)); + addr.sun_family = AF_UNIX; + + sockaddr_len = sizeof(struct sockaddr_un); + + unlink(addr.sun_path); + + ret = bind(socket_fd, (struct sockaddr *) &addr, sockaddr_len); + if (ret) { + RTE_LOG(ERR, EAL, "Failed to bind socket: %s!\n", strerror(errno)); + close(socket_fd); + return -1; + } + + ret = listen(socket_fd, 50); + if (ret) { + RTE_LOG(ERR, EAL, "Failed to listen: %s!\n", strerror(errno)); + close(socket_fd); + return -1; + } + + /* save the socket in local configuration */ + mp_socket_fd = socket_fd; + + return 0; +} + +/* + * set up a local socket and tell it to listen for incoming connections + */ +int +pci_vfio_mp_sync_setup(void) +{ + int ret; + char thread_name[RTE_MAX_THREAD_NAME_LEN]; + + if (vfio_mp_sync_socket_setup() < 0) { + RTE_LOG(ERR, EAL, "Failed to set up local socket!\n"); + return -1; + } + + ret = pthread_create(&socket_thread, NULL, + pci_vfio_mp_sync_thread, NULL); + if (ret) { + RTE_LOG(ERR, EAL, + "Failed to create thread for communication with secondary processes!\n"); + close(mp_socket_fd); + return -1; + } + + /* Set thread_name for aid in debugging. 
*/ + snprintf(thread_name, RTE_MAX_THREAD_NAME_LEN, "pci-vfio-sync"); + ret = rte_thread_setname(socket_thread, thread_name); + if (ret) + RTE_LOG(ERR, EAL, + "Failed to set thread name for secondary processes!\n"); + + return 0; +} + +#endif diff --git a/src/dpdk22/lib/librte_eal/linuxapp/eal/eal_thread.c b/src/dpdk22/lib/librte_eal/linuxapp/eal/eal_thread.c new file mode 100644 index 00000000..18bd8e04 --- /dev/null +++ b/src/dpdk22/lib/librte_eal/linuxapp/eal/eal_thread.c @@ -0,0 +1,199 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "eal_private.h" +#include "eal_thread.h" + +RTE_DEFINE_PER_LCORE(unsigned, _lcore_id) = LCORE_ID_ANY; +RTE_DEFINE_PER_LCORE(unsigned, _socket_id) = (unsigned)SOCKET_ID_ANY; +RTE_DEFINE_PER_LCORE(rte_cpuset_t, _cpuset); + +/* + * Send a message to a slave lcore identified by slave_id to call a + * function f with argument arg. Once the execution is done, the + * remote lcore switch in FINISHED state. 
 *
 * Returns 0 once the slave has acknowledged the request, or -EBUSY when
 * the slave is not in the WAIT state. Pipe failures end in rte_panic().
 */
int
rte_eal_remote_launch(int (*f)(void *), void *arg, unsigned slave_id)
{
	int n;
	char c = 0;
	/* write end of master->slave pipe, read end of slave->master pipe */
	int m2s = lcore_config[slave_id].pipe_master2slave[1];
	int s2m = lcore_config[slave_id].pipe_slave2master[0];

	if (lcore_config[slave_id].state != WAIT)
		return -EBUSY;

	/* publish the work item before waking the slave */
	lcore_config[slave_id].f = f;
	lcore_config[slave_id].arg = arg;

	/* send message */
	n = 0;
	/* retry while nothing was written or the call was interrupted */
	while (n == 0 || (n < 0 && errno == EINTR))
		n = write(m2s, &c, 1);
	if (n < 0)
		rte_panic("cannot write on configuration pipe\n");

	/* wait ack */
	do {
		n = read(s2m, &c, 1);
	} while (n < 0 && errno == EINTR);

	/* 0 means the pipe was closed, which is as fatal as an error here */
	if (n <= 0)
		rte_panic("cannot read on configuration pipe\n");

	return 0;
}

/* set affinity for current EAL thread */
static int
eal_thread_set_affinity(void)
{
	unsigned lcore_id = rte_lcore_id();

	/* acquire system unique id */
	rte_gettid();

	/* update EAL thread core affinity */
	return rte_thread_set_affinity(&lcore_config[lcore_id].cpuset);
}

/*
 * Record lcore_id as the calling thread's lcore and apply that lcore's
 * CPU affinity; panics if the affinity cannot be set.
 */
void eal_thread_init_master(unsigned lcore_id)
{
	/* set the lcore ID in per-lcore memory area */
	RTE_PER_LCORE(_lcore_id) = lcore_id;

	/* set CPU affinity */
	if (eal_thread_set_affinity() < 0)
		rte_panic("cannot set affinity\n");
}

/*
 * main loop of threads
 *
 * Finds its own lcore by matching pthread_self() against lcore_config[],
 * then loops forever: wait for a byte on the master->slave pipe, mark the
 * lcore RUNNING, acknowledge on the slave->master pipe, run the configured
 * function and store its result before marking the lcore FINISHED.
 */
__attribute__((noreturn)) void *
eal_thread_loop(__attribute__((unused)) void *arg)
{
	char c;
	int n, ret;
	unsigned lcore_id;
	pthread_t thread_id;
	int m2s, s2m;
	char cpuset[RTE_CPU_AFFINITY_STR_LEN];

	thread_id = pthread_self();

	/* retrieve our lcore_id from the configuration structure */
	RTE_LCORE_FOREACH_SLAVE(lcore_id) {
		if (thread_id == lcore_config[lcore_id].thread_id)
			break;
	}
	/* the loop ran to the end without a match */
	if (lcore_id == RTE_MAX_LCORE)
		rte_panic("cannot retrieve lcore id\n");

	/* read end of master->slave pipe, write end of slave->master pipe */
	m2s = lcore_config[lcore_id].pipe_master2slave[0];
	s2m = lcore_config[lcore_id].pipe_slave2master[1];

	/* set the lcore ID in per-lcore memory area */
	RTE_PER_LCORE(_lcore_id) = lcore_id;

	/* set CPU affinity */
	if (eal_thread_set_affinity() < 0)
		rte_panic("cannot set affinity\n");

	ret = eal_thread_dump_affinity(cpuset, RTE_CPU_AFFINITY_STR_LEN);

	/* a non-zero ret is shown as a trailing "..." after the cpuset */
	RTE_LOG(DEBUG, EAL, "lcore %u is ready (tid=%x;cpuset=[%s%s])\n",
		lcore_id, (int)thread_id, cpuset, ret == 0 ? "" : "...");

	/* read on our pipe to get commands */
	while (1) {
		void *fct_arg;

		/* wait command */
		do {
			n = read(m2s, &c, 1);
		} while (n < 0 && errno == EINTR);

		if (n <= 0)
			rte_panic("cannot read on configuration pipe\n");

		/* state changes before the ack, so the master sees RUNNING
		 * as soon as rte_eal_remote_launch() returns */
		lcore_config[lcore_id].state = RUNNING;

		/* send ack */
		n = 0;
		while (n == 0 || (n < 0 && errno == EINTR))
			n = write(s2m, &c, 1);
		if (n < 0)
			rte_panic("cannot write on configuration pipe\n");

		if (lcore_config[lcore_id].f == NULL)
			rte_panic("NULL function pointer\n");

		/* call the function and store the return value */
		fct_arg = lcore_config[lcore_id].arg;
		ret = lcore_config[lcore_id].f(fct_arg);
		lcore_config[lcore_id].ret = ret;
		/* write barrier between storing ret and publishing FINISHED */
		rte_wmb();
		lcore_config[lcore_id].state = FINISHED;
	}

	/* never reached */
	/* pthread_exit(NULL); */
	/* return NULL; */
}

/* require calling thread tid by gettid() */
int rte_sys_gettid(void)
{
	return (int)syscall(SYS_gettid);
}
diff --git a/src/dpdk22/lib/librte_eal/linuxapp/eal/eal_timer.c b/src/dpdk22/lib/librte_eal/linuxapp/eal/eal_timer.c
new file mode 100644
index 00000000..9ceff330
--- /dev/null
+++ b/src/dpdk22/lib/librte_eal/linuxapp/eal/eal_timer.c
@@ -0,0 +1,304 @@
/*-
 * BSD LICENSE
 *
 * Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
 * Copyright(c) 2012-2013 6WIND S.A.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 * * Redistributions of source code must retain the above copyright
 * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include + +#include "eal_private.h" +#include "eal_internal_cfg.h" + +enum timer_source eal_timer_source = EAL_TIMER_HPET; + +#ifdef RTE_LIBEAL_USE_HPET + +#define DEV_HPET "/dev/hpet" + +/* Maximum number of counters. */ +#define HPET_TIMER_NUM 3 + +/* General capabilities register */ +#define CLK_PERIOD_SHIFT 32 /* Clock period shift. */ +#define CLK_PERIOD_MASK 0xffffffff00000000ULL /* Clock period mask. */ + +/** + * HPET timer registers. From the Intel IA-PC HPET (High Precision Event + * Timers) Specification. 
+ */
+struct eal_hpet_regs {
+	/* Memory-mapped, software visible registers */
+	uint64_t capabilities;      /**< RO General Capabilities Register. */
+	uint64_t reserved0;         /**< Reserved for future use. */
+	uint64_t config;            /**< RW General Configuration Register. */
+	uint64_t reserved1;         /**< Reserved for future use. */
+	uint64_t isr;               /**< RW Clear General Interrupt Status. */
+	uint64_t reserved2[25];     /**< Reserved; pads the main counter to offset 0xF0. */
+	union {
+		uint64_t counter;   /**< RW Main Counter Value Register. */
+		struct {
+			uint32_t counter_l; /**< RW Main Counter Low. */
+			uint32_t counter_h; /**< RW Main Counter High. */
+		};
+	};
+	uint64_t reserved3;         /**< Reserved; the timer blocks follow at offset 0x100. */
+	struct {
+		uint64_t config;    /**< RW Timer Config and Capability Reg. */
+		uint64_t comp;      /**< RW Timer Comparator Value Register. */
+		uint64_t fsb;       /**< RW FSB Interrupt Route Register. */
+		uint64_t reserved4; /**< Reserved for future use. */
+	} timers[HPET_TIMER_NUM]; /**< Set of HPET timers. */
+};
+
+/* Mmap'd hpet registers; NULL until they are mapped */
+static volatile struct eal_hpet_regs *eal_hpet = NULL;
+
+/* Period at which the HPET counter increments in
+ * femtoseconds (10^-15 seconds). */
+static uint32_t eal_hpet_resolution_fs = 0;
+
+/* Frequency of the HPET counter in Hz */
+static uint64_t eal_hpet_resolution_hz = 0;
+
+/* Incremented 4 times during one full wrap of the 32-bit HPET counter */
+static uint32_t eal_hpet_msb;
+
+static pthread_t msb_inc_thread_id;
+
+/*
+ * This function runs on a dedicated thread to update a global variable
+ * that tracks the MSBs of the HPET counter (unfortunately, we need
+ * this because hpet is 32 bits by default under linux).
+ *
+ * It has the pthread start-routine signature so that it can be handed to
+ * pthread_create() without a function-pointer cast. It never returns.
+ */
+static void *
+hpet_msb_inc(__attribute__((unused)) void *arg)
+{
+	uint32_t t;
+
+	/* Compare the top two bits of the hardware counter with the low two
+	 * bits of the software extension and step the latter when they differ. */
+	while (1) {
+		t = (eal_hpet->counter_l >> 30);
+		if (t != (eal_hpet_msb & 3))
+			eal_hpet_msb ++;
+		sleep(10);
+	}
+
+	/* never reached */
+	return NULL;
+}
+
+/*
+ * Return the HPET counter frequency in Hz as computed by
+ * rte_eal_hpet_init(). Calls rte_panic() when HPET is flagged as absent.
+ */
+uint64_t
+rte_get_hpet_hz(void)
+{
+	if(internal_config.no_hpet)
+		rte_panic("Error, HPET called, but no HPET present\n");
+
+	return eal_hpet_resolution_hz;
+}
+
+/*
+ * Return a 64-bit HPET cycle count: the low 32 bits are read from the
+ * hardware counter, the high 32 bits are derived from eal_hpet_msb.
+ * Calls rte_panic() when HPET is flagged as absent.
+ */
+uint64_t
+rte_get_hpet_cycles(void)
+{
+	uint32_t t, msb;
+	uint64_t ret;
+
+	if(internal_config.no_hpet)
+		rte_panic("Error, HPET called, but no HPET present\n");
+
+	t = eal_hpet->counter_l;
+	msb = eal_hpet_msb;
+	/* NOTE(review): the "+ 2 - (t >> 30)" term appears to make the wrap
+	 * count tolerant of eal_hpet_msb lagging the sampled counter - confirm. */
+	ret = (msb + 2 - (t >> 30)) / 4;
+	ret <<= 32;
+	ret += t;
+	return ret;
+}
+
+#endif
+
+#ifdef RTE_LIBEAL_USE_HPET
+
+/* Number of bytes of DEV_HPET that are mapped to reach the registers. */
+#define HPET_MMAP_SIZE 1024
+
+/*
+ * Release the register mapping (if any) and reset eal_hpet to NULL so that
+ * neither a stale pointer nor MAP_FAILED is left behind.
+ */
+static void
+hpet_unmap(void)
+{
+	if (eal_hpet != NULL && eal_hpet != MAP_FAILED)
+		munmap((void *)(uintptr_t)eal_hpet, HPET_MMAP_SIZE);
+	eal_hpet = NULL;
+}
+
+/*
+ * Open and mmap /dev/hpet (high precision event timer) that will
+ * provide our time reference.
+ *
+ * make_default: when non-zero, HPET becomes eal_timer_source on success.
+ *
+ * Returns 0 on success and -1 when HPET is disabled or cannot be set up.
+ * Every set-up failure also sets internal_config.no_hpet and leaves no
+ * mapping behind.
+ */
+int
+rte_eal_hpet_init(int make_default)
+{
+	int fd, ret;
+	char thread_name[RTE_MAX_THREAD_NAME_LEN];
+
+	if (internal_config.no_hpet) {
+		RTE_LOG(NOTICE, EAL, "HPET is disabled\n");
+		return -1;
+	}
+
+	fd = open(DEV_HPET, O_RDONLY);
+	if (fd < 0) {
+		RTE_LOG(ERR, EAL, "ERROR: Cannot open "DEV_HPET": %s!\n",
+			strerror(errno));
+		internal_config.no_hpet = 1;
+		return -1;
+	}
+	eal_hpet = mmap(NULL, HPET_MMAP_SIZE, PROT_READ, MAP_SHARED, fd, 0);
+	if (eal_hpet == MAP_FAILED) {
+		RTE_LOG(ERR, EAL, "ERROR: Cannot mmap "DEV_HPET"!\n"
+				"Please enable CONFIG_HPET_MMAP in your kernel configuration "
+				"to allow HPET support.\n"
+				"To run without using HPET, set CONFIG_RTE_LIBEAL_USE_HPET=n "
+				"in your build configuration or use '--no-hpet' EAL flag.\n");
+		close(fd);
+		/* do not leave MAP_FAILED in the global register pointer */
+		eal_hpet = NULL;
+		internal_config.no_hpet = 1;
+		return -1;
+	}
+	/* the mapping stays valid after the descriptor is closed */
+	close(fd);
+
+	eal_hpet_resolution_fs = (uint32_t)((eal_hpet->capabilities &
+					CLK_PERIOD_MASK) >>
+					CLK_PERIOD_SHIFT);
+
+	/* a zero period would divide by zero in the frequency computation */
+	if (eal_hpet_resolution_fs == 0) {
+		RTE_LOG(ERR, EAL, "ERROR: HPET reports a zero clock period!\n");
+		hpet_unmap();
+		internal_config.no_hpet = 1;
+		return -1;
+	}
+
+	/* 10^15 femtoseconds per second divided by the period in fs */
+	eal_hpet_resolution_hz = (1000ULL*1000ULL*1000ULL*1000ULL*1000ULL) /
+		(uint64_t)eal_hpet_resolution_fs;
+
+	RTE_LOG(INFO, EAL, "HPET frequency is ~%"PRIu64" kHz\n",
+			eal_hpet_resolution_hz/1000);
+
+	eal_hpet_msb = (eal_hpet->counter_l >> 30);
+
+	/* create a thread that will increment a global variable for
+	 * msb (hpet is 32 bits by default under linux) */
+	ret = pthread_create(&msb_inc_thread_id, NULL, hpet_msb_inc, NULL);
+	if (ret != 0) {
+		RTE_LOG(ERR, EAL, "ERROR: Cannot create HPET timer thread!\n");
+		/* no reader thread exists, so the mapping can be dropped */
+		hpet_unmap();
+		internal_config.no_hpet = 1;
+		return -1;
+	}
+
+	/*
+	 * Set thread_name for aid in debugging.
+	 */
+	snprintf(thread_name, RTE_MAX_THREAD_NAME_LEN, "hpet-msb-inc");
+	ret = rte_thread_setname(msb_inc_thread_id, thread_name);
+	if (ret != 0)
+		RTE_LOG(ERR, EAL,
+			"ERROR: Cannot set HPET timer thread name!\n");
+
+	if (make_default)
+		eal_timer_source = EAL_TIMER_HPET;
+	return 0;
+}
+#endif
+
+/*
+ * Log a warning when the first "flags" line of /proc/cpuinfo lacks
+ * constant_tsc or nonstop_tsc, or when /proc/cpuinfo cannot be opened.
+ */
+static void
+check_tsc_flags(void)
+{
+	char line[512];
+	FILE *stream;
+
+	stream = fopen("/proc/cpuinfo", "r");
+	if (!stream) {
+		RTE_LOG(WARNING, EAL, "WARNING: Unable to open /proc/cpuinfo\n");
+		return;
+	}
+
+	while (fgets(line, sizeof line, stream)) {
+		char *constant_tsc;
+		char *nonstop_tsc;
+
+		if (strncmp(line, "flags", 5) != 0)
+			continue;
+
+		constant_tsc = strstr(line, "constant_tsc");
+		nonstop_tsc = strstr(line, "nonstop_tsc");
+		if (!constant_tsc || !nonstop_tsc)
+			RTE_LOG(WARNING, EAL,
+				"WARNING: cpu flags "
+				"constant_tsc=%s "
+				"nonstop_tsc=%s "
+				"-> using unreliable clock cycles !\n",
+				constant_tsc ? "yes":"no",
+				nonstop_tsc ? "yes":"no");
+		/* only the first "flags" line is inspected */
+		break;
+	}
+
+	fclose(stream);
+}
+
+/*
+ * Measure the TSC frequency against CLOCK_MONOTONIC_RAW across a sleep of
+ * about half a second.
+ *
+ * Returns the frequency in Hz, or 0 when CLOCK_MONOTONIC_RAW is not
+ * available at build time, a clock read fails, or no time elapsed.
+ */
+uint64_t
+get_tsc_freq(void)
+{
+#ifdef CLOCK_MONOTONIC_RAW
+#define NS_PER_SEC 1E9
+
+	struct timespec sleeptime = {.tv_nsec = 5E8 }; /* 1/2 second */
+
+	struct timespec t_start, t_end;
+	uint64_t tsc_hz;
+
+	if (clock_gettime(CLOCK_MONOTONIC_RAW, &t_start) == 0) {
+		uint64_t ns, end, start = rte_rdtsc();
+		nanosleep(&sleeptime,NULL);
+		/* without a valid end time t_end is uninitialized */
+		if (clock_gettime(CLOCK_MONOTONIC_RAW, &t_end) != 0)
+			return 0;
+		end = rte_rdtsc();
+		ns = ((t_end.tv_sec - t_start.tv_sec) * NS_PER_SEC);
+		ns += (t_end.tv_nsec - t_start.tv_nsec);
+
+		/* a zero interval would divide by zero below */
+		if (ns == 0)
+			return 0;
+
+		double secs = (double)ns/NS_PER_SEC;
+		tsc_hz = (uint64_t)((end - start)/secs);
+		return tsc_hz;
+	}
+#endif
+	return 0;
+}
+
+/*
+ * Select the TSC as the EAL timer source, set its frequency via
+ * set_tsc_freq() and warn about unreliable TSC flags. Always returns 0.
+ */
+int
+rte_eal_timer_init(void)
+{
+
+	eal_timer_source = EAL_TIMER_TSC;
+
+	set_tsc_freq();
+	check_tsc_flags();
+	return 0;
+}
diff --git a/src/dpdk22/lib/librte_eal/linuxapp/eal/eal_vfio.h b/src/dpdk22/lib/librte_eal/linuxapp/eal/eal_vfio.h
new file mode 100644
index 00000000..72ec3f62
--- /dev/null
+++ b/src/dpdk22/lib/librte_eal/linuxapp/eal/eal_vfio.h
@@ -0,0 +1,59 @@
+/*-
+ *   BSD LICENSE
+ *
+ *   Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+ *   All rights reserved.
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in
+ *       the documentation and/or other materials provided with the
+ *       distribution.
+ *     * Neither the name of Intel Corporation nor the names of its
+ *       contributors may be used to endorse or promote products derived
+ *       from this software without specific prior written permission.
+ * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef EAL_VFIO_H_ +#define EAL_VFIO_H_ + +/* + * determine if VFIO is present on the system + */ +#ifdef RTE_EAL_VFIO +#include +#if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 6, 0) +#include + +#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 10, 0) +#define RTE_PCI_MSIX_TABLE_BIR 0x7 +#define RTE_PCI_MSIX_TABLE_OFFSET 0xfffffff8 +#define RTE_PCI_MSIX_FLAGS_QSIZE 0x07ff +#else +#define RTE_PCI_MSIX_TABLE_BIR PCI_MSIX_TABLE_BIR +#define RTE_PCI_MSIX_TABLE_OFFSET PCI_MSIX_TABLE_OFFSET +#define RTE_PCI_MSIX_FLAGS_QSIZE PCI_MSIX_FLAGS_QSIZE +#endif + +#define VFIO_PRESENT +#endif /* kernel version */ +#endif /* RTE_EAL_VFIO */ + +#endif /* EAL_VFIO_H_ */ diff --git a/src/dpdk22/lib/librte_eal/linuxapp/eal/include/exec-env/rte_dom0_common.h b/src/dpdk22/lib/librte_eal/linuxapp/eal/include/exec-env/rte_dom0_common.h new file mode 100644 index 00000000..d9707780 --- /dev/null +++ b/src/dpdk22/lib/librte_eal/linuxapp/eal/include/exec-env/rte_dom0_common.h @@ -0,0 +1,108 @@ +/*- + * This file is provided under a dual BSD/LGPLv2 license. When using or + * redistributing this file, you may do so under either license. 
+ * + * GNU LESSER GENERAL PUBLIC LICENSE + * + * Copyright(c) 2007-2014 Intel Corporation. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of version 2.1 of the GNU Lesser General Public License + * as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. + * + * Contact Information: + * Intel Corporation + * + * + * BSD LICENSE + * + * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + */ + +#ifndef _RTE_DOM0_COMMON_H_ +#define _RTE_DOM0_COMMON_H_ + +#ifdef __KERNEL__ +#include +#endif + +#define DOM0_NAME_MAX 256 +#define DOM0_MM_DEV "/dev/dom0_mm" + +#define DOM0_CONTIG_NUM_ORDER 9 /**< order of 2M */ +#define DOM0_NUM_MEMSEG 512 /**< Maximum nb. of memory segment. */ +#define DOM0_MEMBLOCK_SIZE 0x200000 /**< size of memory block(2M). */ +#define DOM0_CONFIG_MEMSIZE 4096 /**< Maximum config memory size(4G). */ +#define DOM0_NUM_MEMBLOCK (DOM0_CONFIG_MEMSIZE / 2) /**< Maximum nb. of 2M memory block. */ + +#define RTE_DOM0_IOCTL_PREPARE_MEMSEG _IOWR(0, 1 , struct memory_info) +#define RTE_DOM0_IOCTL_ATTACH_TO_MEMSEG _IOWR(0, 2 , char *) +#define RTE_DOM0_IOCTL_GET_NUM_MEMSEG _IOWR(0, 3, int) +#define RTE_DOM0_IOCTL_GET_MEMSEG_INFO _IOWR(0, 4, void *) + +/** + * A structure used to store memory information. + */ +struct memory_info { + char name[DOM0_NAME_MAX]; + uint64_t size; +}; + +/** + * A structure used to store memory segment information. + */ +struct memseg_info { + uint32_t idx; + uint64_t pfn; + uint64_t size; + uint64_t mfn[DOM0_NUM_MEMBLOCK]; +}; + +/** + * A structure used to store memory block information. 
+ */ +struct memblock_info { + uint8_t exchange_flag; + uint8_t used; + uint64_t vir_addr; + uint64_t pfn; + uint64_t mfn; +}; +#endif /* _RTE_DOM0_COMMON_H_ */ diff --git a/src/dpdk22/lib/librte_eal/linuxapp/eal/include/exec-env/rte_interrupts.h b/src/dpdk22/lib/librte_eal/linuxapp/eal/include/exec-env/rte_interrupts.h new file mode 100644 index 00000000..3dacbff8 --- /dev/null +++ b/src/dpdk22/lib/librte_eal/linuxapp/eal/include/exec-env/rte_interrupts.h @@ -0,0 +1,228 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef _RTE_INTERRUPTS_H_ +#error "don't include this file directly, please include generic " +#endif + +#ifndef _RTE_LINUXAPP_INTERRUPTS_H_ +#define _RTE_LINUXAPP_INTERRUPTS_H_ + +#define RTE_MAX_RXTX_INTR_VEC_ID 32 +#define RTE_INTR_VEC_ZERO_OFFSET 0 +#define RTE_INTR_VEC_RXTX_OFFSET 1 + +enum rte_intr_handle_type { + RTE_INTR_HANDLE_UNKNOWN = 0, + RTE_INTR_HANDLE_UIO, /**< uio device handle */ + RTE_INTR_HANDLE_UIO_INTX, /**< uio generic handle */ + RTE_INTR_HANDLE_VFIO_LEGACY, /**< vfio device handle (legacy) */ + RTE_INTR_HANDLE_VFIO_MSI, /**< vfio device handle (MSI) */ + RTE_INTR_HANDLE_VFIO_MSIX, /**< vfio device handle (MSIX) */ + RTE_INTR_HANDLE_ALARM, /**< alarm handle */ + RTE_INTR_HANDLE_EXT, /**< external handler */ + RTE_INTR_HANDLE_MAX +}; + +#define RTE_INTR_EVENT_ADD 1UL +#define RTE_INTR_EVENT_DEL 2UL + +typedef void (*rte_intr_event_cb_t)(int fd, void *arg); + +struct rte_epoll_data { + uint32_t event; /**< event type */ + void *data; /**< User data */ + rte_intr_event_cb_t cb_fun; /**< IN: callback fun */ + void *cb_arg; /**< IN: callback arg */ +}; + +enum { + RTE_EPOLL_INVALID = 0, + RTE_EPOLL_VALID, + RTE_EPOLL_EXEC, +}; + +/** interrupt epoll event obj, taken by epoll_event.ptr */ +struct rte_epoll_event { + volatile uint32_t status; /**< OUT: event status */ + int fd; /**< OUT: event fd */ + int epfd; /**< OUT: epoll instance the ev associated with */ + struct rte_epoll_data epdata; +}; + +/** 
Handle for interrupts. */ +struct rte_intr_handle { + union { + int vfio_dev_fd; /**< VFIO device file descriptor */ + int uio_cfg_fd; /**< UIO config file descriptor + for uio_pci_generic */ + }; + int fd; /**< interrupt event file descriptor */ + enum rte_intr_handle_type type; /**< handle type */ + uint32_t max_intr; /**< max interrupt requested */ + uint32_t nb_efd; /**< number of available efd(event fd) */ + int efds[RTE_MAX_RXTX_INTR_VEC_ID]; /**< intr vectors/efds mapping */ + struct rte_epoll_event elist[RTE_MAX_RXTX_INTR_VEC_ID]; + /**< intr vector epoll event */ + int *intr_vec; /**< intr vector number array */ +}; + +#define RTE_EPOLL_PER_THREAD -1 /**< to hint using per thread epfd */ + +/** + * It waits for events on the epoll instance. + * + * @param epfd + * Epoll instance fd on which the caller wait for events. + * @param events + * Memory area contains the events that will be available for the caller. + * @param maxevents + * Up to maxevents are returned, must greater than zero. + * @param timeout + * Specifying a timeout of -1 causes a block indefinitely. + * Specifying a timeout equal to zero cause to return immediately. + * @return + * - On success, returns the number of available event. + * - On failure, a negative value. + */ +int +rte_epoll_wait(int epfd, struct rte_epoll_event *events, + int maxevents, int timeout); + +/** + * It performs control operations on epoll instance referred by the epfd. + * It requests that the operation op be performed for the target fd. + * + * @param epfd + * Epoll instance fd on which the caller perform control operations. + * @param op + * The operation be performed for the target fd. + * @param fd + * The target fd on which the control ops perform. + * @param event + * Describes the object linked to the fd. + * Note: The caller must take care the object deletion after CTL_DEL. + * @return + * - On success, zero. + * - On failure, a negative value. 
+ */ +int +rte_epoll_ctl(int epfd, int op, int fd, + struct rte_epoll_event *event); + +/** + * The function returns the per thread epoll instance. + * + * @return + * epfd the epoll instance referred to. + */ +int +rte_intr_tls_epfd(void); + +/** + * @param intr_handle + * Pointer to the interrupt handle. + * @param epfd + * Epoll instance fd which the intr vector associated to. + * @param op + * The operation be performed for the vector. + * Operation type of {ADD, DEL}. + * @param vec + * RX intr vector number added to the epoll instance wait list. + * @param data + * User raw data. + * @return + * - On success, zero. + * - On failure, a negative value. + */ +int +rte_intr_rx_ctl(struct rte_intr_handle *intr_handle, + int epfd, int op, unsigned int vec, void *data); + +/** + * It enables the packet I/O interrupt event if it's necessary. + * It creates event fd for each interrupt vector when MSIX is used, + * otherwise it multiplexes a single event fd. + * + * @param intr_handle + * Pointer to the interrupt handle. + * @param nb_efd + * Number of interrupt vector trying to enable. + * The value 0 is not allowed. + * @return + * - On success, zero. + * - On failure, a negative value. + */ +int +rte_intr_efd_enable(struct rte_intr_handle *intr_handle, uint32_t nb_efd); + +/** + * It disables the packet I/O interrupt event. + * It deletes registered eventfds and closes the open fds. + * + * @param intr_handle + * Pointer to the interrupt handle. + */ +void +rte_intr_efd_disable(struct rte_intr_handle *intr_handle); + +/** + * The packet I/O interrupt on datapath is enabled or not. + * + * @param intr_handle + * Pointer to the interrupt handle. + */ +int +rte_intr_dp_is_en(struct rte_intr_handle *intr_handle); + +/** + * The interrupt handle instance allows other causes or not. + * Other causes stand for any none packet I/O interrupts. + * + * @param intr_handle + * Pointer to the interrupt handle. 
+ */ +int +rte_intr_allow_others(struct rte_intr_handle *intr_handle); + +/** + * The multiple interrupt vector capability of interrupt handle instance. + * It returns zero if no multiple interrupt vector support. + * + * @param intr_handle + * Pointer to the interrupt handle. + */ +int +rte_intr_cap_multiple(struct rte_intr_handle *intr_handle); + +#endif /* _RTE_LINUXAPP_INTERRUPTS_H_ */ diff --git a/src/dpdk22/lib/librte_eal/linuxapp/eal/include/exec-env/rte_kni_common.h b/src/dpdk22/lib/librte_eal/linuxapp/eal/include/exec-env/rte_kni_common.h new file mode 100644 index 00000000..bd1cc094 --- /dev/null +++ b/src/dpdk22/lib/librte_eal/linuxapp/eal/include/exec-env/rte_kni_common.h @@ -0,0 +1,174 @@ +/*- + * This file is provided under a dual BSD/LGPLv2 license. When using or + * redistributing this file, you may do so under either license. + * + * GNU LESSER GENERAL PUBLIC LICENSE + * + * Copyright(c) 2007-2014 Intel Corporation. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of version 2.1 of the GNU Lesser General Public License + * as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. + * + * Contact Information: + * Intel Corporation + * + * + * BSD LICENSE + * + * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. + * All rights reserved. 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + */ + +#ifndef _RTE_KNI_COMMON_H_ +#define _RTE_KNI_COMMON_H_ + +#ifdef __KERNEL__ +#include +#endif + +/** + * KNI name is part of memzone name. + */ +#define RTE_KNI_NAMESIZE 32 + +#ifndef RTE_CACHE_LINE_SIZE +#define RTE_CACHE_LINE_SIZE 64 /**< Cache line size. */ +#endif + +/* + * Request id. + */ +enum rte_kni_req_id { + RTE_KNI_REQ_UNKNOWN = 0, + RTE_KNI_REQ_CHANGE_MTU, + RTE_KNI_REQ_CFG_NETWORK_IF, + RTE_KNI_REQ_MAX, +}; + +/* + * Structure for KNI request. 
+ */ +struct rte_kni_request { + uint32_t req_id; /**< Request id */ + union { + uint32_t new_mtu; /**< New MTU */ + uint8_t if_up; /**< 1: interface up, 0: interface down */ + }; + int32_t result; /**< Result for processing request */ +} __attribute__((__packed__)); + +/* + * Fifo struct mapped in a shared memory. It describes a circular buffer FIFO + * Write and read should wrap around. Fifo is empty when write == read + * Writing should never overwrite the read position + */ +struct rte_kni_fifo { + volatile unsigned write; /**< Next position to be written*/ + volatile unsigned read; /**< Next position to be read */ + unsigned len; /**< Circular buffer length */ + unsigned elem_size; /**< Pointer size - for 32/64 bit OS */ + void * volatile buffer[0]; /**< The buffer contains mbuf pointers */ +}; + +/* + * The kernel image of the rte_mbuf struct, with only the relevant fields. + * Padding is necessary to assure the offsets of these fields + */ +struct rte_kni_mbuf { + void *buf_addr __attribute__((__aligned__(RTE_CACHE_LINE_SIZE))); + char pad0[10]; + uint16_t data_off; /**< Start address of data in segment buffer. */ + char pad1[4]; + uint64_t ol_flags; /**< Offload features. */ + char pad2[4]; + uint32_t pkt_len; /**< Total pkt len: sum of all segment data_len. */ + uint16_t data_len; /**< Amount of data in segment buffer. */ + + /* fields on second cache line */ + char pad3[8] __attribute__((__aligned__(RTE_CACHE_LINE_SIZE))); + void *pool; + void *next; +}; + +/* + * Struct used to create a KNI device. 
Passed to the kernel in IOCTL call + */ + +struct rte_kni_device_info { + char name[RTE_KNI_NAMESIZE]; /**< Network device name for KNI */ + + phys_addr_t tx_phys; + phys_addr_t rx_phys; + phys_addr_t alloc_phys; + phys_addr_t free_phys; + + /* Used by Ethtool */ + phys_addr_t req_phys; + phys_addr_t resp_phys; + phys_addr_t sync_phys; + void * sync_va; + + /* mbuf mempool */ + void * mbuf_va; + phys_addr_t mbuf_phys; + + /* PCI info */ + uint16_t vendor_id; /**< Vendor ID or PCI_ANY_ID. */ + uint16_t device_id; /**< Device ID or PCI_ANY_ID. */ + uint8_t bus; /**< Device bus */ + uint8_t devid; /**< Device ID */ + uint8_t function; /**< Device function. */ + + uint16_t group_id; /**< Group ID */ + uint32_t core_id; /**< core ID to bind for kernel thread */ + + uint8_t force_bind : 1; /**< Flag for kernel thread binding */ + + /* mbuf size */ + unsigned mbuf_size; +}; + +#define KNI_DEVICE "kni" + +#define RTE_KNI_IOCTL_TEST _IOWR(0, 1, int) +#define RTE_KNI_IOCTL_CREATE _IOWR(0, 2, struct rte_kni_device_info) +#define RTE_KNI_IOCTL_RELEASE _IOWR(0, 3, struct rte_kni_device_info) + +#endif /* _RTE_KNI_COMMON_H_ */ diff --git a/src/dpdk22/lib/librte_eal/linuxapp/igb_uio/compat.h b/src/dpdk22/lib/librte_eal/linuxapp/igb_uio/compat.h new file mode 100644 index 00000000..c1d45a66 --- /dev/null +++ b/src/dpdk22/lib/librte_eal/linuxapp/igb_uio/compat.h @@ -0,0 +1,116 @@ +/* + * Minimal wrappers to allow compiling igb_uio on older kernels. 
+ */
+
+#ifndef RHEL_RELEASE_VERSION
+#define RHEL_RELEASE_VERSION(a, b) (((a) << 8) + (b))
+#endif
+
+#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 3, 0)
+#define pci_cfg_access_lock   pci_block_user_cfg_access
+#define pci_cfg_access_unlock pci_unblock_user_cfg_access
+#endif
+
+#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 18, 0)
+#define HAVE_PTE_MASK_PAGE_IOMAP
+#endif
+
+#ifndef PCI_MSIX_ENTRY_SIZE
+#define PCI_MSIX_ENTRY_SIZE             16
+#define PCI_MSIX_ENTRY_LOWER_ADDR       0
+#define PCI_MSIX_ENTRY_UPPER_ADDR       4
+#define PCI_MSIX_ENTRY_DATA             8
+#define PCI_MSIX_ENTRY_VECTOR_CTRL      12
+#define PCI_MSIX_ENTRY_CTRL_MASKBIT     1
+#endif
+
+#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 34) && \
+	(!(defined(RHEL_RELEASE_CODE) && \
+	RHEL_RELEASE_CODE >= RHEL_RELEASE_VERSION(5, 9)))
+
+/*
+ * Compiled only for kernels older than 2.6.34 (and RHEL before 5.9).
+ * Returns the nr_virtfn field read through dev->sriov, or 0 when dev is
+ * not a physical function.
+ * NOTE(review): the local struct iov presumably mirrors the leading fields
+ * of the kernel-private SR-IOV state on those kernels - verify before reuse.
+ */
+static int pci_num_vf(struct pci_dev *dev)
+{
+	struct iov {
+		int pos;
+		int nres;
+		u32 cap;
+		u16 ctrl;
+		u16 total;
+		u16 initial;
+		u16 nr_virtfn;
+	} *iov = (struct iov *)dev->sriov;
+
+	/* dev->sriov is only dereferenced for physical functions */
+	if (!dev->is_physfn)
+		return 0;
+
+	return iov->nr_virtfn;
+}
+
+#endif /* < 2.6.34 */
+
+#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 39) && \
+	(!(defined(RHEL_RELEASE_CODE) && \
+	RHEL_RELEASE_CODE >= RHEL_RELEASE_VERSION(6, 4)))
+
+#define kstrtoul strict_strtoul
+
+#endif /* < 2.6.39 */
+
+#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 3, 0) && \
+	(!(defined(RHEL_RELEASE_CODE) && \
+	RHEL_RELEASE_CODE >= RHEL_RELEASE_VERSION(6, 3)))
+
+/* Check if INTX works to control irq's.
+ * Sets INTX_DISABLE flag and reads it back.
+ *
+ * Returns true when toggling PCI_COMMAND_INTX_DISABLE changed only that
+ * bit; the original command word is then written back. Returns false
+ * otherwise, logging an error when other command bits changed.
+ */
+static bool pci_intx_mask_supported(struct pci_dev *pdev)
+{
+	bool mask_supported = false;
+	uint16_t orig, new;
+
+	pci_block_user_cfg_access(pdev);
+	pci_read_config_word(pdev, PCI_COMMAND, &orig);
+	pci_write_config_word(pdev, PCI_COMMAND,
+			      orig ^ PCI_COMMAND_INTX_DISABLE);
+	pci_read_config_word(pdev, PCI_COMMAND, &new);
+
+	if ((new ^ orig) & ~PCI_COMMAND_INTX_DISABLE) {
+		dev_err(&pdev->dev, "Command register changed from "
+			"0x%x to 0x%x: driver or hardware bug?\n", orig, new);
+	} else if ((new ^ orig) & PCI_COMMAND_INTX_DISABLE) {
+		mask_supported = true;
+		pci_write_config_word(pdev, PCI_COMMAND, orig);
+	}
+	pci_unblock_user_cfg_access(pdev);
+
+	return mask_supported;
+}
+
+/*
+ * Check whether the device reports a pending INTx interrupt and, if so,
+ * mask it.
+ *
+ * PCI_COMMAND is read as a dword so that the command word (low 16 bits)
+ * and the status word (high 16 bits) are sampled in one access. When
+ * PCI_STATUS_INTERRUPT is set, PCI_COMMAND_INTX_DISABLE is set in the
+ * command word.
+ *
+ * Returns true when the interrupt was pending (and is now masked), false
+ * when the interrupt is not ours; the command word is left untouched in
+ * the latter case.
+ */
+static bool pci_check_and_mask_intx(struct pci_dev *pdev)
+{
+	bool pending;
+	uint32_t status;
+
+	pci_block_user_cfg_access(pdev);
+	pci_read_config_dword(pdev, PCI_COMMAND, &status);
+
+	/* interrupt is not ours, goes to out */
+	pending = (((status >> 16) & PCI_STATUS_INTERRUPT) != 0);
+	if (pending) {
+		uint16_t old, new;
+
+		/* the low half of the dword is the command register */
+		old = (uint16_t)(status & 0xffff);
+
+		/*
+		 * Mask, never unmask: the previous code branched on
+		 * "status != 0", which is always true once the pending bit
+		 * is set, so it cleared INTX_DISABLE instead of setting it.
+		 */
+		new = old | PCI_COMMAND_INTX_DISABLE;
+
+		if (old != new)
+			pci_write_config_word(pdev, PCI_COMMAND, new);
+	}
+	pci_unblock_user_cfg_access(pdev);
+
+	return pending;
+}
+
+#endif /* < 3.3.0 */
diff --git a/src/dpdk22/lib/librte_eal/linuxapp/xen_dom0/compat.h b/src/dpdk22/lib/librte_eal/linuxapp/xen_dom0/compat.h
new file mode 100644
index 00000000..e6eb97f2
--- /dev/null
+++ b/src/dpdk22/lib/librte_eal/linuxapp/xen_dom0/compat.h
@@ -0,0 +1,15 @@
+/*
+ * Minimal wrappers to allow compiling xen_dom0 on older kernels.
+ */ + +#ifndef RHEL_RELEASE_VERSION +#define RHEL_RELEASE_VERSION(a, b) (((a) << 8) + (b)) +#endif + +#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 39) && \ + (!(defined(RHEL_RELEASE_CODE) && \ + RHEL_RELEASE_CODE >= RHEL_RELEASE_VERSION(6, 4))) + +#define kstrtoul strict_strtoul + +#endif /* < 2.6.39 */ diff --git a/src/dpdk22/lib/librte_eal/linuxapp/xen_dom0/dom0_mm_dev.h b/src/dpdk22/lib/librte_eal/linuxapp/xen_dom0/dom0_mm_dev.h new file mode 100644 index 00000000..9d5ffb22 --- /dev/null +++ b/src/dpdk22/lib/librte_eal/linuxapp/xen_dom0/dom0_mm_dev.h @@ -0,0 +1,107 @@ +/*- + * This file is provided under a dual BSD/GPLv2 license. When using or + * redistributing this file, you may do so under either license. + * + * GPL LICENSE SUMMARY + * + * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. + * The full GNU General Public License is included in this distribution + * in the file called LICENSE.GPL. + * + * Contact Information: + * Intel Corporation + * + * BSD LICENSE + * + * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. + * All rights reserved. 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * + */ +#ifndef _DOM0_MM_DEV_H_ +#define _DOM0_MM_DEV_H_ + +#include +#include +#include +#include +#include + +#define NUM_MEM_CTX 256 /**< Maximum number of memory contexts. */ +#define MAX_EXCHANGE_FAIL_TIME 5 /**< Maximum number of allowed exchange failures. */ +#define MAX_MEMBLOCK_SIZE (2 * DOM0_MEMBLOCK_SIZE) +#define MAX_NUM_ORDER (DOM0_CONTIG_NUM_ORDER + 1) +#define SIZE_PER_BLOCK 2 /**< Size of memory block (2MB).*/ + +/** + * A structure describing the private information for a dom0 device. + */ +struct dom0_mm_dev { + struct miscdevice miscdev; + uint8_t fail_times; + uint32_t used_memsize; + uint32_t num_mem_ctx; + uint32_t config_memsize; + uint32_t num_bigblock; + struct dom0_mm_data *mm_data[NUM_MEM_CTX]; + struct mutex data_lock; +}; + +struct dom0_mm_data{ + uint32_t refcnt; + uint32_t num_memseg; /**< Number of memory segments. */ + uint32_t mem_size; /**< Size of requested memory. */ + + char name[DOM0_NAME_MAX]; + + /** Store global memory block IDs used by an instance */ + uint32_t block_num[DOM0_NUM_MEMBLOCK]; + + /** Store memory block information.*/ + struct memblock_info block_info[DOM0_NUM_MEMBLOCK]; + + /** Store memory segment information.*/ + struct memseg_info seg_info[DOM0_NUM_MEMSEG]; +}; + +#define XEN_ERR(args...) printk(KERN_DEBUG "XEN_DOM0: Error: " args) +#define XEN_PRINT(args...) printk(KERN_DEBUG "XEN_DOM0: " args) +#endif diff --git a/src/dpdk22/lib/librte_ether/rte_dev_info.h b/src/dpdk22/lib/librte_ether/rte_dev_info.h new file mode 100644 index 00000000..291bd4d7 --- /dev/null +++ b/src/dpdk22/lib/librte_ether/rte_dev_info.h @@ -0,0 +1,57 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2015 Intel Corporation. All rights reserved. + * All rights reserved. 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#ifndef _RTE_DEV_INFO_H_ +#define _RTE_DEV_INFO_H_ + +/** + * Placeholder for accessing device registers + */ +struct rte_dev_reg_info { + void *data; /**< Buffer for returned registers */ + uint32_t offset; /**< Start register table location for access */ + uint32_t length; /**< Number of registers to fetch */ + uint32_t version; /**< Device version */ +}; + +/** + * Placeholder for accessing device eeprom + */ +struct rte_dev_eeprom_info { + void *data; /**< Buffer for returned eeprom data */ + uint32_t offset; /**< Start eeprom address for access */ + uint32_t length; /**< Length of eeprom region to access */ + uint32_t magic; /**< Device-specific key, such as device-id */ +}; + +#endif /* _RTE_DEV_INFO_H_ */ diff --git a/src/dpdk22/lib/librte_ether/rte_eth_ctrl.h b/src/dpdk22/lib/librte_ether/rte_eth_ctrl.h new file mode 100644 index 00000000..ce224adb --- /dev/null +++ b/src/dpdk22/lib/librte_ether/rte_eth_ctrl.h @@ -0,0 +1,811 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2015 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef _RTE_ETH_CTRL_H_ +#define _RTE_ETH_CTRL_H_ + +/** + * @file + * + * Ethernet device features and related data structures used + * by control APIs should be defined in this file. + * + */ + +#ifdef __cplusplus +extern "C" { +#endif + +/* + * A packet can be identified by hardware as different flow types. Different + * NIC hardwares may support different flow types. + * Basically, the NIC hardware identifies the flow type as deep protocol as + * possible, and exclusively. For example, if a packet is identified as + * 'RTE_ETH_FLOW_NONFRAG_IPV4_TCP', it will not be any of other flow types, + * though it is an actual IPV4 packet. + * Note that the flow types are used to define RSS offload types in + * rte_ethdev.h. 
+ */ +#define RTE_ETH_FLOW_UNKNOWN 0 +#define RTE_ETH_FLOW_RAW 1 +#define RTE_ETH_FLOW_IPV4 2 +#define RTE_ETH_FLOW_FRAG_IPV4 3 +#define RTE_ETH_FLOW_NONFRAG_IPV4_TCP 4 +#define RTE_ETH_FLOW_NONFRAG_IPV4_UDP 5 +#define RTE_ETH_FLOW_NONFRAG_IPV4_SCTP 6 +#define RTE_ETH_FLOW_NONFRAG_IPV4_OTHER 7 +#define RTE_ETH_FLOW_IPV6 8 +#define RTE_ETH_FLOW_FRAG_IPV6 9 +#define RTE_ETH_FLOW_NONFRAG_IPV6_TCP 10 +#define RTE_ETH_FLOW_NONFRAG_IPV6_UDP 11 +#define RTE_ETH_FLOW_NONFRAG_IPV6_SCTP 12 +#define RTE_ETH_FLOW_NONFRAG_IPV6_OTHER 13 +#define RTE_ETH_FLOW_L2_PAYLOAD 14 +#define RTE_ETH_FLOW_IPV6_EX 15 +#define RTE_ETH_FLOW_IPV6_TCP_EX 16 +#define RTE_ETH_FLOW_IPV6_UDP_EX 17 +#define RTE_ETH_FLOW_MAX 18 + +/** + * Feature filter types + */ +enum rte_filter_type { + RTE_ETH_FILTER_NONE = 0, + RTE_ETH_FILTER_MACVLAN, + RTE_ETH_FILTER_ETHERTYPE, + RTE_ETH_FILTER_FLEXIBLE, + RTE_ETH_FILTER_SYN, + RTE_ETH_FILTER_NTUPLE, + RTE_ETH_FILTER_TUNNEL, + RTE_ETH_FILTER_FDIR, + RTE_ETH_FILTER_HASH, + RTE_ETH_FILTER_MAX +}; + +/** + * Generic operations on filters + */ +enum rte_filter_op { + /** used to check whether the type filter is supported */ + RTE_ETH_FILTER_NOP = 0, + RTE_ETH_FILTER_ADD, /**< add filter entry */ + RTE_ETH_FILTER_UPDATE, /**< update filter entry */ + RTE_ETH_FILTER_DELETE, /**< delete filter entry */ + RTE_ETH_FILTER_FLUSH, /**< flush all entries */ + RTE_ETH_FILTER_GET, /**< get filter entry */ + RTE_ETH_FILTER_SET, /**< set configurations */ + RTE_ETH_FILTER_INFO, /**< retrieve information */ + RTE_ETH_FILTER_STATS, /**< retrieve statistics */ + RTE_ETH_FILTER_OP_MAX +}; + +/** + * MAC filter type + */ +enum rte_mac_filter_type { + RTE_MAC_PERFECT_MATCH = 1, /**< exact match of MAC addr. */ + RTE_MACVLAN_PERFECT_MATCH, /**< exact match of MAC addr and VLAN ID. */ + RTE_MAC_HASH_MATCH, /**< hash match of MAC addr. */ + /** hash match of MAC addr and exact match of VLAN ID. 
*/ + RTE_MACVLAN_HASH_MATCH, +}; + +/** + * MAC filter info + */ +struct rte_eth_mac_filter { + uint8_t is_vf; /**< 1 for VF, 0 for port dev */ + uint16_t dst_id; /**< VF ID, available when is_vf is 1*/ + enum rte_mac_filter_type filter_type; /**< MAC filter type */ + struct ether_addr mac_addr; +}; + +/** + * Define all structures for Ethertype Filter type. + */ + +#define RTE_ETHTYPE_FLAGS_MAC 0x0001 /**< If set, compare mac */ +#define RTE_ETHTYPE_FLAGS_DROP 0x0002 /**< If set, drop packet when match */ + +/** + * A structure used to define the ethertype filter entry + * to support RTE_ETH_FILTER_ETHERTYPE with RTE_ETH_FILTER_ADD, + * RTE_ETH_FILTER_DELETE and RTE_ETH_FILTER_GET operations. + */ +struct rte_eth_ethertype_filter { + struct ether_addr mac_addr; /**< Mac address to match. */ + uint16_t ether_type; /**< Ether type to match */ + uint16_t flags; /**< Flags from RTE_ETHTYPE_FLAGS_* */ + uint16_t queue; /**< Queue assigned to when match*/ +}; + +#define RTE_FLEX_FILTER_MAXLEN 128 /**< bytes to use in flex filter. */ +#define RTE_FLEX_FILTER_MASK_SIZE \ + (RTE_ALIGN(RTE_FLEX_FILTER_MAXLEN, CHAR_BIT) / CHAR_BIT) + /**< mask bytes in flex filter. */ + +/** + * A structure used to define the flex filter entry + * to support RTE_ETH_FILTER_FLEXIBLE with RTE_ETH_FILTER_ADD, + * RTE_ETH_FILTER_DELETE and RTE_ETH_FILTER_GET operations. + */ +struct rte_eth_flex_filter { + uint16_t len; + uint8_t bytes[RTE_FLEX_FILTER_MAXLEN]; /**< flex bytes in big endian.*/ + uint8_t mask[RTE_FLEX_FILTER_MASK_SIZE]; /**< if mask bit is 1b, do + not compare corresponding byte. */ + uint8_t priority; + uint16_t queue; /**< Queue assigned to when match. */ +}; + +/** + * A structure used to define the TCP syn filter entry + * to support RTE_ETH_FILTER_SYN with RTE_ETH_FILTER_ADD, + * RTE_ETH_FILTER_DELETE and RTE_ETH_FILTER_GET operations. + */ +struct rte_eth_syn_filter { + uint8_t hig_pri; /**< 1 - higher priority than other filters, + 0 - lower priority. 
*/ + uint16_t queue; /**< Queue assigned to when match */ +}; + +/** + * Define all structures for ntuple Filter type. + */ + +#define RTE_NTUPLE_FLAGS_DST_IP 0x0001 /**< If set, dst_ip is part of ntuple */ +#define RTE_NTUPLE_FLAGS_SRC_IP 0x0002 /**< If set, src_ip is part of ntuple */ +#define RTE_NTUPLE_FLAGS_DST_PORT 0x0004 /**< If set, dst_port is part of ntuple */ +#define RTE_NTUPLE_FLAGS_SRC_PORT 0x0008 /**< If set, src_port is part of ntuple */ +#define RTE_NTUPLE_FLAGS_PROTO 0x0010 /**< If set, protocol is part of ntuple */ +#define RTE_NTUPLE_FLAGS_TCP_FLAG 0x0020 /**< If set, tcp flag is involved */ + +#define RTE_5TUPLE_FLAGS ( \ + RTE_NTUPLE_FLAGS_DST_IP | \ + RTE_NTUPLE_FLAGS_SRC_IP | \ + RTE_NTUPLE_FLAGS_DST_PORT | \ + RTE_NTUPLE_FLAGS_SRC_PORT | \ + RTE_NTUPLE_FLAGS_PROTO) + +#define RTE_2TUPLE_FLAGS ( \ + RTE_NTUPLE_FLAGS_DST_PORT | \ + RTE_NTUPLE_FLAGS_PROTO) + +#define TCP_URG_FLAG 0x20 +#define TCP_ACK_FLAG 0x10 +#define TCP_PSH_FLAG 0x08 +#define TCP_RST_FLAG 0x04 +#define TCP_SYN_FLAG 0x02 +#define TCP_FIN_FLAG 0x01 +#define TCP_FLAG_ALL 0x3F + +/** + * A structure used to define the ntuple filter entry + * to support RTE_ETH_FILTER_NTUPLE with RTE_ETH_FILTER_ADD, + * RTE_ETH_FILTER_DELETE and RTE_ETH_FILTER_GET operations. + */ +struct rte_eth_ntuple_filter { + uint16_t flags; /**< Flags from RTE_NTUPLE_FLAGS_* */ + uint32_t dst_ip; /**< Destination IP address in big endian. */ + uint32_t dst_ip_mask; /**< Mask of destination IP address. */ + uint32_t src_ip; /**< Source IP address in big endian. */ + uint32_t src_ip_mask; /**< Mask of source IP address. */ + uint16_t dst_port; /**< Destination port in big endian. */ + uint16_t dst_port_mask; /**< Mask of destination port. */ + uint16_t src_port; /**< Source Port in big endian. */ + uint16_t src_port_mask; /**< Mask of source port. */ + uint8_t proto; /**< L4 protocol. */ + uint8_t proto_mask; /**< Mask of L4 protocol. */ + /** tcp_flags only meaningful when the proto is TCP. 
+ A packet that matches the above ntuple fields and contains + any set bit in tcp_flags will hit this filter. */ + uint8_t tcp_flags; + uint16_t priority; /**< seven levels (001b-111b), 111b is highest, + used when more than one filter matches. */ + uint16_t queue; /**< Queue assigned to when match*/ +}; + +/** + * Tunneled type. + */ +enum rte_eth_tunnel_type { + RTE_TUNNEL_TYPE_NONE = 0, + RTE_TUNNEL_TYPE_VXLAN, + RTE_TUNNEL_TYPE_GENEVE, + RTE_TUNNEL_TYPE_TEREDO, + RTE_TUNNEL_TYPE_NVGRE, + RTE_TUNNEL_TYPE_MAX, +}; + +/** + * filter type of tunneling packet + */ +#define ETH_TUNNEL_FILTER_OMAC 0x01 /**< filter by outer MAC addr */ +#define ETH_TUNNEL_FILTER_OIP 0x02 /**< filter by outer IP Addr */ +#define ETH_TUNNEL_FILTER_TENID 0x04 /**< filter by tenant ID */ +#define ETH_TUNNEL_FILTER_IMAC 0x08 /**< filter by inner MAC addr */ +#define ETH_TUNNEL_FILTER_IVLAN 0x10 /**< filter by inner VLAN ID */ +#define ETH_TUNNEL_FILTER_IIP 0x20 /**< filter by inner IP addr */ + +#define RTE_TUNNEL_FILTER_IMAC_IVLAN (ETH_TUNNEL_FILTER_IMAC | \ + ETH_TUNNEL_FILTER_IVLAN) +#define RTE_TUNNEL_FILTER_IMAC_IVLAN_TENID (ETH_TUNNEL_FILTER_IMAC | \ + ETH_TUNNEL_FILTER_IVLAN | \ + ETH_TUNNEL_FILTER_TENID) +#define RTE_TUNNEL_FILTER_IMAC_TENID (ETH_TUNNEL_FILTER_IMAC | \ + ETH_TUNNEL_FILTER_TENID) +#define RTE_TUNNEL_FILTER_OMAC_TENID_IMAC (ETH_TUNNEL_FILTER_OMAC | \ + ETH_TUNNEL_FILTER_TENID | \ + ETH_TUNNEL_FILTER_IMAC) + +/** + * Select IPv4 or IPv6 for tunnel filters. + */ +enum rte_tunnel_iptype { + RTE_TUNNEL_IPTYPE_IPV4 = 0, /**< IPv4. */ + RTE_TUNNEL_IPTYPE_IPV6, /**< IPv6. */ +}; + +/** + * Tunneling Packet filter configuration. + */ +struct rte_eth_tunnel_filter_conf { + struct ether_addr *outer_mac; /**< Outer MAC address filter. */ + struct ether_addr *inner_mac; /**< Inner MAC address filter. */ + uint16_t inner_vlan; /**< Inner VLAN filter. */ + enum rte_tunnel_iptype ip_type; /**< IP address type. */ + union { + uint32_t ipv4_addr; /**< IPv4 source address to match. 
*/ + uint32_t ipv6_addr[4]; /**< IPv6 source address to match. */ + } ip_addr; /**< IPv4/IPv6 source address to match (union of above). */ + + uint16_t filter_type; /**< Filter type. */ + enum rte_eth_tunnel_type tunnel_type; /**< Tunnel Type. */ + uint32_t tenant_id; /**< Tenant number. */ + uint16_t queue_id; /**< Queue number. */ +}; + +/** + * Global eth device configuration type. + */ +enum rte_eth_global_cfg_type { + RTE_ETH_GLOBAL_CFG_TYPE_UNKNOWN = 0, + RTE_ETH_GLOBAL_CFG_TYPE_GRE_KEY_LEN, + RTE_ETH_GLOBAL_CFG_TYPE_MAX, +}; + +/** + * Global eth device configuration. + */ +struct rte_eth_global_cfg { + enum rte_eth_global_cfg_type cfg_type; /**< Global config type. */ + union { + uint8_t gre_key_len; /**< Valid GRE key length in bytes. */ + uint64_t reserved; /**< Reserve space for future use. */ + } cfg; +}; + +#define RTE_ETH_FDIR_MAX_FLEXLEN 16 /**< Max length of flexbytes. */ +#define RTE_ETH_INSET_SIZE_MAX 128 /**< Max length of input set. */ + +/** + * Input set fields for Flow Director and Hash filters + */ +enum rte_eth_input_set_field { + RTE_ETH_INPUT_SET_UNKNOWN = 0, + + /* L2 */ + RTE_ETH_INPUT_SET_L2_SRC_MAC = 1, + RTE_ETH_INPUT_SET_L2_DST_MAC, + RTE_ETH_INPUT_SET_L2_OUTER_VLAN, + RTE_ETH_INPUT_SET_L2_INNER_VLAN, + RTE_ETH_INPUT_SET_L2_ETHERTYPE, + + /* L3 */ + RTE_ETH_INPUT_SET_L3_SRC_IP4 = 129, + RTE_ETH_INPUT_SET_L3_DST_IP4, + RTE_ETH_INPUT_SET_L3_SRC_IP6, + RTE_ETH_INPUT_SET_L3_DST_IP6, + RTE_ETH_INPUT_SET_L3_IP4_TOS, + RTE_ETH_INPUT_SET_L3_IP4_PROTO, + RTE_ETH_INPUT_SET_L3_IP6_TC, + RTE_ETH_INPUT_SET_L3_IP6_NEXT_HEADER, + + /* L4 */ + RTE_ETH_INPUT_SET_L4_UDP_SRC_PORT = 257, + RTE_ETH_INPUT_SET_L4_UDP_DST_PORT, + RTE_ETH_INPUT_SET_L4_TCP_SRC_PORT, + RTE_ETH_INPUT_SET_L4_TCP_DST_PORT, + RTE_ETH_INPUT_SET_L4_SCTP_SRC_PORT, + RTE_ETH_INPUT_SET_L4_SCTP_DST_PORT, + RTE_ETH_INPUT_SET_L4_SCTP_VERIFICATION_TAG, + + /* Tunnel */ + RTE_ETH_INPUT_SET_TUNNEL_L2_INNER_DST_MAC = 385, + RTE_ETH_INPUT_SET_TUNNEL_L2_INNER_SRC_MAC, + 
RTE_ETH_INPUT_SET_TUNNEL_L2_INNER_VLAN, + RTE_ETH_INPUT_SET_TUNNEL_L4_UDP_KEY, + RTE_ETH_INPUT_SET_TUNNEL_GRE_KEY, + + /* Flexible Payload */ + RTE_ETH_INPUT_SET_FLEX_PAYLOAD_1ST_WORD = 641, + RTE_ETH_INPUT_SET_FLEX_PAYLOAD_2ND_WORD, + RTE_ETH_INPUT_SET_FLEX_PAYLOAD_3RD_WORD, + RTE_ETH_INPUT_SET_FLEX_PAYLOAD_4TH_WORD, + RTE_ETH_INPUT_SET_FLEX_PAYLOAD_5TH_WORD, + RTE_ETH_INPUT_SET_FLEX_PAYLOAD_6TH_WORD, + RTE_ETH_INPUT_SET_FLEX_PAYLOAD_7TH_WORD, + RTE_ETH_INPUT_SET_FLEX_PAYLOAD_8TH_WORD, + + RTE_ETH_INPUT_SET_DEFAULT = 65533, + RTE_ETH_INPUT_SET_NONE = 65534, + RTE_ETH_INPUT_SET_MAX = 65535, +}; + +/** + * Filters input set operations + */ +enum rte_filter_input_set_op { + RTE_ETH_INPUT_SET_OP_UNKNOWN, + RTE_ETH_INPUT_SET_SELECT, /**< select input set */ + RTE_ETH_INPUT_SET_ADD, /**< add input set entry */ + RTE_ETH_INPUT_SET_OP_MAX +}; + + +/** + * A structure used to define the input set configuration for + * flow director and hash filters + */ +struct rte_eth_input_set_conf { + uint16_t flow_type; + uint16_t inset_size; + enum rte_eth_input_set_field field[RTE_ETH_INSET_SIZE_MAX]; + enum rte_filter_input_set_op op; +}; + +/** + * A structure used to define the input for L2 flow + */ +struct rte_eth_l2_flow { + uint16_t ether_type; /**< Ether type to match */ +}; + +/** + * A structure used to define the input for IPV4 flow + */ +struct rte_eth_ipv4_flow { + uint32_t src_ip; /**< IPv4 source address to match. */ + uint32_t dst_ip; /**< IPv4 destination address to match. */ +}; + +/** + * A structure used to define the input for IPV4 UDP flow + */ +struct rte_eth_udpv4_flow { + struct rte_eth_ipv4_flow ip; /**< IPv4 fields to match. */ + uint16_t src_port; /**< UDP source port to match. */ + uint16_t dst_port; /**< UDP destination port to match. */ +}; + +/** + * A structure used to define the input for IPV4 TCP flow + */ +struct rte_eth_tcpv4_flow { + struct rte_eth_ipv4_flow ip; /**< IPv4 fields to match. */ + uint16_t src_port; /**< TCP source port to match. 
*/ + uint16_t dst_port; /**< TCP destination port to match. */ +}; + +/** + * A structure used to define the input for IPV4 SCTP flow + */ +struct rte_eth_sctpv4_flow { + struct rte_eth_ipv4_flow ip; /**< IPv4 fields to match. */ + uint16_t src_port; /**< SCTP source port to match. */ + uint16_t dst_port; /**< SCTP destination port to match. */ + uint32_t verify_tag; /**< Verify tag to match */ +}; + +/** + * A structure used to define the input for IPV6 flow + */ +struct rte_eth_ipv6_flow { + uint32_t src_ip[4]; /**< IPv6 source address to match. */ + uint32_t dst_ip[4]; /**< IPv6 destination address to match. */ +}; + +/** + * A structure used to define the input for IPV6 UDP flow + */ +struct rte_eth_udpv6_flow { + struct rte_eth_ipv6_flow ip; /**< IPv6 fields to match. */ + uint16_t src_port; /**< UDP source port to match. */ + uint16_t dst_port; /**< UDP destination port to match. */ +}; + +/** + * A structure used to define the input for IPV6 TCP flow + */ +struct rte_eth_tcpv6_flow { + struct rte_eth_ipv6_flow ip; /**< IPv6 fields to match. */ + uint16_t src_port; /**< TCP source port to match. */ + uint16_t dst_port; /**< TCP destination port to match. */ +}; + +/** + * A structure used to define the input for IPV6 SCTP flow + */ +struct rte_eth_sctpv6_flow { + struct rte_eth_ipv6_flow ip; /**< IPv6 fields to match. */ + uint16_t src_port; /**< SCTP source port to match. */ + uint16_t dst_port; /**< SCTP destination port to match. */ + uint32_t verify_tag; /**< Verify tag to match */ +}; + +/** + * A structure used to define the input for MAC VLAN flow + */ +struct rte_eth_mac_vlan_flow { + struct ether_addr mac_addr; /**< Mac address to match. */ +}; + +/** + * Tunnel type for flow director. 
+ */ +enum rte_eth_fdir_tunnel_type { + RTE_FDIR_TUNNEL_TYPE_UNKNOWN = 0, + RTE_FDIR_TUNNEL_TYPE_NVGRE, + RTE_FDIR_TUNNEL_TYPE_VXLAN, +}; + +/** + * A structure used to define the input for tunnel flow, now it's VxLAN or + * NVGRE + */ +struct rte_eth_tunnel_flow { + enum rte_eth_fdir_tunnel_type tunnel_type; /**< Tunnel type to match. */ + uint32_t tunnel_id; /**< Tunnel ID to match. TNI, VNI... */ + struct ether_addr mac_addr; /**< Mac address to match. */ +}; + +/** + * A union containing the inputs for all types of flow + */ +union rte_eth_fdir_flow { + struct rte_eth_l2_flow l2_flow; + struct rte_eth_udpv4_flow udp4_flow; + struct rte_eth_tcpv4_flow tcp4_flow; + struct rte_eth_sctpv4_flow sctp4_flow; + struct rte_eth_ipv4_flow ip4_flow; + struct rte_eth_udpv6_flow udp6_flow; + struct rte_eth_tcpv6_flow tcp6_flow; + struct rte_eth_sctpv6_flow sctp6_flow; + struct rte_eth_ipv6_flow ipv6_flow; + struct rte_eth_mac_vlan_flow mac_vlan_flow; + struct rte_eth_tunnel_flow tunnel_flow; +}; + +/** + * A structure used to contain the extended input of a flow + */ +struct rte_eth_fdir_flow_ext { + uint16_t vlan_tci; + uint8_t flexbytes[RTE_ETH_FDIR_MAX_FLEXLEN]; + /**< It is filled by the flexible payload to match. */ + uint8_t is_vf; /**< 1 for VF, 0 for port dev */ + uint16_t dst_id; /**< VF ID, available when is_vf is 1*/ +}; + +/** + * A structure used to define the input for a flow director filter entry + */ +struct rte_eth_fdir_input { + uint16_t flow_type; + union rte_eth_fdir_flow flow; + /**< Flow fields to match, dependent on flow_type */ + struct rte_eth_fdir_flow_ext flow_ext; + /**< Additional fields to match */ +}; + +/** + * Behavior will be taken if FDIR match + */ +enum rte_eth_fdir_behavior { + RTE_ETH_FDIR_ACCEPT = 0, + RTE_ETH_FDIR_REJECT, + RTE_ETH_FDIR_PASSTHRU, +}; + +/** + * Flow director report status + * It defines what will be reported if FDIR entry is matched. + */ +enum rte_eth_fdir_status { + RTE_ETH_FDIR_NO_REPORT_STATUS = 0, /**< Report nothing. 
*/ + RTE_ETH_FDIR_REPORT_ID, /**< Only report FD ID. */ + RTE_ETH_FDIR_REPORT_ID_FLEX_4, /**< Report FD ID and 4 flex bytes. */ + RTE_ETH_FDIR_REPORT_FLEX_8, /**< Report 8 flex bytes. */ +}; + +/** + * A structure used to define an action when match FDIR packet filter. + */ +struct rte_eth_fdir_action { + uint16_t rx_queue; /**< Queue assigned to if FDIR match. */ + enum rte_eth_fdir_behavior behavior; /**< Behavior will be taken */ + enum rte_eth_fdir_status report_status; /**< Status report option */ + uint8_t flex_off; + /**< If report_status is RTE_ETH_FDIR_REPORT_ID_FLEX_4 or + RTE_ETH_FDIR_REPORT_FLEX_8, flex_off specifies where the reported + flex bytes start from in flexible payload. */ +}; + +/** + * A structure used to define the flow director filter entry by filter_ctrl API + * It supports RTE_ETH_FILTER_FDIR with RTE_ETH_FILTER_ADD and + * RTE_ETH_FILTER_DELETE operations. + */ +struct rte_eth_fdir_filter { + uint32_t soft_id; + /**< ID; a unique value is required when dealing with an FDIR entry */ + struct rte_eth_fdir_input input; /**< Input set */ + struct rte_eth_fdir_action action; /**< Action taken when match */ +}; + +/** + * A structure used to configure FDIR masks that are used by the device + * to match the various fields of RX packet headers. 
+ */ +struct rte_eth_fdir_masks { + uint16_t vlan_tci_mask; + struct rte_eth_ipv4_flow ipv4_mask; + struct rte_eth_ipv6_flow ipv6_mask; + uint16_t src_port_mask; + uint16_t dst_port_mask; + uint8_t mac_addr_byte_mask; /**< Per byte MAC address mask */ + uint32_t tunnel_id_mask; /**< tunnel ID mask */ + uint8_t tunnel_type_mask; +}; + +/** + * Payload type + */ +enum rte_eth_payload_type { + RTE_ETH_PAYLOAD_UNKNOWN = 0, + RTE_ETH_RAW_PAYLOAD, + RTE_ETH_L2_PAYLOAD, + RTE_ETH_L3_PAYLOAD, + RTE_ETH_L4_PAYLOAD, + RTE_ETH_PAYLOAD_MAX = 8, +}; + +/** + * A structure used to select bytes extracted from the protocol layers to + * flexible payload for filter + */ +struct rte_eth_flex_payload_cfg { + enum rte_eth_payload_type type; /**< Payload type */ + uint16_t src_offset[RTE_ETH_FDIR_MAX_FLEXLEN]; + /**< Offset in bytes from the beginning of packet's payload + src_offset[i] indicates the flexbyte i's offset in original + packet payload. This value should be less than + flex_payload_limit in struct rte_eth_fdir_info.*/ +}; + +/** + * A structure used to define FDIR masks for flexible payload + * for each flow type + */ +struct rte_eth_fdir_flex_mask { + uint16_t flow_type; + uint8_t mask[RTE_ETH_FDIR_MAX_FLEXLEN]; + /**< Mask for the whole flexible payload */ +}; + +/** + * A structure used to define all flexible payload related setting + * include flex payload and flex mask + */ +struct rte_eth_fdir_flex_conf { + uint16_t nb_payloads; /**< The number of following payload cfg */ + uint16_t nb_flexmasks; /**< The number of following mask */ + struct rte_eth_flex_payload_cfg flex_set[RTE_ETH_PAYLOAD_MAX]; + /**< Flex payload configuration for each payload type */ + struct rte_eth_fdir_flex_mask flex_mask[RTE_ETH_FLOW_MAX]; + /**< Flex mask configuration for each flow type */ +}; + +/** + * Flow Director setting modes: none, signature or perfect. + */ +enum rte_fdir_mode { + RTE_FDIR_MODE_NONE = 0, /**< Disable FDIR support. 
*/ + RTE_FDIR_MODE_SIGNATURE, /**< Enable FDIR signature filter mode. */ + RTE_FDIR_MODE_PERFECT, /**< Enable FDIR perfect filter mode. */ + RTE_FDIR_MODE_PERFECT_MAC_VLAN, /**< Enable FDIR filter mode - MAC VLAN. */ + RTE_FDIR_MODE_PERFECT_TUNNEL, /**< Enable FDIR filter mode - tunnel. */ +}; + +#define UINT32_BIT (CHAR_BIT * sizeof(uint32_t)) +#define RTE_FLOW_MASK_ARRAY_SIZE \ + (RTE_ALIGN(RTE_ETH_FLOW_MAX, UINT32_BIT)/UINT32_BIT) + +/** + * A structure used to get the information of flow director filter. + * It supports RTE_ETH_FILTER_FDIR with RTE_ETH_FILTER_INFO operation. + * It includes the mode, flexible payload configuration information, + * capabilities and supported flow types, flexible payload characters. + * It can be gotten to help taking specific configurations per device. + */ +struct rte_eth_fdir_info { + enum rte_fdir_mode mode; /**< Flow director mode */ + struct rte_eth_fdir_masks mask; + /** Flex payload configuration information */ + struct rte_eth_fdir_flex_conf flex_conf; + uint32_t guarant_spc; /**< Guaranteed spaces.*/ + uint32_t best_spc; /**< Best effort spaces.*/ + /** Bit mask for every supported flow type. */ + uint32_t flow_types_mask[RTE_FLOW_MASK_ARRAY_SIZE]; + uint32_t max_flexpayload; /**< Total flex payload in bytes. */ + /** Flexible payload unit in bytes. Size and alignments of all flex + payload segments should be multiples of this value. */ + uint32_t flex_payload_unit; + /** Max number of flexible payload continuous segments. + Each segment should be a multiple of flex_payload_unit.*/ + uint32_t max_flex_payload_segment_num; + /** Maximum src_offset in bytes allowed. It indicates that + src_offset[i] in struct rte_eth_flex_payload_cfg should be less + than this value. */ + uint16_t flex_payload_limit; + /** Flex bitmask unit in bytes. Size of flex bitmasks should be a + multiple of this value. 
*/ + uint32_t flex_bitmask_unit; + /** Max supported size of flex bitmasks in flex_bitmask_unit */ + uint32_t max_flex_bitmask_num; +}; + +/** + * A structure used to define the statistics of flow director. + * It supports RTE_ETH_FILTER_FDIR with RTE_ETH_FILTER_STATS operation. + */ +struct rte_eth_fdir_stats { + uint32_t collision; /**< Number of filters with collision. */ + uint32_t free; /**< Number of free filters. */ + uint32_t maxhash; + /**< The lookup hash value of the added filter that updated the value + of the MAXLEN field */ + uint32_t maxlen; /**< Longest linked list of filters. */ + uint64_t add; /**< Number of added filters. */ + uint64_t remove; /**< Number of removed filters. */ + uint64_t f_add; /**< Number of failed added filters. */ + uint64_t f_remove; /**< Number of failed removed filters. */ + uint32_t guarant_cnt; /**< Number of filters in guaranteed spaces. */ + uint32_t best_cnt; /**< Number of filters in best effort spaces. */ +}; + +/** + * Flow Director filter information types. + */ +enum rte_eth_fdir_filter_info_type { + RTE_ETH_FDIR_FILTER_INFO_TYPE_UNKNOWN = 0, + /** Flow Director filter input set configuration */ + RTE_ETH_FDIR_FILTER_INPUT_SET_SELECT, + RTE_ETH_FDIR_FILTER_INFO_TYPE_MAX, +}; + +/** + * A structure used to set FDIR filter information, to support filter type + * of 'RTE_ETH_FILTER_FDIR' RTE_ETH_FDIR_FILTER_INPUT_SET_SELECT operation. + */ +struct rte_eth_fdir_filter_info { + enum rte_eth_fdir_filter_info_type info_type; /**< Information type */ + /** Details of fdir filter information */ + union { + /** Flow Director input set configuration per port */ + struct rte_eth_input_set_conf input_set_conf; + } info; +}; + +/** + * Hash filter information types. + * - RTE_ETH_HASH_FILTER_SYM_HASH_ENA_PER_PORT is for getting/setting the + * information/configuration of 'symmetric hash enable' per port. + * - RTE_ETH_HASH_FILTER_GLOBAL_CONFIG is for getting/setting the global + * configurations of hash filters. 
Those global configurations are valid + * for all ports of the same NIC. + * - RTE_ETH_HASH_FILTER_INPUT_SET_SELECT is for setting the global + * hash input set fields + */ +enum rte_eth_hash_filter_info_type { + RTE_ETH_HASH_FILTER_INFO_TYPE_UNKNOWN = 0, + /** Symmetric hash enable per port */ + RTE_ETH_HASH_FILTER_SYM_HASH_ENA_PER_PORT, + /** Configure globally for hash filter */ + RTE_ETH_HASH_FILTER_GLOBAL_CONFIG, + /** Global Hash filter input set configuration */ + RTE_ETH_HASH_FILTER_INPUT_SET_SELECT, + RTE_ETH_HASH_FILTER_INFO_TYPE_MAX, +}; + +/** + * Hash function types. + */ +enum rte_eth_hash_function { + RTE_ETH_HASH_FUNCTION_DEFAULT = 0, + RTE_ETH_HASH_FUNCTION_TOEPLITZ, /**< Toeplitz */ + RTE_ETH_HASH_FUNCTION_SIMPLE_XOR, /**< Simple XOR */ + RTE_ETH_HASH_FUNCTION_MAX, +}; + +#define RTE_SYM_HASH_MASK_ARRAY_SIZE \ + (RTE_ALIGN(RTE_ETH_FLOW_MAX, UINT32_BIT)/UINT32_BIT) +/** + * A structure used to set or get global hash function configurations which + * include symmetric hash enable per flow type and hash function type. + * Each bit in sym_hash_enable_mask[] indicates if the symmetric hash of the + * corresponding flow type is enabled or not. + * Each bit in valid_bit_mask[] indicates if the corresponding bit in + * sym_hash_enable_mask[] is valid or not. For the configurations gotten, it + * also means if the flow type is supported by hardware or not. + */ +struct rte_eth_hash_global_conf { + enum rte_eth_hash_function hash_func; /**< Hash function type */ + /** Bit mask for symmetric hash enable per flow type */ + uint32_t sym_hash_enable_mask[RTE_SYM_HASH_MASK_ARRAY_SIZE]; + /** Bit mask indicates if the corresponding bit is valid */ + uint32_t valid_bit_mask[RTE_SYM_HASH_MASK_ARRAY_SIZE]; +}; + +/** + * A structure used to set or get hash filter information, to support filter + * type of 'RTE_ETH_FILTER_HASH' and its operations. 
+ */ +struct rte_eth_hash_filter_info { + enum rte_eth_hash_filter_info_type info_type; /**< Information type */ + /** Details of hash filter information */ + union { + /** For RTE_ETH_HASH_FILTER_SYM_HASH_ENA_PER_PORT */ + uint8_t enable; + /** Global configurations of hash filter */ + struct rte_eth_hash_global_conf global_conf; + /** Global configurations of hash filter input set */ + struct rte_eth_input_set_conf input_set_conf; + } info; +}; + +#ifdef __cplusplus +} +#endif + +#endif /* _RTE_ETH_CTRL_H_ */ diff --git a/src/dpdk22/lib/librte_ether/rte_ethdev.c b/src/dpdk22/lib/librte_ether/rte_ethdev.c new file mode 100644 index 00000000..ed971b49 --- /dev/null +++ b/src/dpdk22/lib/librte_ether/rte_ethdev.c @@ -0,0 +1,3241 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2015 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "rte_ether.h" +#include "rte_ethdev.h" + +static const char *MZ_RTE_ETH_DEV_DATA = "rte_eth_dev_data"; +struct rte_eth_dev rte_eth_devices[RTE_MAX_ETHPORTS]; +static struct rte_eth_dev_data *rte_eth_dev_data; +static uint8_t nb_ports; + +/* spinlock for eth device callbacks */ +static rte_spinlock_t rte_eth_dev_cb_lock = RTE_SPINLOCK_INITIALIZER; + +/* store statistics names and its offset in stats structure */ +struct rte_eth_xstats_name_off { + char name[RTE_ETH_XSTATS_NAME_SIZE]; + unsigned offset; +}; + +static const struct rte_eth_xstats_name_off rte_stats_strings[] = { + {"rx_good_packets", offsetof(struct rte_eth_stats, ipackets)}, + {"tx_good_packets", offsetof(struct rte_eth_stats, opackets)}, + {"rx_good_bytes", offsetof(struct rte_eth_stats, ibytes)}, + {"tx_good_bytes", offsetof(struct rte_eth_stats, obytes)}, + {"rx_errors", offsetof(struct rte_eth_stats, ierrors)}, + {"tx_errors", offsetof(struct rte_eth_stats, oerrors)}, + {"rx_mbuf_allocation_errors", offsetof(struct rte_eth_stats, + rx_nombuf)}, +}; + +#define RTE_NB_STATS (sizeof(rte_stats_strings) / 
sizeof(rte_stats_strings[0])) + +static const struct rte_eth_xstats_name_off rte_rxq_stats_strings[] = { + {"packets", offsetof(struct rte_eth_stats, q_ipackets)}, + {"bytes", offsetof(struct rte_eth_stats, q_ibytes)}, + {"errors", offsetof(struct rte_eth_stats, q_errors)}, +}; + +#define RTE_NB_RXQ_STATS (sizeof(rte_rxq_stats_strings) / \ + sizeof(rte_rxq_stats_strings[0])) + +static const struct rte_eth_xstats_name_off rte_txq_stats_strings[] = { + {"packets", offsetof(struct rte_eth_stats, q_opackets)}, + {"bytes", offsetof(struct rte_eth_stats, q_obytes)}, +}; +#define RTE_NB_TXQ_STATS (sizeof(rte_txq_stats_strings) / \ + sizeof(rte_txq_stats_strings[0])) + + +/** + * The user application callback description. + * + * It contains callback address to be registered by user application, + * the pointer to the parameters for callback, and the event type. + */ +struct rte_eth_dev_callback { + TAILQ_ENTRY(rte_eth_dev_callback) next; /**< Callbacks list */ + rte_eth_dev_cb_fn cb_fn; /**< Callback address */ + void *cb_arg; /**< Parameter for callback */ + enum rte_eth_event_type event; /**< Interrupt event type */ + uint32_t active; /**< Callback is executing */ +}; + +enum { + STAT_QMAP_TX = 0, + STAT_QMAP_RX +}; + +enum { + DEV_DETACHED = 0, + DEV_ATTACHED +}; + +static void +rte_eth_dev_data_alloc(void) +{ + const unsigned flags = 0; + const struct rte_memzone *mz; + + if (rte_eal_process_type() == RTE_PROC_PRIMARY) { + mz = rte_memzone_reserve(MZ_RTE_ETH_DEV_DATA, + RTE_MAX_ETHPORTS * sizeof(*rte_eth_dev_data), + rte_socket_id(), flags); + } else + mz = rte_memzone_lookup(MZ_RTE_ETH_DEV_DATA); + if (mz == NULL) + rte_panic("Cannot allocate memzone for ethernet port data\n"); + + rte_eth_dev_data = mz->addr; + if (rte_eal_process_type() == RTE_PROC_PRIMARY) + memset(rte_eth_dev_data, 0, + RTE_MAX_ETHPORTS * sizeof(*rte_eth_dev_data)); +} + +struct rte_eth_dev * +rte_eth_dev_allocated(const char *name) +{ + unsigned i; + + for (i = 0; i < RTE_MAX_ETHPORTS; i++) { + 
if ((rte_eth_devices[i].attached == DEV_ATTACHED) && + strcmp(rte_eth_devices[i].data->name, name) == 0) + return &rte_eth_devices[i]; + } + return NULL; +} + +static uint8_t +rte_eth_dev_find_free_port(void) +{ + unsigned i; + + for (i = 0; i < RTE_MAX_ETHPORTS; i++) { + if (rte_eth_devices[i].attached == DEV_DETACHED) + return i; + } + return RTE_MAX_ETHPORTS; +} + +struct rte_eth_dev * +rte_eth_dev_allocate(const char *name, enum rte_eth_dev_type type) +{ + uint8_t port_id; + struct rte_eth_dev *eth_dev; + + port_id = rte_eth_dev_find_free_port(); + if (port_id == RTE_MAX_ETHPORTS) { + RTE_PMD_DEBUG_TRACE("Reached maximum number of Ethernet ports\n"); + return NULL; + } + + if (rte_eth_dev_data == NULL) + rte_eth_dev_data_alloc(); + + if (rte_eth_dev_allocated(name) != NULL) { + RTE_PMD_DEBUG_TRACE("Ethernet Device with name %s already allocated!\n", + name); + return NULL; + } + + eth_dev = &rte_eth_devices[port_id]; + eth_dev->data = &rte_eth_dev_data[port_id]; + snprintf(eth_dev->data->name, sizeof(eth_dev->data->name), "%s", name); + eth_dev->data->port_id = port_id; + eth_dev->attached = DEV_ATTACHED; + eth_dev->dev_type = type; + nb_ports++; + return eth_dev; +} + +static int +rte_eth_dev_create_unique_device_name(char *name, size_t size, + struct rte_pci_device *pci_dev) +{ + int ret; + + if ((name == NULL) || (pci_dev == NULL)) + return -EINVAL; + + ret = snprintf(name, size, "%d:%d.%d", + pci_dev->addr.bus, pci_dev->addr.devid, + pci_dev->addr.function); + if (ret < 0) + return ret; + return 0; +} + +int +rte_eth_dev_release_port(struct rte_eth_dev *eth_dev) +{ + if (eth_dev == NULL) + return -EINVAL; + + eth_dev->attached = DEV_DETACHED; + nb_ports--; + return 0; +} + +static int +rte_eth_dev_init(struct rte_pci_driver *pci_drv, + struct rte_pci_device *pci_dev) +{ + struct eth_driver *eth_drv; + struct rte_eth_dev *eth_dev; + char ethdev_name[RTE_ETH_NAME_MAX_LEN]; + + int diag; + + eth_drv = (struct eth_driver *)pci_drv; + + /* Create unique Ethernet 
device name using PCI address */ + rte_eth_dev_create_unique_device_name(ethdev_name, + sizeof(ethdev_name), pci_dev); + + eth_dev = rte_eth_dev_allocate(ethdev_name, RTE_ETH_DEV_PCI); + if (eth_dev == NULL) + return -ENOMEM; + + if (rte_eal_process_type() == RTE_PROC_PRIMARY) { + eth_dev->data->dev_private = rte_zmalloc("ethdev private structure", + eth_drv->dev_private_size, + RTE_CACHE_LINE_SIZE); + if (eth_dev->data->dev_private == NULL) + rte_panic("Cannot allocate memzone for private port data\n"); + } + eth_dev->pci_dev = pci_dev; + eth_dev->driver = eth_drv; + eth_dev->data->rx_mbuf_alloc_failed = 0; + + /* init user callbacks */ + TAILQ_INIT(&(eth_dev->link_intr_cbs)); + + /* + * Set the default MTU. + */ + eth_dev->data->mtu = ETHER_MTU; + + /* Invoke PMD device initialization function */ + diag = (*eth_drv->eth_dev_init)(eth_dev); + if (diag == 0) + return 0; + + RTE_PMD_DEBUG_TRACE("driver %s: eth_dev_init(vendor_id=0x%u device_id=0x%x) failed\n", + pci_drv->name, + (unsigned) pci_dev->id.vendor_id, + (unsigned) pci_dev->id.device_id); + if (rte_eal_process_type() == RTE_PROC_PRIMARY) + rte_free(eth_dev->data->dev_private); + rte_eth_dev_release_port(eth_dev); + return diag; +} + +static int +rte_eth_dev_uninit(struct rte_pci_device *pci_dev) +{ + const struct eth_driver *eth_drv; + struct rte_eth_dev *eth_dev; + char ethdev_name[RTE_ETH_NAME_MAX_LEN]; + int ret; + + if (pci_dev == NULL) + return -EINVAL; + + /* Create unique Ethernet device name using PCI address */ + rte_eth_dev_create_unique_device_name(ethdev_name, + sizeof(ethdev_name), pci_dev); + + eth_dev = rte_eth_dev_allocated(ethdev_name); + if (eth_dev == NULL) + return -ENODEV; + + eth_drv = (const struct eth_driver *)pci_dev->driver; + + /* Invoke PMD device uninit function */ + if (*eth_drv->eth_dev_uninit) { + ret = (*eth_drv->eth_dev_uninit)(eth_dev); + if (ret) + return ret; + } + + /* free ether device */ + rte_eth_dev_release_port(eth_dev); + + if (rte_eal_process_type() == 
RTE_PROC_PRIMARY) + rte_free(eth_dev->data->dev_private); + + eth_dev->pci_dev = NULL; + eth_dev->driver = NULL; + eth_dev->data = NULL; + + return 0; +} + +/** + * Register an Ethernet [Poll Mode] driver. + * + * Function invoked by the initialization function of an Ethernet driver + * to simultaneously register itself as a PCI driver and as an Ethernet + * Poll Mode Driver. + * Invokes the rte_eal_pci_register() function to register the *pci_drv* + * structure embedded in the *eth_drv* structure, after having stored the + * address of the rte_eth_dev_init() function in the *devinit* field of + * the *pci_drv* structure. + * During the PCI probing phase, the rte_eth_dev_init() function is + * invoked for each PCI [Ethernet device] matching the embedded PCI + * identifiers provided by the driver. + */ +void +rte_eth_driver_register(struct eth_driver *eth_drv) +{ + eth_drv->pci_drv.devinit = rte_eth_dev_init; + eth_drv->pci_drv.devuninit = rte_eth_dev_uninit; + rte_eal_pci_register(ð_drv->pci_drv); +} + +int +rte_eth_dev_is_valid_port(uint8_t port_id) +{ + if (port_id >= RTE_MAX_ETHPORTS || + rte_eth_devices[port_id].attached != DEV_ATTACHED) + return 0; + else + return 1; +} + +int +rte_eth_dev_socket_id(uint8_t port_id) +{ + if (!rte_eth_dev_is_valid_port(port_id)) + return -1; + return rte_eth_devices[port_id].data->numa_node; +} + +uint8_t +rte_eth_dev_count(void) +{ + return nb_ports; +} + +static enum rte_eth_dev_type +rte_eth_dev_get_device_type(uint8_t port_id) +{ + if (!rte_eth_dev_is_valid_port(port_id)) + return RTE_ETH_DEV_UNKNOWN; + return rte_eth_devices[port_id].dev_type; +} + +static int +rte_eth_dev_get_addr_by_port(uint8_t port_id, struct rte_pci_addr *addr) +{ + RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -EINVAL); + + if (addr == NULL) { + RTE_PMD_DEBUG_TRACE("Null pointer is specified\n"); + return -EINVAL; + } + + *addr = rte_eth_devices[port_id].pci_dev->addr; + return 0; +} + +static int +rte_eth_dev_get_name_by_port(uint8_t port_id, char *name) 
+{ + char *tmp; + + RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -EINVAL); + + if (name == NULL) { + RTE_PMD_DEBUG_TRACE("Null pointer is specified\n"); + return -EINVAL; + } + + /* shouldn't check 'rte_eth_devices[i].data', + * because it might be overwritten by VDEV PMD */ + tmp = rte_eth_dev_data[port_id].name; + strcpy(name, tmp); + return 0; +} + +static int +rte_eth_dev_get_port_by_name(const char *name, uint8_t *port_id) +{ + int i; + + if (name == NULL) { + RTE_PMD_DEBUG_TRACE("Null pointer is specified\n"); + return -EINVAL; + } + + *port_id = RTE_MAX_ETHPORTS; + + for (i = 0; i < RTE_MAX_ETHPORTS; i++) { + + if (!strncmp(name, + rte_eth_dev_data[i].name, strlen(name))) { + + *port_id = i; + + return 0; + } + } + return -ENODEV; +} + +static int +rte_eth_dev_get_port_by_addr(const struct rte_pci_addr *addr, uint8_t *port_id) +{ + int i; + struct rte_pci_device *pci_dev = NULL; + + if (addr == NULL) { + RTE_PMD_DEBUG_TRACE("Null pointer is specified\n"); + return -EINVAL; + } + + *port_id = RTE_MAX_ETHPORTS; + + for (i = 0; i < RTE_MAX_ETHPORTS; i++) { + + pci_dev = rte_eth_devices[i].pci_dev; + + if (pci_dev && + !rte_eal_compare_pci_addr(&pci_dev->addr, addr)) { + + *port_id = i; + + return 0; + } + } + return -ENODEV; +} + +static int +rte_eth_dev_is_detachable(uint8_t port_id) +{ + uint32_t dev_flags; + + if (!rte_eth_dev_is_valid_port(port_id)) { + RTE_PMD_DEBUG_TRACE("Invalid port_id=%d\n", port_id); + return -EINVAL; + } + + switch (rte_eth_devices[port_id].data->kdrv) { + case RTE_KDRV_IGB_UIO: + case RTE_KDRV_UIO_GENERIC: + case RTE_KDRV_NIC_UIO: + case RTE_KDRV_NONE: + break; + case RTE_KDRV_VFIO: + default: + return -ENOTSUP; + } + dev_flags = rte_eth_devices[port_id].data->dev_flags; + return !(dev_flags & RTE_ETH_DEV_DETACHABLE); +} + +/* attach the new physical device, then store port_id of the device */ +static int +rte_eth_dev_attach_pdev(struct rte_pci_addr *addr, uint8_t *port_id) +{ + if ((addr == NULL) || (port_id == NULL)) + goto err; + + 
/* re-construct pci_device_list */ + if (rte_eal_pci_scan()) + goto err; + /* Invoke probe func of the driver can handle the new device. */ + if (rte_eal_pci_probe_one(addr)) + goto err; + + if (rte_eth_dev_get_port_by_addr(addr, port_id)) + goto err; + + return 0; +err: + RTE_LOG(ERR, EAL, "Driver, cannot attach the device\n"); + return -1; +} + +/* detach the new physical device, then store pci_addr of the device */ +static int +rte_eth_dev_detach_pdev(uint8_t port_id, struct rte_pci_addr *addr) +{ + struct rte_pci_addr freed_addr; + struct rte_pci_addr vp; + + if (addr == NULL) + goto err; + + /* check whether the driver supports detach feature, or not */ + if (rte_eth_dev_is_detachable(port_id)) + goto err; + + /* get pci address by port id */ + if (rte_eth_dev_get_addr_by_port(port_id, &freed_addr)) + goto err; + + /* Zeroed pci addr means the port comes from virtual device */ + vp.domain = vp.bus = vp.devid = vp.function = 0; + if (rte_eal_compare_pci_addr(&vp, &freed_addr) == 0) + goto err; + + /* invoke devuninit func of the pci driver, + * also remove the device from pci_device_list */ + if (rte_eal_pci_detach(&freed_addr)) + goto err; + + *addr = freed_addr; + return 0; +err: + RTE_LOG(ERR, EAL, "Driver, cannot detach the device\n"); + return -1; +} + +/* attach the new virtual device, then store port_id of the device */ +static int +rte_eth_dev_attach_vdev(const char *vdevargs, uint8_t *port_id) +{ + char *name = NULL, *args = NULL; + int ret = -1; + + if ((vdevargs == NULL) || (port_id == NULL)) + goto end; + + /* parse vdevargs, then retrieve device name and args */ + if (rte_eal_parse_devargs_str(vdevargs, &name, &args)) + goto end; + + /* walk around dev_driver_list to find the driver of the device, + * then invoke probe function of the driver. + * rte_eal_vdev_init() updates port_id allocated after + * initialization. 
+ */ + if (rte_eal_vdev_init(name, args)) + goto end; + + if (rte_eth_dev_get_port_by_name(name, port_id)) + goto end; + + ret = 0; +end: + if (name) + free(name); + if (args) + free(args); + + if (ret < 0) + RTE_LOG(ERR, EAL, "Driver, cannot attach the device\n"); + return ret; +} + +/* detach the new virtual device, then store the name of the device */ +static int +rte_eth_dev_detach_vdev(uint8_t port_id, char *vdevname) +{ + char name[RTE_ETH_NAME_MAX_LEN]; + + if (vdevname == NULL) + goto err; + + /* check whether the driver supports detach feature, or not */ + if (rte_eth_dev_is_detachable(port_id)) + goto err; + + /* get device name by port id */ + if (rte_eth_dev_get_name_by_port(port_id, name)) + goto err; + /* walk around dev_driver_list to find the driver of the device, + * then invoke uninit function of the driver */ + if (rte_eal_vdev_uninit(name)) + goto err; + + strncpy(vdevname, name, sizeof(name)); + return 0; +err: + RTE_LOG(ERR, EAL, "Driver, cannot detach the device\n"); + return -1; +} + +/* attach the new device, then store port_id of the device */ +int +rte_eth_dev_attach(const char *devargs, uint8_t *port_id) +{ + struct rte_pci_addr addr; + + if ((devargs == NULL) || (port_id == NULL)) + return -EINVAL; + + if (eal_parse_pci_DomBDF(devargs, &addr) == 0) + return rte_eth_dev_attach_pdev(&addr, port_id); + else + return rte_eth_dev_attach_vdev(devargs, port_id); +} + +/* detach the device, then store the name of the device */ +int +rte_eth_dev_detach(uint8_t port_id, char *name) +{ + struct rte_pci_addr addr; + int ret; + + if (name == NULL) + return -EINVAL; + + if (rte_eth_dev_get_device_type(port_id) == RTE_ETH_DEV_PCI) { + ret = rte_eth_dev_get_addr_by_port(port_id, &addr); + if (ret < 0) + return ret; + + ret = rte_eth_dev_detach_pdev(port_id, &addr); + if (ret == 0) + snprintf(name, RTE_ETH_NAME_MAX_LEN, + "%04x:%02x:%02x.%d", + addr.domain, addr.bus, + addr.devid, addr.function); + + return ret; + } else + return 
rte_eth_dev_detach_vdev(port_id, name); +} + +static int +rte_eth_dev_rx_queue_config(struct rte_eth_dev *dev, uint16_t nb_queues) +{ + uint16_t old_nb_queues = dev->data->nb_rx_queues; + void **rxq; + unsigned i; + + if (dev->data->rx_queues == NULL) { /* first time configuration */ + dev->data->rx_queues = rte_zmalloc("ethdev->rx_queues", + sizeof(dev->data->rx_queues[0]) * nb_queues, + RTE_CACHE_LINE_SIZE); + if (dev->data->rx_queues == NULL) { + dev->data->nb_rx_queues = 0; + return -(ENOMEM); + } + } else { /* re-configure */ + RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->rx_queue_release, -ENOTSUP); + + rxq = dev->data->rx_queues; + + for (i = nb_queues; i < old_nb_queues; i++) + (*dev->dev_ops->rx_queue_release)(rxq[i]); + rxq = rte_realloc(rxq, sizeof(rxq[0]) * nb_queues, + RTE_CACHE_LINE_SIZE); + if (rxq == NULL) + return -(ENOMEM); + if (nb_queues > old_nb_queues) { + uint16_t new_qs = nb_queues - old_nb_queues; + + memset(rxq + old_nb_queues, 0, + sizeof(rxq[0]) * new_qs); + } + + dev->data->rx_queues = rxq; + + } + dev->data->nb_rx_queues = nb_queues; + return 0; +} + +int +rte_eth_dev_rx_queue_start(uint8_t port_id, uint16_t rx_queue_id) +{ + struct rte_eth_dev *dev; + + /* This function is only safe when called from the primary process + * in a multi-process setup*/ + RTE_PROC_PRIMARY_OR_ERR_RET(-E_RTE_SECONDARY); + + RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -EINVAL); + + dev = &rte_eth_devices[port_id]; + if (rx_queue_id >= dev->data->nb_rx_queues) { + RTE_PMD_DEBUG_TRACE("Invalid RX queue_id=%d\n", rx_queue_id); + return -EINVAL; + } + + RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->rx_queue_start, -ENOTSUP); + + if (dev->data->rx_queue_state[rx_queue_id] != RTE_ETH_QUEUE_STATE_STOPPED) { + RTE_PMD_DEBUG_TRACE("Queue %" PRIu16" of device with port_id=%" PRIu8 + " already started\n", + rx_queue_id, port_id); + return 0; + } + + return dev->dev_ops->rx_queue_start(dev, rx_queue_id); + +} + +int +rte_eth_dev_rx_queue_stop(uint8_t port_id, uint16_t rx_queue_id) +{ + 
struct rte_eth_dev *dev; + + /* This function is only safe when called from the primary process + * in a multi-process setup*/ + RTE_PROC_PRIMARY_OR_ERR_RET(-E_RTE_SECONDARY); + + RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -EINVAL); + + dev = &rte_eth_devices[port_id]; + if (rx_queue_id >= dev->data->nb_rx_queues) { + RTE_PMD_DEBUG_TRACE("Invalid RX queue_id=%d\n", rx_queue_id); + return -EINVAL; + } + + RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->rx_queue_stop, -ENOTSUP); + + if (dev->data->rx_queue_state[rx_queue_id] == RTE_ETH_QUEUE_STATE_STOPPED) { + RTE_PMD_DEBUG_TRACE("Queue %" PRIu16" of device with port_id=%" PRIu8 + " already stopped\n", + rx_queue_id, port_id); + return 0; + } + + return dev->dev_ops->rx_queue_stop(dev, rx_queue_id); + +} + +int +rte_eth_dev_tx_queue_start(uint8_t port_id, uint16_t tx_queue_id) +{ + struct rte_eth_dev *dev; + + /* This function is only safe when called from the primary process + * in a multi-process setup*/ + RTE_PROC_PRIMARY_OR_ERR_RET(-E_RTE_SECONDARY); + + RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -EINVAL); + + dev = &rte_eth_devices[port_id]; + if (tx_queue_id >= dev->data->nb_tx_queues) { + RTE_PMD_DEBUG_TRACE("Invalid TX queue_id=%d\n", tx_queue_id); + return -EINVAL; + } + + RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->tx_queue_start, -ENOTSUP); + + if (dev->data->tx_queue_state[tx_queue_id] != RTE_ETH_QUEUE_STATE_STOPPED) { + RTE_PMD_DEBUG_TRACE("Queue %" PRIu16" of device with port_id=%" PRIu8 + " already started\n", + tx_queue_id, port_id); + return 0; + } + + return dev->dev_ops->tx_queue_start(dev, tx_queue_id); + +} + +int +rte_eth_dev_tx_queue_stop(uint8_t port_id, uint16_t tx_queue_id) +{ + struct rte_eth_dev *dev; + + /* This function is only safe when called from the primary process + * in a multi-process setup*/ + RTE_PROC_PRIMARY_OR_ERR_RET(-E_RTE_SECONDARY); + + RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -EINVAL); + + dev = &rte_eth_devices[port_id]; + if (tx_queue_id >= dev->data->nb_tx_queues) { + 
RTE_PMD_DEBUG_TRACE("Invalid TX queue_id=%d\n", tx_queue_id); + return -EINVAL; + } + + RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->tx_queue_stop, -ENOTSUP); + + if (dev->data->tx_queue_state[tx_queue_id] == RTE_ETH_QUEUE_STATE_STOPPED) { + RTE_PMD_DEBUG_TRACE("Queue %" PRIu16" of device with port_id=%" PRIu8 + " already stopped\n", + tx_queue_id, port_id); + return 0; + } + + return dev->dev_ops->tx_queue_stop(dev, tx_queue_id); + +} + +static int +rte_eth_dev_tx_queue_config(struct rte_eth_dev *dev, uint16_t nb_queues) +{ + uint16_t old_nb_queues = dev->data->nb_tx_queues; + void **txq; + unsigned i; + + if (dev->data->tx_queues == NULL) { /* first time configuration */ + dev->data->tx_queues = rte_zmalloc("ethdev->tx_queues", + sizeof(dev->data->tx_queues[0]) * nb_queues, + RTE_CACHE_LINE_SIZE); + if (dev->data->tx_queues == NULL) { + dev->data->nb_tx_queues = 0; + return -(ENOMEM); + } + } else { /* re-configure */ + RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->tx_queue_release, -ENOTSUP); + + txq = dev->data->tx_queues; + + for (i = nb_queues; i < old_nb_queues; i++) + (*dev->dev_ops->tx_queue_release)(txq[i]); + txq = rte_realloc(txq, sizeof(txq[0]) * nb_queues, + RTE_CACHE_LINE_SIZE); + if (txq == NULL) + return -ENOMEM; + if (nb_queues > old_nb_queues) { + uint16_t new_qs = nb_queues - old_nb_queues; + + memset(txq + old_nb_queues, 0, + sizeof(txq[0]) * new_qs); + } + + dev->data->tx_queues = txq; + + } + dev->data->nb_tx_queues = nb_queues; + return 0; +} + +int +rte_eth_dev_configure(uint8_t port_id, uint16_t nb_rx_q, uint16_t nb_tx_q, + const struct rte_eth_conf *dev_conf) +{ + struct rte_eth_dev *dev; + struct rte_eth_dev_info dev_info; + int diag; + + /* This function is only safe when called from the primary process + * in a multi-process setup*/ + RTE_PROC_PRIMARY_OR_ERR_RET(-E_RTE_SECONDARY); + + RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -EINVAL); + + if (nb_rx_q > RTE_MAX_QUEUES_PER_PORT) { + RTE_PMD_DEBUG_TRACE( + "Number of RX queues requested (%u) is 
greater than max supported(%d)\n", + nb_rx_q, RTE_MAX_QUEUES_PER_PORT); + return -EINVAL; + } + + if (nb_tx_q > RTE_MAX_QUEUES_PER_PORT) { + RTE_PMD_DEBUG_TRACE( + "Number of TX queues requested (%u) is greater than max supported(%d)\n", + nb_tx_q, RTE_MAX_QUEUES_PER_PORT); + return -EINVAL; + } + + dev = &rte_eth_devices[port_id]; + + RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->dev_infos_get, -ENOTSUP); + RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->dev_configure, -ENOTSUP); + + if (dev->data->dev_started) { + RTE_PMD_DEBUG_TRACE( + "port %d must be stopped to allow configuration\n", port_id); + return -EBUSY; + } + + /* + * Check that the numbers of RX and TX queues are not greater + * than the maximum number of RX and TX queues supported by the + * configured device. + */ + (*dev->dev_ops->dev_infos_get)(dev, &dev_info); + if (nb_rx_q > dev_info.max_rx_queues) { + RTE_PMD_DEBUG_TRACE("ethdev port_id=%d nb_rx_queues=%d > %d\n", + port_id, nb_rx_q, dev_info.max_rx_queues); + return -EINVAL; + } + if (nb_rx_q == 0) { + RTE_PMD_DEBUG_TRACE("ethdev port_id=%d nb_rx_q == 0\n", port_id); + return -EINVAL; + } + + if (nb_tx_q > dev_info.max_tx_queues) { + RTE_PMD_DEBUG_TRACE("ethdev port_id=%d nb_tx_queues=%d > %d\n", + port_id, nb_tx_q, dev_info.max_tx_queues); + return -EINVAL; + } + if (nb_tx_q == 0) { + RTE_PMD_DEBUG_TRACE("ethdev port_id=%d nb_tx_q == 0\n", port_id); + return -EINVAL; + } + + /* Copy the dev_conf parameter into the dev structure */ + memcpy(&dev->data->dev_conf, dev_conf, sizeof(dev->data->dev_conf)); + + /* + * If link state interrupt is enabled, check that the + * device supports it. + */ + if ((dev_conf->intr_conf.lsc == 1) && + (!(dev->data->dev_flags & RTE_ETH_DEV_INTR_LSC))) { + RTE_PMD_DEBUG_TRACE("driver %s does not support lsc\n", + dev->data->drv_name); + return -EINVAL; + } + + /* + * If jumbo frames are enabled, check that the maximum RX packet + * length is supported by the configured device. 
+ */ + if (dev_conf->rxmode.jumbo_frame == 1) { + if (dev_conf->rxmode.max_rx_pkt_len > + dev_info.max_rx_pktlen) { + RTE_PMD_DEBUG_TRACE("ethdev port_id=%d max_rx_pkt_len %u" + " > max valid value %u\n", + port_id, + (unsigned)dev_conf->rxmode.max_rx_pkt_len, + (unsigned)dev_info.max_rx_pktlen); + return -EINVAL; + } else if (dev_conf->rxmode.max_rx_pkt_len < ETHER_MIN_LEN) { + RTE_PMD_DEBUG_TRACE("ethdev port_id=%d max_rx_pkt_len %u" + " < min valid value %u\n", + port_id, + (unsigned)dev_conf->rxmode.max_rx_pkt_len, + (unsigned)ETHER_MIN_LEN); + return -EINVAL; + } + } else { + if (dev_conf->rxmode.max_rx_pkt_len < ETHER_MIN_LEN || + dev_conf->rxmode.max_rx_pkt_len > ETHER_MAX_LEN) + /* Use default value */ + dev->data->dev_conf.rxmode.max_rx_pkt_len = + ETHER_MAX_LEN; + } + + /* + * Setup new number of RX/TX queues and reconfigure device. + */ + diag = rte_eth_dev_rx_queue_config(dev, nb_rx_q); + if (diag != 0) { + RTE_PMD_DEBUG_TRACE("port%d rte_eth_dev_rx_queue_config = %d\n", + port_id, diag); + return diag; + } + + diag = rte_eth_dev_tx_queue_config(dev, nb_tx_q); + if (diag != 0) { + RTE_PMD_DEBUG_TRACE("port%d rte_eth_dev_tx_queue_config = %d\n", + port_id, diag); + rte_eth_dev_rx_queue_config(dev, 0); + return diag; + } + + diag = (*dev->dev_ops->dev_configure)(dev); + if (diag != 0) { + RTE_PMD_DEBUG_TRACE("port%d dev_configure = %d\n", + port_id, diag); + rte_eth_dev_rx_queue_config(dev, 0); + rte_eth_dev_tx_queue_config(dev, 0); + return diag; + } + + return 0; +} + +static void +rte_eth_dev_config_restore(uint8_t port_id) +{ + struct rte_eth_dev *dev; + struct rte_eth_dev_info dev_info; + struct ether_addr addr; + uint16_t i; + uint32_t pool = 0; + + dev = &rte_eth_devices[port_id]; + + rte_eth_dev_info_get(port_id, &dev_info); + + if (RTE_ETH_DEV_SRIOV(dev).active) + pool = RTE_ETH_DEV_SRIOV(dev).def_vmdq_idx; + + /* replay MAC address configuration */ + for (i = 0; i < dev_info.max_mac_addrs; i++) { + addr = dev->data->mac_addrs[i]; + + /* skip 
zero address */ + if (is_zero_ether_addr(&addr)) + continue; + + /* add address to the hardware */ + if (*dev->dev_ops->mac_addr_add && + (dev->data->mac_pool_sel[i] & (1ULL << pool))) + (*dev->dev_ops->mac_addr_add)(dev, &addr, i, pool); + else { + RTE_PMD_DEBUG_TRACE("port %d: MAC address array not supported\n", + port_id); + /* exit the loop but not return an error */ + break; + } + } + + /* replay promiscuous configuration */ + if (rte_eth_promiscuous_get(port_id) == 1) + rte_eth_promiscuous_enable(port_id); + else if (rte_eth_promiscuous_get(port_id) == 0) + rte_eth_promiscuous_disable(port_id); + + /* replay all multicast configuration */ + if (rte_eth_allmulticast_get(port_id) == 1) + rte_eth_allmulticast_enable(port_id); + else if (rte_eth_allmulticast_get(port_id) == 0) + rte_eth_allmulticast_disable(port_id); +} + +int +rte_eth_dev_start(uint8_t port_id) +{ + struct rte_eth_dev *dev; + int diag; + + /* This function is only safe when called from the primary process + * in a multi-process setup*/ + RTE_PROC_PRIMARY_OR_ERR_RET(-E_RTE_SECONDARY); + + RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -EINVAL); + + dev = &rte_eth_devices[port_id]; + + RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->dev_start, -ENOTSUP); + + if (dev->data->dev_started != 0) { + RTE_PMD_DEBUG_TRACE("Device with port_id=%" PRIu8 + " already started\n", + port_id); + return 0; + } + + diag = (*dev->dev_ops->dev_start)(dev); + if (diag == 0) + dev->data->dev_started = 1; + else + return diag; + + rte_eth_dev_config_restore(port_id); + + if (dev->data->dev_conf.intr_conf.lsc == 0) { + RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->link_update, -ENOTSUP); + (*dev->dev_ops->link_update)(dev, 0); + } + return 0; +} + +void +rte_eth_dev_stop(uint8_t port_id) +{ + struct rte_eth_dev *dev; + + /* This function is only safe when called from the primary process + * in a multi-process setup*/ + RTE_PROC_PRIMARY_OR_RET(); + + RTE_ETH_VALID_PORTID_OR_RET(port_id); + dev = &rte_eth_devices[port_id]; + + 
RTE_FUNC_PTR_OR_RET(*dev->dev_ops->dev_stop); + + if (dev->data->dev_started == 0) { + RTE_PMD_DEBUG_TRACE("Device with port_id=%" PRIu8 + " already stopped\n", + port_id); + return; + } + + dev->data->dev_started = 0; + (*dev->dev_ops->dev_stop)(dev); +} + +int +rte_eth_dev_set_link_up(uint8_t port_id) +{ + struct rte_eth_dev *dev; + + /* This function is only safe when called from the primary process + * in a multi-process setup*/ + RTE_PROC_PRIMARY_OR_ERR_RET(-E_RTE_SECONDARY); + + RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -EINVAL); + + dev = &rte_eth_devices[port_id]; + + RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->dev_set_link_up, -ENOTSUP); + return (*dev->dev_ops->dev_set_link_up)(dev); +} + +int +rte_eth_dev_set_link_down(uint8_t port_id) +{ + struct rte_eth_dev *dev; + + /* This function is only safe when called from the primary process + * in a multi-process setup*/ + RTE_PROC_PRIMARY_OR_ERR_RET(-E_RTE_SECONDARY); + + RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -EINVAL); + + dev = &rte_eth_devices[port_id]; + + RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->dev_set_link_down, -ENOTSUP); + return (*dev->dev_ops->dev_set_link_down)(dev); +} + +void +rte_eth_dev_close(uint8_t port_id) +{ + struct rte_eth_dev *dev; + + /* This function is only safe when called from the primary process + * in a multi-process setup*/ + RTE_PROC_PRIMARY_OR_RET(); + + RTE_ETH_VALID_PORTID_OR_RET(port_id); + dev = &rte_eth_devices[port_id]; + + RTE_FUNC_PTR_OR_RET(*dev->dev_ops->dev_close); + dev->data->dev_started = 0; + (*dev->dev_ops->dev_close)(dev); + + rte_free(dev->data->rx_queues); + dev->data->rx_queues = NULL; + rte_free(dev->data->tx_queues); + dev->data->tx_queues = NULL; +} + +int +rte_eth_rx_queue_setup(uint8_t port_id, uint16_t rx_queue_id, + uint16_t nb_rx_desc, unsigned int socket_id, + const struct rte_eth_rxconf *rx_conf, + struct rte_mempool *mp) +{ + int ret; + uint32_t mbp_buf_size; + struct rte_eth_dev *dev; + struct rte_eth_dev_info dev_info; + + /* This function is only 
safe when called from the primary process + * in a multi-process setup*/ + RTE_PROC_PRIMARY_OR_ERR_RET(-E_RTE_SECONDARY); + + RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -EINVAL); + + dev = &rte_eth_devices[port_id]; + if (rx_queue_id >= dev->data->nb_rx_queues) { + RTE_PMD_DEBUG_TRACE("Invalid RX queue_id=%d\n", rx_queue_id); + return -EINVAL; + } + + if (dev->data->dev_started) { + RTE_PMD_DEBUG_TRACE( + "port %d must be stopped to allow configuration\n", port_id); + return -EBUSY; + } + + RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->dev_infos_get, -ENOTSUP); + RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->rx_queue_setup, -ENOTSUP); + + /* + * Check the size of the mbuf data buffer. + * This value must be provided in the private data of the memory pool. + * First check that the memory pool has a valid private data. + */ + rte_eth_dev_info_get(port_id, &dev_info); + if (mp->private_data_size < sizeof(struct rte_pktmbuf_pool_private)) { + RTE_PMD_DEBUG_TRACE("%s private_data_size %d < %d\n", + mp->name, (int) mp->private_data_size, + (int) sizeof(struct rte_pktmbuf_pool_private)); + return -ENOSPC; + } + mbp_buf_size = rte_pktmbuf_data_room_size(mp); + + if ((mbp_buf_size - RTE_PKTMBUF_HEADROOM) < dev_info.min_rx_bufsize) { + RTE_PMD_DEBUG_TRACE("%s mbuf_data_room_size %d < %d " + "(RTE_PKTMBUF_HEADROOM=%d + min_rx_bufsize(dev)" + "=%d)\n", + mp->name, + (int)mbp_buf_size, + (int)(RTE_PKTMBUF_HEADROOM + + dev_info.min_rx_bufsize), + (int)RTE_PKTMBUF_HEADROOM, + (int)dev_info.min_rx_bufsize); + return -EINVAL; + } + + if (nb_rx_desc > dev_info.rx_desc_lim.nb_max || + nb_rx_desc < dev_info.rx_desc_lim.nb_min || + nb_rx_desc % dev_info.rx_desc_lim.nb_align != 0) { + + RTE_PMD_DEBUG_TRACE("Invalid value for nb_rx_desc(=%hu), " + "should be: <= %hu, = %hu, and a product of %hu\n", + nb_rx_desc, + dev_info.rx_desc_lim.nb_max, + dev_info.rx_desc_lim.nb_min, + dev_info.rx_desc_lim.nb_align); + return -EINVAL; + } + + if (rx_conf == NULL) + rx_conf = &dev_info.default_rxconf; + + ret = 
(*dev->dev_ops->rx_queue_setup)(dev, rx_queue_id, nb_rx_desc, + socket_id, rx_conf, mp); + if (!ret) { + if (!dev->data->min_rx_buf_size || + dev->data->min_rx_buf_size > mbp_buf_size) + dev->data->min_rx_buf_size = mbp_buf_size; + } + + return ret; +} + +int +rte_eth_tx_queue_setup(uint8_t port_id, uint16_t tx_queue_id, + uint16_t nb_tx_desc, unsigned int socket_id, + const struct rte_eth_txconf *tx_conf) +{ + struct rte_eth_dev *dev; + struct rte_eth_dev_info dev_info; + + /* This function is only safe when called from the primary process + * in a multi-process setup*/ + RTE_PROC_PRIMARY_OR_ERR_RET(-E_RTE_SECONDARY); + + RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -EINVAL); + + dev = &rte_eth_devices[port_id]; + if (tx_queue_id >= dev->data->nb_tx_queues) { + RTE_PMD_DEBUG_TRACE("Invalid TX queue_id=%d\n", tx_queue_id); + return -EINVAL; + } + + if (dev->data->dev_started) { + RTE_PMD_DEBUG_TRACE( + "port %d must be stopped to allow configuration\n", port_id); + return -EBUSY; + } + + RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->dev_infos_get, -ENOTSUP); + RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->tx_queue_setup, -ENOTSUP); + + rte_eth_dev_info_get(port_id, &dev_info); + + if (nb_tx_desc > dev_info.tx_desc_lim.nb_max || + nb_tx_desc < dev_info.tx_desc_lim.nb_min || + nb_tx_desc % dev_info.tx_desc_lim.nb_align != 0) { + RTE_PMD_DEBUG_TRACE("Invalid value for nb_tx_desc(=%hu), " + "should be: <= %hu, = %hu, and a product of %hu\n", + nb_tx_desc, + dev_info.tx_desc_lim.nb_max, + dev_info.tx_desc_lim.nb_min, + dev_info.tx_desc_lim.nb_align); + return -EINVAL; + } + + if (tx_conf == NULL) + tx_conf = &dev_info.default_txconf; + + return (*dev->dev_ops->tx_queue_setup)(dev, tx_queue_id, nb_tx_desc, + socket_id, tx_conf); +} + +void +rte_eth_promiscuous_enable(uint8_t port_id) +{ + struct rte_eth_dev *dev; + + RTE_ETH_VALID_PORTID_OR_RET(port_id); + dev = &rte_eth_devices[port_id]; + + RTE_FUNC_PTR_OR_RET(*dev->dev_ops->promiscuous_enable); + 
/*
 * Copy the link record of a device into a caller-supplied buffer using a
 * single 64-bit compare-and-set.
 *
 * Both structures are accessed through uint64_t pointers, so this relies on
 * struct rte_eth_link occupying 64 bits.
 * NOTE(review): confirm against the struct definition.
 *
 * dev:  device whose dev->data->dev_link is read.
 * link: destination buffer that receives the link record.
 *
 * Returns 0 when rte_atomic64_cmpset() reports success, -1 when it
 * returns 0.
 */
static inline int
rte_eth_dev_atomic_read_link_status(struct rte_eth_dev *dev,
				struct rte_eth_link *link)
{
	struct rte_eth_link *dst = link;
	struct rte_eth_link *src = &(dev->data->dev_link);

	/*
	 * The "expected" value is read from *dst itself, so the compare is
	 * presumably meant to succeed unless *dst changes between that read
	 * and the cmpset; the net effect is storing *src into *dst.
	 */
	if (rte_atomic64_cmpset((uint64_t *)dst, *(uint64_t *)dst,
					*(uint64_t *)src) == 0)
		return -1;

	return 0;
}
eth_link); + else { + RTE_FUNC_PTR_OR_RET(*dev->dev_ops->link_update); + (*dev->dev_ops->link_update)(dev, 1); + *eth_link = dev->data->dev_link; + } +} + +void +rte_eth_link_get_nowait(uint8_t port_id, struct rte_eth_link *eth_link) +{ + struct rte_eth_dev *dev; + + RTE_ETH_VALID_PORTID_OR_RET(port_id); + dev = &rte_eth_devices[port_id]; + + if (dev->data->dev_conf.intr_conf.lsc != 0) + rte_eth_dev_atomic_read_link_status(dev, eth_link); + else { + RTE_FUNC_PTR_OR_RET(*dev->dev_ops->link_update); + (*dev->dev_ops->link_update)(dev, 0); + *eth_link = dev->data->dev_link; + } +} + +int +rte_eth_stats_get(uint8_t port_id, struct rte_eth_stats *stats) +{ + struct rte_eth_dev *dev; + + RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -EINVAL); + + dev = &rte_eth_devices[port_id]; + memset(stats, 0, sizeof(*stats)); + + RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->stats_get, -ENOTSUP); + (*dev->dev_ops->stats_get)(dev, stats); + stats->rx_nombuf = dev->data->rx_mbuf_alloc_failed; + return 0; +} + +void +rte_eth_stats_reset(uint8_t port_id) +{ + struct rte_eth_dev *dev; + + RTE_ETH_VALID_PORTID_OR_RET(port_id); + dev = &rte_eth_devices[port_id]; + + RTE_FUNC_PTR_OR_RET(*dev->dev_ops->stats_reset); + (*dev->dev_ops->stats_reset)(dev); + dev->data->rx_mbuf_alloc_failed = 0; +} + +/* retrieve ethdev extended statistics */ +int +rte_eth_xstats_get(uint8_t port_id, struct rte_eth_xstats *xstats, + unsigned n) +{ + struct rte_eth_stats eth_stats; + struct rte_eth_dev *dev; + unsigned count = 0, i, q; + signed xcount = 0; + uint64_t val, *stats_ptr; + + RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -EINVAL); + + dev = &rte_eth_devices[port_id]; + + /* Return generic statistics */ + count = RTE_NB_STATS + (dev->data->nb_rx_queues * RTE_NB_RXQ_STATS) + + (dev->data->nb_tx_queues * RTE_NB_TXQ_STATS); + + /* implemented by the driver */ + if (dev->dev_ops->xstats_get != NULL) { + /* Retrieve the xstats from the driver at the end of the + * xstats struct. 
+ */ + xcount = (*dev->dev_ops->xstats_get)(dev, &xstats[count], + (n > count) ? n - count : 0); + + if (xcount < 0) + return xcount; + } + + if (n < count + xcount) + return count + xcount; + + /* now fill the xstats structure */ + count = 0; + rte_eth_stats_get(port_id, ð_stats); + + /* global stats */ + for (i = 0; i < RTE_NB_STATS; i++) { + stats_ptr = RTE_PTR_ADD(ð_stats, + rte_stats_strings[i].offset); + val = *stats_ptr; + snprintf(xstats[count].name, sizeof(xstats[count].name), + "%s", rte_stats_strings[i].name); + xstats[count++].value = val; + } + + /* per-rxq stats */ + for (q = 0; q < dev->data->nb_rx_queues; q++) { + for (i = 0; i < RTE_NB_RXQ_STATS; i++) { + stats_ptr = RTE_PTR_ADD(ð_stats, + rte_rxq_stats_strings[i].offset + + q * sizeof(uint64_t)); + val = *stats_ptr; + snprintf(xstats[count].name, sizeof(xstats[count].name), + "rx_q%u_%s", q, + rte_rxq_stats_strings[i].name); + xstats[count++].value = val; + } + } + + /* per-txq stats */ + for (q = 0; q < dev->data->nb_tx_queues; q++) { + for (i = 0; i < RTE_NB_TXQ_STATS; i++) { + stats_ptr = RTE_PTR_ADD(ð_stats, + rte_txq_stats_strings[i].offset + + q * sizeof(uint64_t)); + val = *stats_ptr; + snprintf(xstats[count].name, sizeof(xstats[count].name), + "tx_q%u_%s", q, + rte_txq_stats_strings[i].name); + xstats[count++].value = val; + } + } + + return count + xcount; +} + +/* reset ethdev extended statistics */ +void +rte_eth_xstats_reset(uint8_t port_id) +{ + struct rte_eth_dev *dev; + + RTE_ETH_VALID_PORTID_OR_RET(port_id); + dev = &rte_eth_devices[port_id]; + + /* implemented by the driver */ + if (dev->dev_ops->xstats_reset != NULL) { + (*dev->dev_ops->xstats_reset)(dev); + return; + } + + /* fallback to default */ + rte_eth_stats_reset(port_id); +} + +static int +set_queue_stats_mapping(uint8_t port_id, uint16_t queue_id, uint8_t stat_idx, + uint8_t is_rx) +{ + struct rte_eth_dev *dev; + + RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -ENODEV); + + dev = &rte_eth_devices[port_id]; + + 
RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->queue_stats_mapping_set, -ENOTSUP); + return (*dev->dev_ops->queue_stats_mapping_set) + (dev, queue_id, stat_idx, is_rx); +} + + +int +rte_eth_dev_set_tx_queue_stats_mapping(uint8_t port_id, uint16_t tx_queue_id, + uint8_t stat_idx) +{ + return set_queue_stats_mapping(port_id, tx_queue_id, stat_idx, + STAT_QMAP_TX); +} + + +int +rte_eth_dev_set_rx_queue_stats_mapping(uint8_t port_id, uint16_t rx_queue_id, + uint8_t stat_idx) +{ + return set_queue_stats_mapping(port_id, rx_queue_id, stat_idx, + STAT_QMAP_RX); +} + + +void +rte_eth_dev_info_get(uint8_t port_id, struct rte_eth_dev_info *dev_info) +{ + struct rte_eth_dev *dev; + const struct rte_eth_desc_lim lim = { + .nb_max = UINT16_MAX, + .nb_min = 0, + .nb_align = 1, + }; + + RTE_ETH_VALID_PORTID_OR_RET(port_id); + dev = &rte_eth_devices[port_id]; + + memset(dev_info, 0, sizeof(struct rte_eth_dev_info)); + dev_info->rx_desc_lim = lim; + dev_info->tx_desc_lim = lim; + + RTE_FUNC_PTR_OR_RET(*dev->dev_ops->dev_infos_get); + (*dev->dev_ops->dev_infos_get)(dev, dev_info); + dev_info->pci_dev = dev->pci_dev; + dev_info->driver_name = dev->data->drv_name; +} + +void +rte_eth_macaddr_get(uint8_t port_id, struct ether_addr *mac_addr) +{ + struct rte_eth_dev *dev; + + RTE_ETH_VALID_PORTID_OR_RET(port_id); + dev = &rte_eth_devices[port_id]; + ether_addr_copy(&dev->data->mac_addrs[0], mac_addr); +} + + +int +rte_eth_dev_get_mtu(uint8_t port_id, uint16_t *mtu) +{ + struct rte_eth_dev *dev; + + RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -ENODEV); + + dev = &rte_eth_devices[port_id]; + *mtu = dev->data->mtu; + return 0; +} + +int +rte_eth_dev_set_mtu(uint8_t port_id, uint16_t mtu) +{ + int ret; + struct rte_eth_dev *dev; + + RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -ENODEV); + dev = &rte_eth_devices[port_id]; + RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->mtu_set, -ENOTSUP); + + ret = (*dev->dev_ops->mtu_set)(dev, mtu); + if (!ret) + dev->data->mtu = mtu; + + return ret; +} + +int 
+rte_eth_dev_vlan_filter(uint8_t port_id, uint16_t vlan_id, int on) +{ + struct rte_eth_dev *dev; + + RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -ENODEV); + dev = &rte_eth_devices[port_id]; + if (!(dev->data->dev_conf.rxmode.hw_vlan_filter)) { + RTE_PMD_DEBUG_TRACE("port %d: vlan-filtering disabled\n", port_id); + return -ENOSYS; + } + + if (vlan_id > 4095) { + RTE_PMD_DEBUG_TRACE("(port_id=%d) invalid vlan_id=%u > 4095\n", + port_id, (unsigned) vlan_id); + return -EINVAL; + } + RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->vlan_filter_set, -ENOTSUP); + + return (*dev->dev_ops->vlan_filter_set)(dev, vlan_id, on); +} + +int +rte_eth_dev_set_vlan_strip_on_queue(uint8_t port_id, uint16_t rx_queue_id, int on) +{ + struct rte_eth_dev *dev; + + RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -ENODEV); + dev = &rte_eth_devices[port_id]; + if (rx_queue_id >= dev->data->nb_rx_queues) { + RTE_PMD_DEBUG_TRACE("Invalid rx_queue_id=%d\n", port_id); + return -EINVAL; + } + + RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->vlan_strip_queue_set, -ENOTSUP); + (*dev->dev_ops->vlan_strip_queue_set)(dev, rx_queue_id, on); + + return 0; +} + +int +rte_eth_dev_set_vlan_ether_type(uint8_t port_id, uint16_t tpid) +{ + struct rte_eth_dev *dev; + + RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -ENODEV); + dev = &rte_eth_devices[port_id]; + RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->vlan_tpid_set, -ENOTSUP); + (*dev->dev_ops->vlan_tpid_set)(dev, tpid); + + return 0; +} + +int +rte_eth_dev_set_vlan_offload(uint8_t port_id, int offload_mask) +{ + struct rte_eth_dev *dev; + int ret = 0; + int mask = 0; + int cur, org = 0; + + RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -ENODEV); + dev = &rte_eth_devices[port_id]; + + /*check which option changed by application*/ + cur = !!(offload_mask & ETH_VLAN_STRIP_OFFLOAD); + org = !!(dev->data->dev_conf.rxmode.hw_vlan_strip); + if (cur != org) { + dev->data->dev_conf.rxmode.hw_vlan_strip = (uint8_t)cur; + mask |= ETH_VLAN_STRIP_MASK; + } + + cur = !!(offload_mask & ETH_VLAN_FILTER_OFFLOAD); 
+ org = !!(dev->data->dev_conf.rxmode.hw_vlan_filter); + if (cur != org) { + dev->data->dev_conf.rxmode.hw_vlan_filter = (uint8_t)cur; + mask |= ETH_VLAN_FILTER_MASK; + } + + cur = !!(offload_mask & ETH_VLAN_EXTEND_OFFLOAD); + org = !!(dev->data->dev_conf.rxmode.hw_vlan_extend); + if (cur != org) { + dev->data->dev_conf.rxmode.hw_vlan_extend = (uint8_t)cur; + mask |= ETH_VLAN_EXTEND_MASK; + } + + /*no change*/ + if (mask == 0) + return ret; + + RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->vlan_offload_set, -ENOTSUP); + (*dev->dev_ops->vlan_offload_set)(dev, mask); + + return ret; +} + +int +rte_eth_dev_get_vlan_offload(uint8_t port_id) +{ + struct rte_eth_dev *dev; + int ret = 0; + + RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -ENODEV); + dev = &rte_eth_devices[port_id]; + + if (dev->data->dev_conf.rxmode.hw_vlan_strip) + ret |= ETH_VLAN_STRIP_OFFLOAD; + + if (dev->data->dev_conf.rxmode.hw_vlan_filter) + ret |= ETH_VLAN_FILTER_OFFLOAD; + + if (dev->data->dev_conf.rxmode.hw_vlan_extend) + ret |= ETH_VLAN_EXTEND_OFFLOAD; + + return ret; +} + +int +rte_eth_dev_set_vlan_pvid(uint8_t port_id, uint16_t pvid, int on) +{ + struct rte_eth_dev *dev; + + RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -ENODEV); + dev = &rte_eth_devices[port_id]; + RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->vlan_pvid_set, -ENOTSUP); + (*dev->dev_ops->vlan_pvid_set)(dev, pvid, on); + + return 0; +} + +int +rte_eth_dev_flow_ctrl_get(uint8_t port_id, struct rte_eth_fc_conf *fc_conf) +{ + struct rte_eth_dev *dev; + + RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -ENODEV); + dev = &rte_eth_devices[port_id]; + RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->flow_ctrl_get, -ENOTSUP); + memset(fc_conf, 0, sizeof(*fc_conf)); + return (*dev->dev_ops->flow_ctrl_get)(dev, fc_conf); +} + +int +rte_eth_dev_flow_ctrl_set(uint8_t port_id, struct rte_eth_fc_conf *fc_conf) +{ + struct rte_eth_dev *dev; + + RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -ENODEV); + if ((fc_conf->send_xon != 0) && (fc_conf->send_xon != 1)) { + 
RTE_PMD_DEBUG_TRACE("Invalid send_xon, only 0/1 allowed\n"); + return -EINVAL; + } + + dev = &rte_eth_devices[port_id]; + RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->flow_ctrl_set, -ENOTSUP); + return (*dev->dev_ops->flow_ctrl_set)(dev, fc_conf); +} + +int +rte_eth_dev_priority_flow_ctrl_set(uint8_t port_id, struct rte_eth_pfc_conf *pfc_conf) +{ + struct rte_eth_dev *dev; + + RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -ENODEV); + if (pfc_conf->priority > (ETH_DCB_NUM_USER_PRIORITIES - 1)) { + RTE_PMD_DEBUG_TRACE("Invalid priority, only 0-7 allowed\n"); + return -EINVAL; + } + + dev = &rte_eth_devices[port_id]; + /* High water, low water validation are device specific */ + if (*dev->dev_ops->priority_flow_ctrl_set) + return (*dev->dev_ops->priority_flow_ctrl_set)(dev, pfc_conf); + return -ENOTSUP; +} + +static int +rte_eth_check_reta_mask(struct rte_eth_rss_reta_entry64 *reta_conf, + uint16_t reta_size) +{ + uint16_t i, num; + + if (!reta_conf) + return -EINVAL; + + if (reta_size != RTE_ALIGN(reta_size, RTE_RETA_GROUP_SIZE)) { + RTE_PMD_DEBUG_TRACE("Invalid reta size, should be %u aligned\n", + RTE_RETA_GROUP_SIZE); + return -EINVAL; + } + + num = reta_size / RTE_RETA_GROUP_SIZE; + for (i = 0; i < num; i++) { + if (reta_conf[i].mask) + return 0; + } + + return -EINVAL; +} + +static int +rte_eth_check_reta_entry(struct rte_eth_rss_reta_entry64 *reta_conf, + uint16_t reta_size, + uint8_t max_rxq) +{ + uint16_t i, idx, shift; + + if (!reta_conf) + return -EINVAL; + + if (max_rxq == 0) { + RTE_PMD_DEBUG_TRACE("No receive queue is available\n"); + return -EINVAL; + } + + for (i = 0; i < reta_size; i++) { + idx = i / RTE_RETA_GROUP_SIZE; + shift = i % RTE_RETA_GROUP_SIZE; + if ((reta_conf[idx].mask & (1ULL << shift)) && + (reta_conf[idx].reta[shift] >= max_rxq)) { + RTE_PMD_DEBUG_TRACE("reta_conf[%u]->reta[%u]: %u exceeds " + "the maximum rxq index: %u\n", idx, shift, + reta_conf[idx].reta[shift], max_rxq); + return -EINVAL; + } + } + + return 0; +} + +int 
+rte_eth_dev_rss_reta_update(uint8_t port_id, + struct rte_eth_rss_reta_entry64 *reta_conf, + uint16_t reta_size) +{ + struct rte_eth_dev *dev; + int ret; + + RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -ENODEV); + /* Check mask bits */ + ret = rte_eth_check_reta_mask(reta_conf, reta_size); + if (ret < 0) + return ret; + + dev = &rte_eth_devices[port_id]; + + /* Check entry value */ + ret = rte_eth_check_reta_entry(reta_conf, reta_size, + dev->data->nb_rx_queues); + if (ret < 0) + return ret; + + RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->reta_update, -ENOTSUP); + return (*dev->dev_ops->reta_update)(dev, reta_conf, reta_size); +} + +int +rte_eth_dev_rss_reta_query(uint8_t port_id, + struct rte_eth_rss_reta_entry64 *reta_conf, + uint16_t reta_size) +{ + struct rte_eth_dev *dev; + int ret; + + if (port_id >= nb_ports) { + RTE_PMD_DEBUG_TRACE("Invalid port_id=%d\n", port_id); + return -ENODEV; + } + + /* Check mask bits */ + ret = rte_eth_check_reta_mask(reta_conf, reta_size); + if (ret < 0) + return ret; + + dev = &rte_eth_devices[port_id]; + RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->reta_query, -ENOTSUP); + return (*dev->dev_ops->reta_query)(dev, reta_conf, reta_size); +} + +int +rte_eth_dev_rss_hash_update(uint8_t port_id, struct rte_eth_rss_conf *rss_conf) +{ + struct rte_eth_dev *dev; + uint16_t rss_hash_protos; + + RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -ENODEV); + rss_hash_protos = rss_conf->rss_hf; + if ((rss_hash_protos != 0) && + ((rss_hash_protos & ETH_RSS_PROTO_MASK) == 0)) { + RTE_PMD_DEBUG_TRACE("Invalid rss_hash_protos=0x%x\n", + rss_hash_protos); + return -EINVAL; + } + dev = &rte_eth_devices[port_id]; + RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->rss_hash_update, -ENOTSUP); + return (*dev->dev_ops->rss_hash_update)(dev, rss_conf); +} + +int +rte_eth_dev_rss_hash_conf_get(uint8_t port_id, + struct rte_eth_rss_conf *rss_conf) +{ + struct rte_eth_dev *dev; + + RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -ENODEV); + dev = &rte_eth_devices[port_id]; + 
RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->rss_hash_conf_get, -ENOTSUP); + return (*dev->dev_ops->rss_hash_conf_get)(dev, rss_conf); +} + +int +rte_eth_dev_udp_tunnel_add(uint8_t port_id, + struct rte_eth_udp_tunnel *udp_tunnel) +{ + struct rte_eth_dev *dev; + + RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -ENODEV); + if (udp_tunnel == NULL) { + RTE_PMD_DEBUG_TRACE("Invalid udp_tunnel parameter\n"); + return -EINVAL; + } + + if (udp_tunnel->prot_type >= RTE_TUNNEL_TYPE_MAX) { + RTE_PMD_DEBUG_TRACE("Invalid tunnel type\n"); + return -EINVAL; + } + + dev = &rte_eth_devices[port_id]; + RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->udp_tunnel_add, -ENOTSUP); + return (*dev->dev_ops->udp_tunnel_add)(dev, udp_tunnel); +} + +int +rte_eth_dev_udp_tunnel_delete(uint8_t port_id, + struct rte_eth_udp_tunnel *udp_tunnel) +{ + struct rte_eth_dev *dev; + + RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -ENODEV); + dev = &rte_eth_devices[port_id]; + + if (udp_tunnel == NULL) { + RTE_PMD_DEBUG_TRACE("Invalid udp_tunnel parameter\n"); + return -EINVAL; + } + + if (udp_tunnel->prot_type >= RTE_TUNNEL_TYPE_MAX) { + RTE_PMD_DEBUG_TRACE("Invalid tunnel type\n"); + return -EINVAL; + } + + RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->udp_tunnel_del, -ENOTSUP); + return (*dev->dev_ops->udp_tunnel_del)(dev, udp_tunnel); +} + +int +rte_eth_led_on(uint8_t port_id) +{ + struct rte_eth_dev *dev; + + RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -ENODEV); + dev = &rte_eth_devices[port_id]; + RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->dev_led_on, -ENOTSUP); + return (*dev->dev_ops->dev_led_on)(dev); +} + +int +rte_eth_led_off(uint8_t port_id) +{ + struct rte_eth_dev *dev; + + RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -ENODEV); + dev = &rte_eth_devices[port_id]; + RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->dev_led_off, -ENOTSUP); + return (*dev->dev_ops->dev_led_off)(dev); +} + +/* + * Returns index into MAC address array of addr. Use 00:00:00:00:00:00 to find + * an empty spot. 
+ */ +static int +get_mac_addr_index(uint8_t port_id, const struct ether_addr *addr) +{ + struct rte_eth_dev_info dev_info; + struct rte_eth_dev *dev = &rte_eth_devices[port_id]; + unsigned i; + + rte_eth_dev_info_get(port_id, &dev_info); + + for (i = 0; i < dev_info.max_mac_addrs; i++) + if (memcmp(addr, &dev->data->mac_addrs[i], ETHER_ADDR_LEN) == 0) + return i; + + return -1; +} + +static const struct ether_addr null_mac_addr; + +int +rte_eth_dev_mac_addr_add(uint8_t port_id, struct ether_addr *addr, + uint32_t pool) +{ + struct rte_eth_dev *dev; + int index; + uint64_t pool_mask; + + RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -ENODEV); + dev = &rte_eth_devices[port_id]; + RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->mac_addr_add, -ENOTSUP); + + if (is_zero_ether_addr(addr)) { + RTE_PMD_DEBUG_TRACE("port %d: Cannot add NULL MAC address\n", + port_id); + return -EINVAL; + } + if (pool >= ETH_64_POOLS) { + RTE_PMD_DEBUG_TRACE("pool id must be 0-%d\n", ETH_64_POOLS - 1); + return -EINVAL; + } + + index = get_mac_addr_index(port_id, addr); + if (index < 0) { + index = get_mac_addr_index(port_id, &null_mac_addr); + if (index < 0) { + RTE_PMD_DEBUG_TRACE("port %d: MAC address array full\n", + port_id); + return -ENOSPC; + } + } else { + pool_mask = dev->data->mac_pool_sel[index]; + + /* Check if both MAC address and pool is already there, and do nothing */ + if (pool_mask & (1ULL << pool)) + return 0; + } + + /* Update NIC */ + (*dev->dev_ops->mac_addr_add)(dev, addr, index, pool); + + /* Update address in NIC data structure */ + ether_addr_copy(addr, &dev->data->mac_addrs[index]); + + /* Update pool bitmap in NIC data structure */ + dev->data->mac_pool_sel[index] |= (1ULL << pool); + + return 0; +} + +int +rte_eth_dev_mac_addr_remove(uint8_t port_id, struct ether_addr *addr) +{ + struct rte_eth_dev *dev; + int index; + + RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -ENODEV); + dev = &rte_eth_devices[port_id]; + RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->mac_addr_remove, -ENOTSUP); + 
+ index = get_mac_addr_index(port_id, addr); + if (index == 0) { + RTE_PMD_DEBUG_TRACE("port %d: Cannot remove default MAC address\n", port_id); + return -EADDRINUSE; + } else if (index < 0) + return 0; /* Do nothing if address wasn't found */ + + /* Update NIC */ + (*dev->dev_ops->mac_addr_remove)(dev, index); + + /* Update address in NIC data structure */ + ether_addr_copy(&null_mac_addr, &dev->data->mac_addrs[index]); + + /* reset pool bitmap */ + dev->data->mac_pool_sel[index] = 0; + + return 0; +} + +int +rte_eth_dev_default_mac_addr_set(uint8_t port_id, struct ether_addr *addr) +{ + struct rte_eth_dev *dev; + + RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -ENODEV); + + if (!is_valid_assigned_ether_addr(addr)) + return -EINVAL; + + dev = &rte_eth_devices[port_id]; + RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->mac_addr_set, -ENOTSUP); + + /* Update default address in NIC data structure */ + ether_addr_copy(addr, &dev->data->mac_addrs[0]); + + (*dev->dev_ops->mac_addr_set)(dev, addr); + + return 0; +} + +int +rte_eth_dev_set_vf_rxmode(uint8_t port_id, uint16_t vf, + uint16_t rx_mode, uint8_t on) +{ + uint16_t num_vfs; + struct rte_eth_dev *dev; + struct rte_eth_dev_info dev_info; + + RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -ENODEV); + + dev = &rte_eth_devices[port_id]; + rte_eth_dev_info_get(port_id, &dev_info); + + num_vfs = dev_info.max_vfs; + if (vf > num_vfs) { + RTE_PMD_DEBUG_TRACE("set VF RX mode:invalid VF id %d\n", vf); + return -EINVAL; + } + + if (rx_mode == 0) { + RTE_PMD_DEBUG_TRACE("set VF RX mode:mode mask ca not be zero\n"); + return -EINVAL; + } + RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->set_vf_rx_mode, -ENOTSUP); + return (*dev->dev_ops->set_vf_rx_mode)(dev, vf, rx_mode, on); +} + +/* + * Returns index into MAC address array of addr. Use 00:00:00:00:00:00 to find + * an empty spot. 
+ */ +static int +get_hash_mac_addr_index(uint8_t port_id, const struct ether_addr *addr) +{ + struct rte_eth_dev_info dev_info; + struct rte_eth_dev *dev = &rte_eth_devices[port_id]; + unsigned i; + + rte_eth_dev_info_get(port_id, &dev_info); + if (!dev->data->hash_mac_addrs) + return -1; + + for (i = 0; i < dev_info.max_hash_mac_addrs; i++) + if (memcmp(addr, &dev->data->hash_mac_addrs[i], + ETHER_ADDR_LEN) == 0) + return i; + + return -1; +} + +int +rte_eth_dev_uc_hash_table_set(uint8_t port_id, struct ether_addr *addr, + uint8_t on) +{ + int index; + int ret; + struct rte_eth_dev *dev; + + RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -ENODEV); + + dev = &rte_eth_devices[port_id]; + if (is_zero_ether_addr(addr)) { + RTE_PMD_DEBUG_TRACE("port %d: Cannot add NULL MAC address\n", + port_id); + return -EINVAL; + } + + index = get_hash_mac_addr_index(port_id, addr); + /* Check if it's already there, and do nothing */ + if ((index >= 0) && (on)) + return 0; + + if (index < 0) { + if (!on) { + RTE_PMD_DEBUG_TRACE("port %d: the MAC address was not " + "set in UTA\n", port_id); + return -EINVAL; + } + + index = get_hash_mac_addr_index(port_id, &null_mac_addr); + if (index < 0) { + RTE_PMD_DEBUG_TRACE("port %d: MAC address array full\n", + port_id); + return -ENOSPC; + } + } + + RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->uc_hash_table_set, -ENOTSUP); + ret = (*dev->dev_ops->uc_hash_table_set)(dev, addr, on); + if (ret == 0) { + /* Update address in NIC data structure */ + if (on) + ether_addr_copy(addr, + &dev->data->hash_mac_addrs[index]); + else + ether_addr_copy(&null_mac_addr, + &dev->data->hash_mac_addrs[index]); + } + + return ret; +} + +int +rte_eth_dev_uc_all_hash_table_set(uint8_t port_id, uint8_t on) +{ + struct rte_eth_dev *dev; + + RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -ENODEV); + + dev = &rte_eth_devices[port_id]; + + RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->uc_all_hash_table_set, -ENOTSUP); + return (*dev->dev_ops->uc_all_hash_table_set)(dev, on); +} + +int 
+rte_eth_dev_set_vf_rx(uint8_t port_id, uint16_t vf, uint8_t on) +{ + uint16_t num_vfs; + struct rte_eth_dev *dev; + struct rte_eth_dev_info dev_info; + + RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -ENODEV); + + dev = &rte_eth_devices[port_id]; + rte_eth_dev_info_get(port_id, &dev_info); + + num_vfs = dev_info.max_vfs; + if (vf > num_vfs) { + RTE_PMD_DEBUG_TRACE("port %d: invalid vf id\n", port_id); + return -EINVAL; + } + + RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->set_vf_rx, -ENOTSUP); + return (*dev->dev_ops->set_vf_rx)(dev, vf, on); +} + +int +rte_eth_dev_set_vf_tx(uint8_t port_id, uint16_t vf, uint8_t on) +{ + uint16_t num_vfs; + struct rte_eth_dev *dev; + struct rte_eth_dev_info dev_info; + + RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -ENODEV); + + dev = &rte_eth_devices[port_id]; + rte_eth_dev_info_get(port_id, &dev_info); + + num_vfs = dev_info.max_vfs; + if (vf > num_vfs) { + RTE_PMD_DEBUG_TRACE("set pool tx:invalid pool id=%d\n", vf); + return -EINVAL; + } + + RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->set_vf_tx, -ENOTSUP); + return (*dev->dev_ops->set_vf_tx)(dev, vf, on); +} + +int +rte_eth_dev_set_vf_vlan_filter(uint8_t port_id, uint16_t vlan_id, + uint64_t vf_mask, uint8_t vlan_on) +{ + struct rte_eth_dev *dev; + + RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -ENODEV); + + dev = &rte_eth_devices[port_id]; + + if (vlan_id > ETHER_MAX_VLAN_ID) { + RTE_PMD_DEBUG_TRACE("VF VLAN filter:invalid VLAN id=%d\n", + vlan_id); + return -EINVAL; + } + + if (vf_mask == 0) { + RTE_PMD_DEBUG_TRACE("VF VLAN filter:pool_mask can not be 0\n"); + return -EINVAL; + } + + RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->set_vf_vlan_filter, -ENOTSUP); + return (*dev->dev_ops->set_vf_vlan_filter)(dev, vlan_id, + vf_mask, vlan_on); +} + +int rte_eth_set_queue_rate_limit(uint8_t port_id, uint16_t queue_idx, + uint16_t tx_rate) +{ + struct rte_eth_dev *dev; + struct rte_eth_dev_info dev_info; + struct rte_eth_link link; + + RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -ENODEV); + + dev = 
&rte_eth_devices[port_id]; + rte_eth_dev_info_get(port_id, &dev_info); + link = dev->data->dev_link; + + if (queue_idx > dev_info.max_tx_queues) { + RTE_PMD_DEBUG_TRACE("set queue rate limit:port %d: " + "invalid queue id=%d\n", port_id, queue_idx); + return -EINVAL; + } + + if (tx_rate > link.link_speed) { + RTE_PMD_DEBUG_TRACE("set queue rate limit:invalid tx_rate=%d, " + "bigger than link speed= %d\n", + tx_rate, link.link_speed); + return -EINVAL; + } + + RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->set_queue_rate_limit, -ENOTSUP); + return (*dev->dev_ops->set_queue_rate_limit)(dev, queue_idx, tx_rate); +} + +int rte_eth_set_vf_rate_limit(uint8_t port_id, uint16_t vf, uint16_t tx_rate, + uint64_t q_msk) +{ + struct rte_eth_dev *dev; + struct rte_eth_dev_info dev_info; + struct rte_eth_link link; + + if (q_msk == 0) + return 0; + + RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -ENODEV); + + dev = &rte_eth_devices[port_id]; + rte_eth_dev_info_get(port_id, &dev_info); + link = dev->data->dev_link; + + if (vf > dev_info.max_vfs) { + RTE_PMD_DEBUG_TRACE("set VF rate limit:port %d: " + "invalid vf id=%d\n", port_id, vf); + return -EINVAL; + } + + if (tx_rate > link.link_speed) { + RTE_PMD_DEBUG_TRACE("set VF rate limit:invalid tx_rate=%d, " + "bigger than link speed= %d\n", + tx_rate, link.link_speed); + return -EINVAL; + } + + RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->set_vf_rate_limit, -ENOTSUP); + return (*dev->dev_ops->set_vf_rate_limit)(dev, vf, tx_rate, q_msk); +} + +int +rte_eth_mirror_rule_set(uint8_t port_id, + struct rte_eth_mirror_conf *mirror_conf, + uint8_t rule_id, uint8_t on) +{ + struct rte_eth_dev *dev = &rte_eth_devices[port_id]; + + RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -ENODEV); + if (mirror_conf->rule_type == 0) { + RTE_PMD_DEBUG_TRACE("mirror rule type can not be 0.\n"); + return -EINVAL; + } + + if (mirror_conf->dst_pool >= ETH_64_POOLS) { + RTE_PMD_DEBUG_TRACE("Invalid dst pool, pool id must be 0-%d\n", + ETH_64_POOLS - 1); + return -EINVAL; + } + + if 
((mirror_conf->rule_type & (ETH_MIRROR_VIRTUAL_POOL_UP | + ETH_MIRROR_VIRTUAL_POOL_DOWN)) && + (mirror_conf->pool_mask == 0)) { + RTE_PMD_DEBUG_TRACE("Invalid mirror pool, pool mask can not be 0.\n"); + return -EINVAL; + } + + if ((mirror_conf->rule_type & ETH_MIRROR_VLAN) && + mirror_conf->vlan.vlan_mask == 0) { + RTE_PMD_DEBUG_TRACE("Invalid vlan mask, vlan mask can not be 0.\n"); + return -EINVAL; + } + + dev = &rte_eth_devices[port_id]; + RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->mirror_rule_set, -ENOTSUP); + + return (*dev->dev_ops->mirror_rule_set)(dev, mirror_conf, rule_id, on); +} + +int +rte_eth_mirror_rule_reset(uint8_t port_id, uint8_t rule_id) +{ + struct rte_eth_dev *dev = &rte_eth_devices[port_id]; + + RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -ENODEV); + + dev = &rte_eth_devices[port_id]; + RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->mirror_rule_reset, -ENOTSUP); + + return (*dev->dev_ops->mirror_rule_reset)(dev, rule_id); +} + +int +rte_eth_dev_callback_register(uint8_t port_id, + enum rte_eth_event_type event, + rte_eth_dev_cb_fn cb_fn, void *cb_arg) +{ + struct rte_eth_dev *dev; + struct rte_eth_dev_callback *user_cb; + + if (!cb_fn) + return -EINVAL; + + RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -EINVAL); + + dev = &rte_eth_devices[port_id]; + rte_spinlock_lock(&rte_eth_dev_cb_lock); + + TAILQ_FOREACH(user_cb, &(dev->link_intr_cbs), next) { + if (user_cb->cb_fn == cb_fn && + user_cb->cb_arg == cb_arg && + user_cb->event == event) { + break; + } + } + + /* create a new callback. */ + if (user_cb == NULL) + user_cb = rte_zmalloc("INTR_USER_CALLBACK", + sizeof(struct rte_eth_dev_callback), 0); + if (user_cb != NULL) { + user_cb->cb_fn = cb_fn; + user_cb->cb_arg = cb_arg; + user_cb->event = event; + TAILQ_INSERT_TAIL(&(dev->link_intr_cbs), user_cb, next); + } + + rte_spinlock_unlock(&rte_eth_dev_cb_lock); + return (user_cb == NULL) ? 
-ENOMEM : 0; +} + +int +rte_eth_dev_callback_unregister(uint8_t port_id, + enum rte_eth_event_type event, + rte_eth_dev_cb_fn cb_fn, void *cb_arg) +{ + int ret; + struct rte_eth_dev *dev; + struct rte_eth_dev_callback *cb, *next; + + if (!cb_fn) + return -EINVAL; + + RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -EINVAL); + + dev = &rte_eth_devices[port_id]; + rte_spinlock_lock(&rte_eth_dev_cb_lock); + + ret = 0; + for (cb = TAILQ_FIRST(&dev->link_intr_cbs); cb != NULL; cb = next) { + + next = TAILQ_NEXT(cb, next); + + if (cb->cb_fn != cb_fn || cb->event != event || + (cb->cb_arg != (void *)-1 && + cb->cb_arg != cb_arg)) + continue; + + /* + * if this callback is not executing right now, + * then remove it. + */ + if (cb->active == 0) { + TAILQ_REMOVE(&(dev->link_intr_cbs), cb, next); + rte_free(cb); + } else { + ret = -EAGAIN; + } + } + + rte_spinlock_unlock(&rte_eth_dev_cb_lock); + return ret; +} + +void +_rte_eth_dev_callback_process(struct rte_eth_dev *dev, + enum rte_eth_event_type event) +{ + struct rte_eth_dev_callback *cb_lst; + struct rte_eth_dev_callback dev_cb; + + rte_spinlock_lock(&rte_eth_dev_cb_lock); + TAILQ_FOREACH(cb_lst, &(dev->link_intr_cbs), next) { + if (cb_lst->cb_fn == NULL || cb_lst->event != event) + continue; + dev_cb = *cb_lst; + cb_lst->active = 1; + rte_spinlock_unlock(&rte_eth_dev_cb_lock); + dev_cb.cb_fn(dev->data->port_id, dev_cb.event, + dev_cb.cb_arg); + rte_spinlock_lock(&rte_eth_dev_cb_lock); + cb_lst->active = 0; + } + rte_spinlock_unlock(&rte_eth_dev_cb_lock); +} + +int +rte_eth_dev_rx_intr_ctl(uint8_t port_id, int epfd, int op, void *data) +{ + uint32_t vec; + struct rte_eth_dev *dev; + struct rte_intr_handle *intr_handle; + uint16_t qid; + int rc; + + if (!rte_eth_dev_is_valid_port(port_id)) { + RTE_PMD_DEBUG_TRACE("Invalid port_id=%u\n", port_id); + return -ENODEV; + } + + dev = &rte_eth_devices[port_id]; + intr_handle = &dev->pci_dev->intr_handle; + if (!intr_handle->intr_vec) { + RTE_PMD_DEBUG_TRACE("RX Intr vector unset\n"); 
+ return -EPERM; + } + + for (qid = 0; qid < dev->data->nb_rx_queues; qid++) { + vec = intr_handle->intr_vec[qid]; + rc = rte_intr_rx_ctl(intr_handle, epfd, op, vec, data); + if (rc && rc != -EEXIST) { + RTE_PMD_DEBUG_TRACE("p %u q %u rx ctl error" + " op %d epfd %d vec %u\n", + port_id, qid, op, epfd, vec); + } + } + + return 0; +} + +const struct rte_memzone * +rte_eth_dma_zone_reserve(const struct rte_eth_dev *dev, const char *ring_name, + uint16_t queue_id, size_t size, unsigned align, + int socket_id) +{ + char z_name[RTE_MEMZONE_NAMESIZE]; + const struct rte_memzone *mz; + + snprintf(z_name, sizeof(z_name), "%s_%s_%d_%d", + dev->driver->pci_drv.name, ring_name, + dev->data->port_id, queue_id); + + mz = rte_memzone_lookup(z_name); + if (mz) + return mz; + + if (rte_xen_dom0_supported()) + return rte_memzone_reserve_bounded(z_name, size, socket_id, + 0, align, RTE_PGSIZE_2M); + else + return rte_memzone_reserve_aligned(z_name, size, socket_id, + 0, align); +} + +int +rte_eth_dev_rx_intr_ctl_q(uint8_t port_id, uint16_t queue_id, + int epfd, int op, void *data) +{ + uint32_t vec; + struct rte_eth_dev *dev; + struct rte_intr_handle *intr_handle; + int rc; + + if (!rte_eth_dev_is_valid_port(port_id)) { + RTE_PMD_DEBUG_TRACE("Invalid port_id=%u\n", port_id); + return -ENODEV; + } + + dev = &rte_eth_devices[port_id]; + if (queue_id >= dev->data->nb_rx_queues) { + RTE_PMD_DEBUG_TRACE("Invalid RX queue_id=%u\n", queue_id); + return -EINVAL; + } + + intr_handle = &dev->pci_dev->intr_handle; + if (!intr_handle->intr_vec) { + RTE_PMD_DEBUG_TRACE("RX Intr vector unset\n"); + return -EPERM; + } + + vec = intr_handle->intr_vec[queue_id]; + rc = rte_intr_rx_ctl(intr_handle, epfd, op, vec, data); + if (rc && rc != -EEXIST) { + RTE_PMD_DEBUG_TRACE("p %u q %u rx ctl error" + " op %d epfd %d vec %u\n", + port_id, queue_id, op, epfd, vec); + return rc; + } + + return 0; +} + +int +rte_eth_dev_rx_intr_enable(uint8_t port_id, + uint16_t queue_id) +{ + struct rte_eth_dev *dev; + + if 
(!rte_eth_dev_is_valid_port(port_id)) { + RTE_PMD_DEBUG_TRACE("Invalid port_id=%d\n", port_id); + return -ENODEV; + } + + dev = &rte_eth_devices[port_id]; + + RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->rx_queue_intr_enable, -ENOTSUP); + return (*dev->dev_ops->rx_queue_intr_enable)(dev, queue_id); +} + +int +rte_eth_dev_rx_intr_disable(uint8_t port_id, + uint16_t queue_id) +{ + struct rte_eth_dev *dev; + + if (!rte_eth_dev_is_valid_port(port_id)) { + RTE_PMD_DEBUG_TRACE("Invalid port_id=%d\n", port_id); + return -ENODEV; + } + + dev = &rte_eth_devices[port_id]; + + RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->rx_queue_intr_disable, -ENOTSUP); + return (*dev->dev_ops->rx_queue_intr_disable)(dev, queue_id); +} + +#ifdef RTE_NIC_BYPASS +int rte_eth_dev_bypass_init(uint8_t port_id) +{ + struct rte_eth_dev *dev; + + RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -ENODEV); + + dev = &rte_eth_devices[port_id]; + RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->bypass_init, -ENOTSUP); + (*dev->dev_ops->bypass_init)(dev); + return 0; +} + +int +rte_eth_dev_bypass_state_show(uint8_t port_id, uint32_t *state) +{ + struct rte_eth_dev *dev; + + RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -ENODEV); + + dev = &rte_eth_devices[port_id]; + RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->bypass_state_show, -ENOTSUP); + (*dev->dev_ops->bypass_state_show)(dev, state); + return 0; +} + +int +rte_eth_dev_bypass_state_set(uint8_t port_id, uint32_t *new_state) +{ + struct rte_eth_dev *dev; + + RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -ENODEV); + + dev = &rte_eth_devices[port_id]; + RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->bypass_state_set, -ENOTSUP); + (*dev->dev_ops->bypass_state_set)(dev, new_state); + return 0; +} + +int +rte_eth_dev_bypass_event_show(uint8_t port_id, uint32_t event, uint32_t *state) +{ + struct rte_eth_dev *dev; + + RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -ENODEV); + + dev = &rte_eth_devices[port_id]; + RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->bypass_event_show, -ENOTSUP); /* guard the op that is invoked next, not bypass_state_show */ + 
(*dev->dev_ops->bypass_event_show)(dev, event, state); + return 0; +} + +int +rte_eth_dev_bypass_event_store(uint8_t port_id, uint32_t event, uint32_t state) +{ + struct rte_eth_dev *dev; + + RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -ENODEV); + + dev = &rte_eth_devices[port_id]; + + RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->bypass_event_set, -ENOTSUP); + (*dev->dev_ops->bypass_event_set)(dev, event, state); + return 0; +} + +int +rte_eth_dev_wd_timeout_store(uint8_t port_id, uint32_t timeout) +{ + struct rte_eth_dev *dev; + + RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -ENODEV); + + dev = &rte_eth_devices[port_id]; + + RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->bypass_wd_timeout_set, -ENOTSUP); + (*dev->dev_ops->bypass_wd_timeout_set)(dev, timeout); + return 0; +} + +int +rte_eth_dev_bypass_ver_show(uint8_t port_id, uint32_t *ver) +{ + struct rte_eth_dev *dev; + + RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -ENODEV); + + dev = &rte_eth_devices[port_id]; + + RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->bypass_ver_show, -ENOTSUP); + (*dev->dev_ops->bypass_ver_show)(dev, ver); + return 0; +} + +int +rte_eth_dev_bypass_wd_timeout_show(uint8_t port_id, uint32_t *wd_timeout) +{ + struct rte_eth_dev *dev; + + RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -ENODEV); + + dev = &rte_eth_devices[port_id]; + + RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->bypass_wd_timeout_show, -ENOTSUP); + (*dev->dev_ops->bypass_wd_timeout_show)(dev, wd_timeout); + return 0; +} + +int +rte_eth_dev_bypass_wd_reset(uint8_t port_id) +{ + struct rte_eth_dev *dev; + + RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -ENODEV); + + dev = &rte_eth_devices[port_id]; + + RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->bypass_wd_reset, -ENOTSUP); + (*dev->dev_ops->bypass_wd_reset)(dev); + return 0; +} +#endif + +int +rte_eth_dev_filter_supported(uint8_t port_id, enum rte_filter_type filter_type) +{ + struct rte_eth_dev *dev; + + RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -ENODEV); + + dev = &rte_eth_devices[port_id]; + 
RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->filter_ctrl, -ENOTSUP); + return (*dev->dev_ops->filter_ctrl)(dev, filter_type, + RTE_ETH_FILTER_NOP, NULL); +} + +int +rte_eth_dev_filter_ctrl(uint8_t port_id, enum rte_filter_type filter_type, + enum rte_filter_op filter_op, void *arg) +{ + struct rte_eth_dev *dev; + + RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -ENODEV); + + dev = &rte_eth_devices[port_id]; + RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->filter_ctrl, -ENOTSUP); + return (*dev->dev_ops->filter_ctrl)(dev, filter_type, filter_op, arg); +} + +void * +rte_eth_add_rx_callback(uint8_t port_id, uint16_t queue_id, + rte_rx_callback_fn fn, void *user_param) +{ +#ifndef RTE_ETHDEV_RXTX_CALLBACKS + rte_errno = ENOTSUP; + return NULL; +#endif + /* check input parameters */ + if (!rte_eth_dev_is_valid_port(port_id) || fn == NULL || + queue_id >= rte_eth_devices[port_id].data->nb_rx_queues) { + rte_errno = EINVAL; + return NULL; + } + + struct rte_eth_rxtx_callback *cb = rte_zmalloc(NULL, sizeof(*cb), 0); + + if (cb == NULL) { + rte_errno = ENOMEM; + return NULL; + } + + cb->fn.rx = fn; + cb->param = user_param; + + /* Add the callbacks in fifo order. 
*/ + struct rte_eth_rxtx_callback *tail = + rte_eth_devices[port_id].post_rx_burst_cbs[queue_id]; + + if (!tail) { + rte_eth_devices[port_id].post_rx_burst_cbs[queue_id] = cb; + + } else { + while (tail->next) + tail = tail->next; + tail->next = cb; + } + + return cb; +} + +void * +rte_eth_add_tx_callback(uint8_t port_id, uint16_t queue_id, + rte_tx_callback_fn fn, void *user_param) +{ +#ifndef RTE_ETHDEV_RXTX_CALLBACKS + rte_errno = ENOTSUP; + return NULL; +#endif + /* check input parameters */ + if (!rte_eth_dev_is_valid_port(port_id) || fn == NULL || + queue_id >= rte_eth_devices[port_id].data->nb_tx_queues) { + rte_errno = EINVAL; + return NULL; + } + + struct rte_eth_rxtx_callback *cb = rte_zmalloc(NULL, sizeof(*cb), 0); + + if (cb == NULL) { + rte_errno = ENOMEM; + return NULL; + } + + cb->fn.tx = fn; + cb->param = user_param; + + /* Add the callbacks in fifo order. */ + struct rte_eth_rxtx_callback *tail = + rte_eth_devices[port_id].pre_tx_burst_cbs[queue_id]; + + if (!tail) { + rte_eth_devices[port_id].pre_tx_burst_cbs[queue_id] = cb; + + } else { + while (tail->next) + tail = tail->next; + tail->next = cb; + } + + return cb; +} + +int +rte_eth_remove_rx_callback(uint8_t port_id, uint16_t queue_id, + struct rte_eth_rxtx_callback *user_cb) +{ +#ifndef RTE_ETHDEV_RXTX_CALLBACKS + return -ENOTSUP; +#endif + /* Check input parameters. */ + if (!rte_eth_dev_is_valid_port(port_id) || user_cb == NULL || + queue_id >= rte_eth_devices[port_id].data->nb_rx_queues) { + return -EINVAL; + } + + struct rte_eth_dev *dev = &rte_eth_devices[port_id]; + struct rte_eth_rxtx_callback *cb = dev->post_rx_burst_cbs[queue_id]; + struct rte_eth_rxtx_callback *prev_cb; + + /* Reset head pointer and remove user cb if first in the list. */ + if (cb == user_cb) { + dev->post_rx_burst_cbs[queue_id] = user_cb->next; + return 0; + } + + /* Remove the user cb from the callback list. 
*/ + while (cb != NULL) { /* an empty list falls through to -EINVAL instead of dereferencing NULL */ + prev_cb = cb; + cb = cb->next; + + if (cb == user_cb) { + prev_cb->next = user_cb->next; + return 0; + } + + } + + /* Callback wasn't found. */ + return -EINVAL; +} + +int +rte_eth_remove_tx_callback(uint8_t port_id, uint16_t queue_id, + struct rte_eth_rxtx_callback *user_cb) +{ +#ifndef RTE_ETHDEV_RXTX_CALLBACKS + return -ENOTSUP; +#endif + /* Check input parameters. */ + if (!rte_eth_dev_is_valid_port(port_id) || user_cb == NULL || + queue_id >= rte_eth_devices[port_id].data->nb_tx_queues) { + return -EINVAL; + } + + struct rte_eth_dev *dev = &rte_eth_devices[port_id]; + struct rte_eth_rxtx_callback *cb = dev->pre_tx_burst_cbs[queue_id]; + struct rte_eth_rxtx_callback *prev_cb; + + /* Reset head pointer and remove user cb if first in the list. */ + if (cb == user_cb) { + dev->pre_tx_burst_cbs[queue_id] = user_cb->next; + return 0; + } + + /* Remove the user cb from the callback list. */ + while (cb != NULL) { /* an empty list falls through to -EINVAL instead of dereferencing NULL */ + prev_cb = cb; + cb = cb->next; + + if (cb == user_cb) { + prev_cb->next = user_cb->next; + return 0; + } + + } + + /* Callback wasn't found. 
*/ + return -EINVAL; +} + +int +rte_eth_rx_queue_info_get(uint8_t port_id, uint16_t queue_id, + struct rte_eth_rxq_info *qinfo) +{ + struct rte_eth_dev *dev; + + RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -ENODEV); + + if (qinfo == NULL) + return -EINVAL; + + dev = &rte_eth_devices[port_id]; + if (queue_id >= dev->data->nb_rx_queues) { + RTE_PMD_DEBUG_TRACE("Invalid RX queue_id=%d\n", queue_id); + return -EINVAL; + } + + RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->rxq_info_get, -ENOTSUP); + + memset(qinfo, 0, sizeof(*qinfo)); + dev->dev_ops->rxq_info_get(dev, queue_id, qinfo); + return 0; +} + +int +rte_eth_tx_queue_info_get(uint8_t port_id, uint16_t queue_id, + struct rte_eth_txq_info *qinfo) +{ + struct rte_eth_dev *dev; + + RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -ENODEV); + + if (qinfo == NULL) + return -EINVAL; + + dev = &rte_eth_devices[port_id]; + if (queue_id >= dev->data->nb_tx_queues) { + RTE_PMD_DEBUG_TRACE("Invalid TX queue_id=%d\n", queue_id); + return -EINVAL; + } + + RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->txq_info_get, -ENOTSUP); + + memset(qinfo, 0, sizeof(*qinfo)); + dev->dev_ops->txq_info_get(dev, queue_id, qinfo); + return 0; +} + +int +rte_eth_dev_set_mc_addr_list(uint8_t port_id, + struct ether_addr *mc_addr_set, + uint32_t nb_mc_addr) +{ + struct rte_eth_dev *dev; + + RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -ENODEV); + + dev = &rte_eth_devices[port_id]; + RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->set_mc_addr_list, -ENOTSUP); + return dev->dev_ops->set_mc_addr_list(dev, mc_addr_set, nb_mc_addr); +} + +int +rte_eth_timesync_enable(uint8_t port_id) +{ + struct rte_eth_dev *dev; + + RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -ENODEV); + dev = &rte_eth_devices[port_id]; + + RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->timesync_enable, -ENOTSUP); + return (*dev->dev_ops->timesync_enable)(dev); +} + +int +rte_eth_timesync_disable(uint8_t port_id) +{ + struct rte_eth_dev *dev; + + RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -ENODEV); + dev = &rte_eth_devices[port_id]; + 
+ RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->timesync_disable, -ENOTSUP); + return (*dev->dev_ops->timesync_disable)(dev); +} + +int +rte_eth_timesync_read_rx_timestamp(uint8_t port_id, struct timespec *timestamp, + uint32_t flags) +{ + struct rte_eth_dev *dev; + + RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -ENODEV); + dev = &rte_eth_devices[port_id]; + + RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->timesync_read_rx_timestamp, -ENOTSUP); + return (*dev->dev_ops->timesync_read_rx_timestamp)(dev, timestamp, flags); +} + +int +rte_eth_timesync_read_tx_timestamp(uint8_t port_id, struct timespec *timestamp) +{ + struct rte_eth_dev *dev; + + RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -ENODEV); + dev = &rte_eth_devices[port_id]; + + RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->timesync_read_tx_timestamp, -ENOTSUP); + return (*dev->dev_ops->timesync_read_tx_timestamp)(dev, timestamp); +} + +int +rte_eth_timesync_adjust_time(uint8_t port_id, int64_t delta) +{ + struct rte_eth_dev *dev; + + RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -ENODEV); + dev = &rte_eth_devices[port_id]; + + RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->timesync_adjust_time, -ENOTSUP); + return (*dev->dev_ops->timesync_adjust_time)(dev, delta); +} + +int +rte_eth_timesync_read_time(uint8_t port_id, struct timespec *timestamp) +{ + struct rte_eth_dev *dev; + + RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -ENODEV); + dev = &rte_eth_devices[port_id]; + + RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->timesync_read_time, -ENOTSUP); + return (*dev->dev_ops->timesync_read_time)(dev, timestamp); +} + +int +rte_eth_timesync_write_time(uint8_t port_id, const struct timespec *timestamp) +{ + struct rte_eth_dev *dev; + + RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -ENODEV); + dev = &rte_eth_devices[port_id]; + + RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->timesync_write_time, -ENOTSUP); + return (*dev->dev_ops->timesync_write_time)(dev, timestamp); +} + +int +rte_eth_dev_get_reg_length(uint8_t port_id) +{ + struct rte_eth_dev *dev; + + 
RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -ENODEV); + + dev = &rte_eth_devices[port_id]; + RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->get_reg_length, -ENOTSUP); + return (*dev->dev_ops->get_reg_length)(dev); +} + +int +rte_eth_dev_get_reg_info(uint8_t port_id, struct rte_dev_reg_info *info) +{ + struct rte_eth_dev *dev; + + RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -ENODEV); + + dev = &rte_eth_devices[port_id]; + RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->get_reg, -ENOTSUP); + return (*dev->dev_ops->get_reg)(dev, info); +} + +int +rte_eth_dev_get_eeprom_length(uint8_t port_id) +{ + struct rte_eth_dev *dev; + + RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -ENODEV); + + dev = &rte_eth_devices[port_id]; + RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->get_eeprom_length, -ENOTSUP); + return (*dev->dev_ops->get_eeprom_length)(dev); +} + +int +rte_eth_dev_get_eeprom(uint8_t port_id, struct rte_dev_eeprom_info *info) +{ + struct rte_eth_dev *dev; + + RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -ENODEV); + + dev = &rte_eth_devices[port_id]; + RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->get_eeprom, -ENOTSUP); + return (*dev->dev_ops->get_eeprom)(dev, info); +} + +int +rte_eth_dev_set_eeprom(uint8_t port_id, struct rte_dev_eeprom_info *info) +{ + struct rte_eth_dev *dev; + + RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -ENODEV); + + dev = &rte_eth_devices[port_id]; + RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->set_eeprom, -ENOTSUP); + return (*dev->dev_ops->set_eeprom)(dev, info); +} + +int +rte_eth_dev_get_dcb_info(uint8_t port_id, + struct rte_eth_dcb_info *dcb_info) +{ + struct rte_eth_dev *dev; + + if (!rte_eth_dev_is_valid_port(port_id)) { + RTE_PMD_DEBUG_TRACE("Invalid port_id=%d\n", port_id); + return -ENODEV; + } + + dev = &rte_eth_devices[port_id]; + memset(dcb_info, 0, sizeof(struct rte_eth_dcb_info)); + + RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->get_dcb_info, -ENOTSUP); + return (*dev->dev_ops->get_dcb_info)(dev, dcb_info); +} + +void +rte_eth_copy_pci_info(struct rte_eth_dev *eth_dev, struct 
rte_pci_device *pci_dev) +{ + if ((eth_dev == NULL) || (pci_dev == NULL)) { + RTE_PMD_DEBUG_TRACE("NULL pointer eth_dev=%p pci_dev=%p\n", + eth_dev, pci_dev); + return; + } + + eth_dev->data->dev_flags = 0; + if (pci_dev->driver->drv_flags & RTE_PCI_DRV_INTR_LSC) + eth_dev->data->dev_flags |= RTE_ETH_DEV_INTR_LSC; + if (pci_dev->driver->drv_flags & RTE_PCI_DRV_DETACHABLE) + eth_dev->data->dev_flags |= RTE_ETH_DEV_DETACHABLE; + + eth_dev->data->kdrv = pci_dev->kdrv; + eth_dev->data->numa_node = pci_dev->numa_node; + eth_dev->data->drv_name = pci_dev->driver->name; +} diff --git a/src/dpdk22/lib/librte_ether/rte_ethdev.h b/src/dpdk22/lib/librte_ether/rte_ethdev.h new file mode 100644 index 00000000..bada8ade --- /dev/null +++ b/src/dpdk22/lib/librte_ether/rte_ethdev.h @@ -0,0 +1,3894 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2015 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef _RTE_ETHDEV_H_ +#define _RTE_ETHDEV_H_ + +/** + * @file + * + * RTE Ethernet Device API + * + * The Ethernet Device API is composed of two parts: + * + * - The application-oriented Ethernet API that includes functions to setup + * an Ethernet device (configure it, setup its RX and TX queues and start it), + * to get its MAC address, the speed and the status of its physical link, + * to receive and to transmit packets, and so on. + * + * - The driver-oriented Ethernet API that exports a function allowing + * an Ethernet Poll Mode Driver (PMD) to simultaneously register itself as + * an Ethernet device driver and as a PCI driver for a set of matching PCI + * [Ethernet] devices classes. + * + * By default, all the functions of the Ethernet Device API exported by a PMD + * are lock-free functions which assume to not be invoked in parallel on + * different logical cores to work on the same target object. For instance, + * the receive function of a PMD cannot be invoked in parallel on two logical + * cores to poll the same RX queue [of the same port]. Of course, this function + * can be invoked in parallel by different logical cores on different RX queues. + * It is the responsibility of the upper level application to enforce this rule. 
+ * + * If needed, parallel accesses by multiple logical cores to shared queues + * shall be explicitly protected by dedicated inline lock-aware functions + * built on top of their corresponding lock-free functions of the PMD API. + * + * In all functions of the Ethernet API, the Ethernet device is + * designated by an integer >= 0 named the device port identifier. + * + * At the Ethernet driver level, Ethernet devices are represented by a generic + * data structure of type *rte_eth_dev*. + * + * Ethernet devices are dynamically registered during the PCI probing phase + * performed at EAL initialization time. + * When an Ethernet device is being probed, an *rte_eth_dev* structure and + * a new port identifier are allocated for that device. Then, the eth_dev_init() + * function supplied by the Ethernet driver matching the probed PCI + * device is invoked to properly initialize the device. + * + * The role of the device init function consists of resetting the hardware, + * checking access to Non-volatile Memory (NVM), reading the MAC address + * from NVM etc. + * + * If the device init operation is successful, the correspondence between + * the port identifier assigned to the new device and its associated + * *rte_eth_dev* structure is effectively registered. + * Otherwise, both the *rte_eth_dev* structure and the port identifier are + * freed. + * + * The functions exported by the application Ethernet API to setup a device + * designated by its port identifier must be invoked in the following order: + * - rte_eth_dev_configure() + * - rte_eth_tx_queue_setup() + * - rte_eth_rx_queue_setup() + * - rte_eth_dev_start() + * + * Then, the network application can invoke, in any order, the functions + * exported by the Ethernet API to get the MAC address of a given device, to + * get the speed and the status of a device physical link, to receive/transmit + * [burst of] packets, and so on. + * + * If the application wants to change the configuration (i.e. 
call + * rte_eth_dev_configure(), rte_eth_tx_queue_setup(), or + * rte_eth_rx_queue_setup()), it must call rte_eth_dev_stop() first to stop the + * device and then do the reconfiguration before calling rte_eth_dev_start() + * again. The transmit and receive functions should not be invoked when the + * device is stopped. + * + * Please note that some configuration is not stored between calls to + * rte_eth_dev_stop()/rte_eth_dev_start(). The following configuration will + * be retained: + * + * - flow control settings + * - receive mode configuration (promiscuous mode, hardware checksum mode, + * RSS/VMDQ settings etc.) + * - VLAN filtering configuration + * - MAC addresses supplied to MAC address array + * - flow director filtering mode (but not filtering rules) + * - NIC queue statistics mappings + * + * Any other configuration will not be stored and will need to be re-entered + * after a call to rte_eth_dev_start(). + * + * Finally, a network application can close an Ethernet device by invoking the + * rte_eth_dev_close() function. + * + * Each function of the application Ethernet API invokes a specific function + * of the PMD that controls the target device designated by its port + * identifier. + * For this purpose, all device-specific functions of an Ethernet driver are + * supplied through a set of pointers contained in a generic structure of type + * *eth_dev_ops*. + * The address of the *eth_dev_ops* structure is stored in the *rte_eth_dev* + * structure by the device init function of the Ethernet driver, which is + * invoked during the PCI probing phase, as explained earlier. + * + * In other words, each function of the Ethernet API simply retrieves the + * *rte_eth_dev* structure associated with the device port identifier and + * performs an indirect invocation of the corresponding driver function + * supplied in the *eth_dev_ops* structure of the *rte_eth_dev* structure. 
+ * + * For performance reasons, the address of the burst-oriented RX and TX + * functions of the Ethernet driver are not contained in the *eth_dev_ops* + * structure. Instead, they are directly stored at the beginning of the + * *rte_eth_dev* structure to avoid an extra indirect memory access during + * their invocation. + * + * RTE ethernet device drivers do not use interrupts for transmitting or + * receiving. Instead, Ethernet drivers export Poll-Mode receive and transmit + * functions to applications. + * Both receive and transmit functions are packet-burst oriented to minimize + * their cost per packet through the following optimizations: + * + * - Sharing among multiple packets the incompressible cost of the + * invocation of receive/transmit functions. + * + * - Enabling receive/transmit functions to take advantage of burst-oriented + * hardware features (L1 cache, prefetch instructions, NIC head/tail + * registers) to minimize the number of CPU cycles per packet, for instance, + * by avoiding useless read memory accesses to ring descriptors, or by + * systematically using arrays of pointers that exactly fit L1 cache line + * boundaries and sizes. + * + * The burst-oriented receive function does not provide any error notification, + * to avoid the corresponding overhead. As a hint, the upper-level application + * might check the status of the device link once being systematically returned + * a 0 value by the receive function of the driver for a given number of tries. + */ + +#ifdef __cplusplus +extern "C" { +#endif + +#include + +#include + +/* Use this macro to check if LRO API is supported */ +#define RTE_ETHDEV_HAS_LRO_SUPPORT + +#include +#include +#include +#include +#include +#include "rte_ether.h" +#include "rte_eth_ctrl.h" +#include "rte_dev_info.h" + +struct rte_mbuf; + +/** + * A structure used to retrieve statistics for an Ethernet port. + */ +struct rte_eth_stats { + uint64_t ipackets; /**< Total number of successfully received packets. 
*/ + uint64_t opackets; /**< Total number of successfully transmitted packets.*/ + uint64_t ibytes; /**< Total number of successfully received bytes. */ + uint64_t obytes; /**< Total number of successfully transmitted bytes. */ + uint64_t imissed; + /**< Total of RX packets dropped by the HW, + * because there are no available mbufs (i.e. RX queues are full). + */ + uint64_t ibadcrc __rte_deprecated; + /**< Deprecated; Total of RX packets with CRC error. */ + uint64_t ibadlen __rte_deprecated; + /**< Deprecated; Total of RX packets with bad length. */ + uint64_t ierrors; /**< Total number of erroneous received packets. */ + uint64_t oerrors; /**< Total number of failed transmitted packets. */ + uint64_t imcasts; + /**< Deprecated; Total number of multicast received packets. */ + uint64_t rx_nombuf; /**< Total number of RX mbuf allocation failures. */ + uint64_t fdirmatch __rte_deprecated; + /**< Deprecated; Total number of RX packets matching a filter. */ + uint64_t fdirmiss __rte_deprecated; + /**< Deprecated; Total number of RX packets not matching any filter. */ + uint64_t tx_pause_xon __rte_deprecated; + /**< Deprecated; Total nb. of XON pause frame sent. */ + uint64_t rx_pause_xon __rte_deprecated; + /**< Deprecated; Total nb. of XON pause frame received. */ + uint64_t tx_pause_xoff __rte_deprecated; + /**< Deprecated; Total nb. of XOFF pause frame sent. */ + uint64_t rx_pause_xoff __rte_deprecated; + /**< Deprecated; Total nb. of XOFF pause frame received. */ + uint64_t q_ipackets[RTE_ETHDEV_QUEUE_STAT_CNTRS]; + /**< Total number of queue RX packets. */ + uint64_t q_opackets[RTE_ETHDEV_QUEUE_STAT_CNTRS]; + /**< Total number of queue TX packets. */ + uint64_t q_ibytes[RTE_ETHDEV_QUEUE_STAT_CNTRS]; + /**< Total number of successfully received queue bytes. */ + uint64_t q_obytes[RTE_ETHDEV_QUEUE_STAT_CNTRS]; + /**< Total number of successfully transmitted queue bytes. 
*/ + uint64_t q_errors[RTE_ETHDEV_QUEUE_STAT_CNTRS]; + /**< Total number of queue packets received that are dropped. */ + uint64_t ilbpackets; + /**< Total number of good packets received from loopback,VF Only */ + uint64_t olbpackets; + /**< Total number of good packets transmitted to loopback,VF Only */ + uint64_t ilbbytes; + /**< Total number of good bytes received from loopback,VF Only */ + uint64_t olbbytes; + /**< Total number of good bytes transmitted to loopback,VF Only */ +}; + +/** + * A structure used to retrieve link-level information of an Ethernet port. + */ +struct rte_eth_link { + uint16_t link_speed; /**< ETH_LINK_SPEED_[10, 100, 1000, 10000] */ + uint16_t link_duplex; /**< ETH_LINK_[HALF_DUPLEX, FULL_DUPLEX] */ + uint8_t link_status : 1; /**< 1 -> link up, 0 -> link down */ +}__attribute__((aligned(8))); /**< aligned for atomic64 read/write */ + +#define ETH_LINK_SPEED_AUTONEG 0 /**< Auto-negotiate link speed. */ +#define ETH_LINK_SPEED_10 10 /**< 10 megabits/second. */ +#define ETH_LINK_SPEED_100 100 /**< 100 megabits/second. */ +#define ETH_LINK_SPEED_1000 1000 /**< 1 gigabits/second. */ +#define ETH_LINK_SPEED_10000 10000 /**< 10 gigabits/second. */ +#define ETH_LINK_SPEED_10G 10000 /**< alias of 10 gigabits/second. */ +#define ETH_LINK_SPEED_20G 20000 /**< 20 gigabits/second. */ +#define ETH_LINK_SPEED_40G 40000 /**< 40 gigabits/second. */ + +#define ETH_LINK_AUTONEG_DUPLEX 0 /**< Auto-negotiate duplex. */ +#define ETH_LINK_HALF_DUPLEX 1 /**< Half-duplex connection. */ +#define ETH_LINK_FULL_DUPLEX 2 /**< Full-duplex connection. */ + +/** + * A structure used to configure the ring threshold registers of an RX/TX + * queue for an Ethernet port. + */ +struct rte_eth_thresh { + uint8_t pthresh; /**< Ring prefetch threshold. */ + uint8_t hthresh; /**< Ring host threshold. */ + uint8_t wthresh; /**< Ring writeback threshold. */ +}; + +/** + * Simple flags are used for rte_eth_conf.rxmode.mq_mode. 
+ */ +#define ETH_MQ_RX_RSS_FLAG 0x1 +#define ETH_MQ_RX_DCB_FLAG 0x2 +#define ETH_MQ_RX_VMDQ_FLAG 0x4 + +/** + * A set of values to identify what method is to be used to route + * packets to multiple queues. + */ +enum rte_eth_rx_mq_mode { + /** None of DCB,RSS or VMDQ mode */ + ETH_MQ_RX_NONE = 0, + + /** For RX side, only RSS is on */ + ETH_MQ_RX_RSS = ETH_MQ_RX_RSS_FLAG, + /** For RX side,only DCB is on. */ + ETH_MQ_RX_DCB = ETH_MQ_RX_DCB_FLAG, + /** Both DCB and RSS enable */ + ETH_MQ_RX_DCB_RSS = ETH_MQ_RX_RSS_FLAG | ETH_MQ_RX_DCB_FLAG, + + /** Only VMDQ, no RSS nor DCB */ + ETH_MQ_RX_VMDQ_ONLY = ETH_MQ_RX_VMDQ_FLAG, + /** RSS mode with VMDQ */ + ETH_MQ_RX_VMDQ_RSS = ETH_MQ_RX_RSS_FLAG | ETH_MQ_RX_VMDQ_FLAG, + /** Use VMDQ+DCB to route traffic to queues */ + ETH_MQ_RX_VMDQ_DCB = ETH_MQ_RX_VMDQ_FLAG | ETH_MQ_RX_DCB_FLAG, + /** Enable both VMDQ and DCB in VMDq */ + ETH_MQ_RX_VMDQ_DCB_RSS = ETH_MQ_RX_RSS_FLAG | ETH_MQ_RX_DCB_FLAG | + ETH_MQ_RX_VMDQ_FLAG, +}; + +/** + * for rx mq mode backward compatible + */ +#define ETH_RSS ETH_MQ_RX_RSS +#define VMDQ_DCB ETH_MQ_RX_VMDQ_DCB +#define ETH_DCB_RX ETH_MQ_RX_DCB + +/** + * A set of values to identify what method is to be used to transmit + * packets using multi-TCs. + */ +enum rte_eth_tx_mq_mode { + ETH_MQ_TX_NONE = 0, /**< It is in neither DCB nor VT mode. */ + ETH_MQ_TX_DCB, /**< For TX side,only DCB is on. */ + ETH_MQ_TX_VMDQ_DCB, /**< For TX side,both DCB and VT is on. */ + ETH_MQ_TX_VMDQ_ONLY, /**< Only VT on, no DCB */ +}; + +/** + * for tx mq mode backward compatible + */ +#define ETH_DCB_NONE ETH_MQ_TX_NONE +#define ETH_VMDQ_DCB_TX ETH_MQ_TX_VMDQ_DCB +#define ETH_DCB_TX ETH_MQ_TX_DCB + +/** + * A structure used to configure the RX features of an Ethernet port. + */ +struct rte_eth_rxmode { + /** The multi-queue packet distribution mode to be used, e.g. RSS. */ + enum rte_eth_rx_mq_mode mq_mode; + uint32_t max_rx_pkt_len; /**< Only used if jumbo_frame enabled. 
*/ + uint16_t split_hdr_size; /**< hdr buf size (header_split enabled).*/ + uint16_t header_split : 1, /**< Header Split enable. */ + hw_ip_checksum : 1, /**< IP/UDP/TCP checksum offload enable. */ + hw_vlan_filter : 1, /**< VLAN filter enable. */ + hw_vlan_strip : 1, /**< VLAN strip enable. */ + hw_vlan_extend : 1, /**< Extended VLAN enable. */ + jumbo_frame : 1, /**< Jumbo Frame Receipt enable. */ + hw_strip_crc : 1, /**< Enable CRC stripping by hardware. */ + enable_scatter : 1, /**< Enable scatter packets rx handler */ + enable_lro : 1; /**< Enable LRO */ +}; + +/** + * A structure used to configure the Receive Side Scaling (RSS) feature + * of an Ethernet port. + * If not NULL, the *rss_key* pointer of the *rss_conf* structure points + * to an array holding the RSS key to use for hashing specific header + * fields of received packets. The length of this array should be indicated + * by *rss_key_len* below. Otherwise, a default random hash key is used by + * the device driver. + * + * The *rss_key_len* field of the *rss_conf* structure indicates the length + * in bytes of the array pointed by *rss_key*. To be compatible, this length + * will be checked in i40e only. Others assume 40 bytes to be used as before. + * + * The *rss_hf* field of the *rss_conf* structure indicates the different + * types of IPv4/IPv6 packets to which the RSS hashing must be applied. + * Supplying an *rss_hf* equal to zero disables the RSS feature. + */ +struct rte_eth_rss_conf { + uint8_t *rss_key; /**< If not NULL, 40-byte hash key. */ + uint8_t rss_key_len; /**< hash key length in bytes. */ + uint64_t rss_hf; /**< Hash functions to apply - see below. */ +}; + +/* + * The RSS offload types are defined based on flow types which are defined + * in rte_eth_ctrl.h. Different NIC hardwares may support different RSS offload + * types. The supported flow types or RSS offload types can be queried by + * rte_eth_dev_info_get(). 
+ */ +#define ETH_RSS_IPV4 (1ULL << RTE_ETH_FLOW_IPV4) +#define ETH_RSS_FRAG_IPV4 (1ULL << RTE_ETH_FLOW_FRAG_IPV4) +#define ETH_RSS_NONFRAG_IPV4_TCP (1ULL << RTE_ETH_FLOW_NONFRAG_IPV4_TCP) +#define ETH_RSS_NONFRAG_IPV4_UDP (1ULL << RTE_ETH_FLOW_NONFRAG_IPV4_UDP) +#define ETH_RSS_NONFRAG_IPV4_SCTP (1ULL << RTE_ETH_FLOW_NONFRAG_IPV4_SCTP) +#define ETH_RSS_NONFRAG_IPV4_OTHER (1ULL << RTE_ETH_FLOW_NONFRAG_IPV4_OTHER) +#define ETH_RSS_IPV6 (1ULL << RTE_ETH_FLOW_IPV6) +#define ETH_RSS_FRAG_IPV6 (1ULL << RTE_ETH_FLOW_FRAG_IPV6) +#define ETH_RSS_NONFRAG_IPV6_TCP (1ULL << RTE_ETH_FLOW_NONFRAG_IPV6_TCP) +#define ETH_RSS_NONFRAG_IPV6_UDP (1ULL << RTE_ETH_FLOW_NONFRAG_IPV6_UDP) +#define ETH_RSS_NONFRAG_IPV6_SCTP (1ULL << RTE_ETH_FLOW_NONFRAG_IPV6_SCTP) +#define ETH_RSS_NONFRAG_IPV6_OTHER (1ULL << RTE_ETH_FLOW_NONFRAG_IPV6_OTHER) +#define ETH_RSS_L2_PAYLOAD (1ULL << RTE_ETH_FLOW_L2_PAYLOAD) +#define ETH_RSS_IPV6_EX (1ULL << RTE_ETH_FLOW_IPV6_EX) +#define ETH_RSS_IPV6_TCP_EX (1ULL << RTE_ETH_FLOW_IPV6_TCP_EX) +#define ETH_RSS_IPV6_UDP_EX (1ULL << RTE_ETH_FLOW_IPV6_UDP_EX) + +#define ETH_RSS_IP ( \ + ETH_RSS_IPV4 | \ + ETH_RSS_FRAG_IPV4 | \ + ETH_RSS_NONFRAG_IPV4_OTHER | \ + ETH_RSS_IPV6 | \ + ETH_RSS_FRAG_IPV6 | \ + ETH_RSS_NONFRAG_IPV6_OTHER | \ + ETH_RSS_IPV6_EX) + +#define ETH_RSS_UDP ( \ + ETH_RSS_NONFRAG_IPV4_UDP | \ + ETH_RSS_NONFRAG_IPV6_UDP | \ + ETH_RSS_IPV6_UDP_EX) + +#define ETH_RSS_TCP ( \ + ETH_RSS_NONFRAG_IPV4_TCP | \ + ETH_RSS_NONFRAG_IPV6_TCP | \ + ETH_RSS_IPV6_TCP_EX) + +#define ETH_RSS_SCTP ( \ + ETH_RSS_NONFRAG_IPV4_SCTP | \ + ETH_RSS_NONFRAG_IPV6_SCTP) + +/**< Mask of valid RSS hash protocols */ +#define ETH_RSS_PROTO_MASK ( \ + ETH_RSS_IPV4 | \ + ETH_RSS_FRAG_IPV4 | \ + ETH_RSS_NONFRAG_IPV4_TCP | \ + ETH_RSS_NONFRAG_IPV4_UDP | \ + ETH_RSS_NONFRAG_IPV4_SCTP | \ + ETH_RSS_NONFRAG_IPV4_OTHER | \ + ETH_RSS_IPV6 | \ + ETH_RSS_FRAG_IPV6 | \ + ETH_RSS_NONFRAG_IPV6_TCP | \ + ETH_RSS_NONFRAG_IPV6_UDP | \ + ETH_RSS_NONFRAG_IPV6_SCTP | \ + ETH_RSS_NONFRAG_IPV6_OTHER | 
\ + ETH_RSS_L2_PAYLOAD | \ + ETH_RSS_IPV6_EX | \ + ETH_RSS_IPV6_TCP_EX | \ + ETH_RSS_IPV6_UDP_EX) + +/* + * Definitions used for redirection table entry size. + * Some RSS RETA sizes may not be supported by some drivers, check the + * documentation or the description of relevant functions for more details. + */ +#define ETH_RSS_RETA_SIZE_64 64 +#define ETH_RSS_RETA_SIZE_128 128 +#define ETH_RSS_RETA_SIZE_512 512 +#define RTE_RETA_GROUP_SIZE 64 + +/* Definitions used for VMDQ and DCB functionality */ +#define ETH_VMDQ_MAX_VLAN_FILTERS 64 /**< Maximum nb. of VMDQ vlan filters. */ +#define ETH_DCB_NUM_USER_PRIORITIES 8 /**< Maximum nb. of DCB priorities. */ +#define ETH_VMDQ_DCB_NUM_QUEUES 128 /**< Maximum nb. of VMDQ DCB queues. */ +#define ETH_DCB_NUM_QUEUES 128 /**< Maximum nb. of DCB queues. */ + +/* DCB capability defines */ +#define ETH_DCB_PG_SUPPORT 0x00000001 /**< Priority Group(ETS) support. */ +#define ETH_DCB_PFC_SUPPORT 0x00000002 /**< Priority Flow Control support. */ + +/* Definitions used for VLAN Offload functionality */ +#define ETH_VLAN_STRIP_OFFLOAD 0x0001 /**< VLAN Strip On/Off */ +#define ETH_VLAN_FILTER_OFFLOAD 0x0002 /**< VLAN Filter On/Off */ +#define ETH_VLAN_EXTEND_OFFLOAD 0x0004 /**< VLAN Extend On/Off */ + +/* Definitions used for mask VLAN setting */ +#define ETH_VLAN_STRIP_MASK 0x0001 /**< VLAN Strip setting mask */ +#define ETH_VLAN_FILTER_MASK 0x0002 /**< VLAN Filter setting mask*/ +#define ETH_VLAN_EXTEND_MASK 0x0004 /**< VLAN Extend setting mask*/ +#define ETH_VLAN_ID_MAX 0x0FFF /**< VLAN ID is in lower 12 bits*/ + +/* Definitions used for receive MAC address */ +#define ETH_NUM_RECEIVE_MAC_ADDR 128 /**< Maximum nb. of receive mac addr. */ + +/* Definitions used for unicast hash */ +#define ETH_VMDQ_NUM_UC_HASH_ARRAY 128 /**< Maximum nb. of UC hash array. */ + +/* Definitions used for VMDQ pool rx mode setting */ +#define ETH_VMDQ_ACCEPT_UNTAG 0x0001 /**< accept untagged packets. 
*/ +#define ETH_VMDQ_ACCEPT_HASH_MC 0x0002 /**< accept packets in multicast table . */ +#define ETH_VMDQ_ACCEPT_HASH_UC 0x0004 /**< accept packets in unicast table. */ +#define ETH_VMDQ_ACCEPT_BROADCAST 0x0008 /**< accept broadcast packets. */ +#define ETH_VMDQ_ACCEPT_MULTICAST 0x0010 /**< multicast promiscuous. */ + +/** Maximum nb. of vlan per mirror rule */ +#define ETH_MIRROR_MAX_VLANS 64 + +#define ETH_MIRROR_VIRTUAL_POOL_UP 0x01 /**< Virtual Pool uplink Mirroring. */ +#define ETH_MIRROR_UPLINK_PORT 0x02 /**< Uplink Port Mirroring. */ +#define ETH_MIRROR_DOWNLINK_PORT 0x04 /**< Downlink Port Mirroring. */ +#define ETH_MIRROR_VLAN 0x08 /**< VLAN Mirroring. */ +#define ETH_MIRROR_VIRTUAL_POOL_DOWN 0x10 /**< Virtual Pool downlink Mirroring. */ + +/** + * A structure used to configure VLAN traffic mirror of an Ethernet port. + */ +struct rte_eth_vlan_mirror { + uint64_t vlan_mask; /**< mask for valid VLAN ID. */ + /** VLAN ID list for vlan mirroring. */ + uint16_t vlan_id[ETH_MIRROR_MAX_VLANS]; +}; + +/** + * A structure used to configure traffic mirror of an Ethernet port. + */ +struct rte_eth_mirror_conf { + uint8_t rule_type; /**< Mirroring rule type */ + uint8_t dst_pool; /**< Destination pool for this mirror rule. */ + uint64_t pool_mask; /**< Bitmap of pool for pool mirroring */ + /** VLAN ID setting for VLAN mirroring. */ + struct rte_eth_vlan_mirror vlan; +}; + +/** + * A structure used to configure 64 entries of Redirection Table of the + * Receive Side Scaling (RSS) feature of an Ethernet port. To configure + * more than 64 entries supported by hardware, an array of this structure + * is needed. + */ +struct rte_eth_rss_reta_entry64 { + uint64_t mask; + /**< Mask bits indicate which entries need to be updated/queried. */ + uint8_t reta[RTE_RETA_GROUP_SIZE]; + /**< Group of 64 redirection table entries. 
*/ +}; + +/** + * This enum indicates the possible number of traffic classes + * in DCB configurations. + */ +enum rte_eth_nb_tcs { + ETH_4_TCS = 4, /**< 4 TCs with DCB. */ + ETH_8_TCS = 8 /**< 8 TCs with DCB. */ +}; + +/** + * This enum indicates the possible number of queue pools + * in VMDQ configurations. + */ +enum rte_eth_nb_pools { + ETH_8_POOLS = 8, /**< 8 VMDq pools. */ + ETH_16_POOLS = 16, /**< 16 VMDq pools. */ + ETH_32_POOLS = 32, /**< 32 VMDq pools. */ + ETH_64_POOLS = 64 /**< 64 VMDq pools. */ +}; + +/* This structure may be extended in future. */ +struct rte_eth_dcb_rx_conf { + enum rte_eth_nb_tcs nb_tcs; /**< Possible DCB TCs, 4 or 8 TCs */ + /** Traffic class each user priority (UP) is mapped to. */ + uint8_t dcb_tc[ETH_DCB_NUM_USER_PRIORITIES]; +}; + +struct rte_eth_vmdq_dcb_tx_conf { + enum rte_eth_nb_pools nb_queue_pools; /**< With DCB, 16 or 32 pools. */ + /** Traffic class each user priority (UP) is mapped to. */ + uint8_t dcb_tc[ETH_DCB_NUM_USER_PRIORITIES]; +}; + +struct rte_eth_dcb_tx_conf { + enum rte_eth_nb_tcs nb_tcs; /**< Possible DCB TCs, 4 or 8 TCs. */ + /** Traffic class each user priority (UP) is mapped to. */ + uint8_t dcb_tc[ETH_DCB_NUM_USER_PRIORITIES]; +}; + +struct rte_eth_vmdq_tx_conf { + enum rte_eth_nb_pools nb_queue_pools; /**< VMDq mode, 64 pools. */ +}; + +/** + * A structure used to configure the VMDQ+DCB feature + * of an Ethernet port. + * + * Using this feature, packets are routed to a pool of queues, based + * on the vlan id in the vlan tag, and then to a specific queue within + * that pool, using the user priority vlan tag field. + * + * A default pool may be used, if desired, to route all traffic which + * does not match the vlan filter rules. 
+ */ +struct rte_eth_vmdq_dcb_conf { + enum rte_eth_nb_pools nb_queue_pools; /**< With DCB, 16 or 32 pools */ + uint8_t enable_default_pool; /**< If non-zero, use a default pool */ + uint8_t default_pool; /**< The default pool, if applicable */ + uint8_t nb_pool_maps; /**< We can have up to 64 filters/mappings */ + struct { + uint16_t vlan_id; /**< The vlan id of the received frame */ + uint64_t pools; /**< Bitmask of pools for packet rx */ + } pool_map[ETH_VMDQ_MAX_VLAN_FILTERS]; /**< VMDq vlan pool maps. */ + uint8_t dcb_tc[ETH_DCB_NUM_USER_PRIORITIES]; + /**< Selects a queue in a pool */ +}; + +struct rte_eth_vmdq_rx_conf { + enum rte_eth_nb_pools nb_queue_pools; /**< VMDq only mode, 8 or 64 pools */ + uint8_t enable_default_pool; /**< If non-zero, use a default pool */ + uint8_t default_pool; /**< The default pool, if applicable */ + uint8_t enable_loop_back; /**< Enable VT loop back */ + uint8_t nb_pool_maps; /**< We can have up to 64 filters/mappings */ + uint32_t rx_mode; /**< Flags from ETH_VMDQ_ACCEPT_* */ + struct { + uint16_t vlan_id; /**< The vlan id of the received frame */ + uint64_t pools; /**< Bitmask of pools for packet rx */ + } pool_map[ETH_VMDQ_MAX_VLAN_FILTERS]; /**< VMDq vlan pool maps. */ +}; + +/** + * A structure used to configure the TX features of an Ethernet port. + */ +struct rte_eth_txmode { + enum rte_eth_tx_mq_mode mq_mode; /**< TX multi-queues mode. */ + + /* For i40e specifically */ + uint16_t pvid; + uint8_t hw_vlan_reject_tagged : 1, + /**< If set, reject sending out tagged pkts */ + hw_vlan_reject_untagged : 1, + /**< If set, reject sending out untagged pkts */ + hw_vlan_insert_pvid : 1; + /**< If set, enable port based VLAN insertion */ +}; + +/** + * A structure used to configure an RX ring of an Ethernet port. + */ +struct rte_eth_rxconf { + struct rte_eth_thresh rx_thresh; /**< RX ring threshold registers. */ + uint16_t rx_free_thresh; /**< Drives the freeing of RX descriptors. 
*/ + uint8_t rx_drop_en; /**< Drop packets if no descriptors are available. */ + uint8_t rx_deferred_start; /**< Do not start queue with rte_eth_dev_start(). */ +}; + +#define ETH_TXQ_FLAGS_NOMULTSEGS 0x0001 /**< nb_segs=1 for all mbufs */ +#define ETH_TXQ_FLAGS_NOREFCOUNT 0x0002 /**< refcnt can be ignored */ +#define ETH_TXQ_FLAGS_NOMULTMEMP 0x0004 /**< all bufs come from same mempool */ +#define ETH_TXQ_FLAGS_NOVLANOFFL 0x0100 /**< disable VLAN offload */ +#define ETH_TXQ_FLAGS_NOXSUMSCTP 0x0200 /**< disable SCTP checksum offload */ +#define ETH_TXQ_FLAGS_NOXSUMUDP 0x0400 /**< disable UDP checksum offload */ +#define ETH_TXQ_FLAGS_NOXSUMTCP 0x0800 /**< disable TCP checksum offload */ +#define ETH_TXQ_FLAGS_NOOFFLOADS \ + (ETH_TXQ_FLAGS_NOVLANOFFL | ETH_TXQ_FLAGS_NOXSUMSCTP | \ + ETH_TXQ_FLAGS_NOXSUMUDP | ETH_TXQ_FLAGS_NOXSUMTCP) +#define ETH_TXQ_FLAGS_NOXSUMS \ + (ETH_TXQ_FLAGS_NOXSUMSCTP | ETH_TXQ_FLAGS_NOXSUMUDP | \ + ETH_TXQ_FLAGS_NOXSUMTCP) +/** + * A structure used to configure a TX ring of an Ethernet port. + */ +struct rte_eth_txconf { + struct rte_eth_thresh tx_thresh; /**< TX ring threshold registers. */ + uint16_t tx_rs_thresh; /**< Drives the setting of RS bit on TXDs. */ + uint16_t tx_free_thresh; /**< Start freeing TX buffers if there are + less free descriptors than this value. */ + + uint32_t txq_flags; /**< Set flags for the Tx queue */ + uint8_t tx_deferred_start; /**< Do not start queue with rte_eth_dev_start(). */ +}; + +/** + * A structure contains information about HW descriptor ring limitations. + */ +struct rte_eth_desc_lim { + uint16_t nb_max; /**< Max allowed number of descriptors. */ + uint16_t nb_min; /**< Min allowed number of descriptors. */ + uint16_t nb_align; /**< Number of descriptors should be aligned to. */ +}; + +/** + * This enum indicates the flow control mode + */ +enum rte_eth_fc_mode { + RTE_FC_NONE = 0, /**< Disable flow control. */ + RTE_FC_RX_PAUSE, /**< RX pause frame, enable flowctrl on TX side. 
*/ + RTE_FC_TX_PAUSE, /**< TX pause frame, enable flowctrl on RX side. */ + RTE_FC_FULL /**< Enable flow control on both side. */ +}; + +/** + * A structure used to configure Ethernet flow control parameter. + * These parameters will be configured into the register of the NIC. + * Please refer to the corresponding data sheet for proper value. + */ +struct rte_eth_fc_conf { + uint32_t high_water; /**< High threshold value to trigger XOFF */ + uint32_t low_water; /**< Low threshold value to trigger XON */ + uint16_t pause_time; /**< Pause quota in the Pause frame */ + uint16_t send_xon; /**< Is XON frame need be sent */ + enum rte_eth_fc_mode mode; /**< Link flow control mode */ + uint8_t mac_ctrl_frame_fwd; /**< Forward MAC control frames */ + uint8_t autoneg; /**< Use Pause autoneg */ +}; + +/** + * A structure used to configure Ethernet priority flow control parameter. + * These parameters will be configured into the register of the NIC. + * Please refer to the corresponding data sheet for proper value. + */ +struct rte_eth_pfc_conf { + struct rte_eth_fc_conf fc; /**< General flow control parameter. */ + uint8_t priority; /**< VLAN User Priority. */ +}; + +/** + * Memory space that can be configured to store Flow Director filters + * in the board memory. + */ +enum rte_fdir_pballoc_type { + RTE_FDIR_PBALLOC_64K = 0, /**< 64k. */ + RTE_FDIR_PBALLOC_128K, /**< 128k. */ + RTE_FDIR_PBALLOC_256K, /**< 256k. */ +}; + +/** + * Select report mode of FDIR hash information in RX descriptors. + */ +enum rte_fdir_status_mode { + RTE_FDIR_NO_REPORT_STATUS = 0, /**< Never report FDIR hash. */ + RTE_FDIR_REPORT_STATUS, /**< Only report FDIR hash for matching pkts. */ + RTE_FDIR_REPORT_STATUS_ALWAYS, /**< Always report FDIR hash. */ +}; + +/** + * A structure used to configure the Flow Director (FDIR) feature + * of an Ethernet port. + * + * If mode is RTE_FDIR_DISABLE, the pballoc value is ignored. + */ +struct rte_fdir_conf { + enum rte_fdir_mode mode; /**< Flow Director mode. 
*/ + enum rte_fdir_pballoc_type pballoc; /**< Space for FDIR filters. */ + enum rte_fdir_status_mode status; /**< How to report FDIR hash. */ + /** RX queue of packets matching a "drop" filter in perfect mode. */ + uint8_t drop_queue; + struct rte_eth_fdir_masks mask; + struct rte_eth_fdir_flex_conf flex_conf; + /**< Flex payload configuration. */ +}; + +/** + * UDP tunneling configuration. + */ +struct rte_eth_udp_tunnel { + uint16_t udp_port; + uint8_t prot_type; +}; + +/** + * A structure used to enable/disable specific device interrupts. + */ +struct rte_intr_conf { + /** enable/disable lsc interrupt. 0 (default) - disable, 1 enable */ + uint16_t lsc; + /** enable/disable rxq interrupt. 0 (default) - disable, 1 enable */ + uint16_t rxq; +}; + +/** + * A structure used to configure an Ethernet port. + * Depending upon the RX multi-queue mode, extra advanced + * configuration settings may be needed. + */ +struct rte_eth_conf { + uint16_t link_speed; + /**< ETH_LINK_SPEED_10[0|00|000], or 0 for autonegotiation */ + uint16_t link_duplex; + /**< ETH_LINK_[HALF_DUPLEX|FULL_DUPLEX], or 0 for autonegotiation */ + struct rte_eth_rxmode rxmode; /**< Port RX configuration. */ + struct rte_eth_txmode txmode; /**< Port TX configuration. */ + uint32_t lpbk_mode; /**< Loopback operation mode. By default the value + is 0, meaning the loopback mode is disabled. + Read the datasheet of given ethernet controller + for details. The possible values of this field + are defined in implementation of each driver. */ + struct { + struct rte_eth_rss_conf rss_conf; /**< Port RSS configuration */ + struct rte_eth_vmdq_dcb_conf vmdq_dcb_conf; + /**< Port vmdq+dcb configuration. */ + struct rte_eth_dcb_rx_conf dcb_rx_conf; + /**< Port dcb RX configuration. */ + struct rte_eth_vmdq_rx_conf vmdq_rx_conf; + /**< Port vmdq RX configuration. */ + } rx_adv_conf; /**< Port RX filtering configuration (union). 
*/ + union { + struct rte_eth_vmdq_dcb_tx_conf vmdq_dcb_tx_conf; + /**< Port vmdq+dcb TX configuration. */ + struct rte_eth_dcb_tx_conf dcb_tx_conf; + /**< Port dcb TX configuration. */ + struct rte_eth_vmdq_tx_conf vmdq_tx_conf; + /**< Port vmdq TX configuration. */ + } tx_adv_conf; /**< Port TX DCB configuration (union). */ + /** Currently,Priority Flow Control(PFC) are supported,if DCB with PFC + is needed,and the variable must be set ETH_DCB_PFC_SUPPORT. */ + uint32_t dcb_capability_en; + struct rte_fdir_conf fdir_conf; /**< FDIR configuration. */ + struct rte_intr_conf intr_conf; /**< Interrupt mode configuration. */ +}; + +/** + * A structure used to retrieve the contextual information of + * an Ethernet device, such as the controlling driver of the device, + * its PCI context, etc... + */ + +/** + * RX offload capabilities of a device. + */ +#define DEV_RX_OFFLOAD_VLAN_STRIP 0x00000001 +#define DEV_RX_OFFLOAD_IPV4_CKSUM 0x00000002 +#define DEV_RX_OFFLOAD_UDP_CKSUM 0x00000004 +#define DEV_RX_OFFLOAD_TCP_CKSUM 0x00000008 +#define DEV_RX_OFFLOAD_TCP_LRO 0x00000010 +#define DEV_RX_OFFLOAD_QINQ_STRIP 0x00000020 + +/** + * TX offload capabilities of a device. + */ +#define DEV_TX_OFFLOAD_VLAN_INSERT 0x00000001 +#define DEV_TX_OFFLOAD_IPV4_CKSUM 0x00000002 +#define DEV_TX_OFFLOAD_UDP_CKSUM 0x00000004 +#define DEV_TX_OFFLOAD_TCP_CKSUM 0x00000008 +#define DEV_TX_OFFLOAD_SCTP_CKSUM 0x00000010 +#define DEV_TX_OFFLOAD_TCP_TSO 0x00000020 +#define DEV_TX_OFFLOAD_UDP_TSO 0x00000040 +#define DEV_TX_OFFLOAD_OUTER_IPV4_CKSUM 0x00000080 /**< Used for tunneling packet. */ +#define DEV_TX_OFFLOAD_QINQ_INSERT 0x00000100 + +struct rte_eth_dev_info { + struct rte_pci_device *pci_dev; /**< Device PCI information. */ + const char *driver_name; /**< Device Driver name. */ + unsigned int if_index; /**< Index to bound host interface, or 0 if none. + Use if_indextoname() to translate into an interface name. */ + uint32_t min_rx_bufsize; /**< Minimum size of RX buffer. 
*/ + uint32_t max_rx_pktlen; /**< Maximum configurable length of RX pkt. */ + uint16_t max_rx_queues; /**< Maximum number of RX queues. */ + uint16_t max_tx_queues; /**< Maximum number of TX queues. */ + uint32_t max_mac_addrs; /**< Maximum number of MAC addresses. */ + uint32_t max_hash_mac_addrs; + /**< Maximum number of hash MAC addresses for MTA and UTA. */ + uint16_t max_vfs; /**< Maximum number of VFs. */ + uint16_t max_vmdq_pools; /**< Maximum number of VMDq pools. */ + uint32_t rx_offload_capa; /**< Device RX offload capabilities. */ + uint32_t tx_offload_capa; /**< Device TX offload capabilities. */ + uint16_t reta_size; + /**< Device redirection table size, the total number of entries. */ + uint8_t hash_key_size; /**< Hash key size in bytes */ + /** Bit mask of RSS offloads, the bit offset also means flow type */ + uint64_t flow_type_rss_offloads; + struct rte_eth_rxconf default_rxconf; /**< Default RX configuration */ + struct rte_eth_txconf default_txconf; /**< Default TX configuration */ + uint16_t vmdq_queue_base; /**< First queue ID for VMDQ pools. */ + uint16_t vmdq_queue_num; /**< Queue number for VMDQ pools. */ + uint16_t vmdq_pool_base; /**< First ID of VMDQ pools. */ + struct rte_eth_desc_lim rx_desc_lim; /**< RX descriptors limits */ + struct rte_eth_desc_lim tx_desc_lim; /**< TX descriptors limits */ +}; + +/** + * Ethernet device RX queue information structure. + * Used to retrieve information about a configured queue. + */ +struct rte_eth_rxq_info { + struct rte_mempool *mp; /**< mempool used by that queue. */ + struct rte_eth_rxconf conf; /**< queue config parameters. */ + uint8_t scattered_rx; /**< scattered packets RX supported. */ + uint16_t nb_desc; /**< configured number of RXDs. */ +} __rte_cache_aligned; + +/** + * Ethernet device TX queue information structure. + * Used to retrieve information about a configured queue. + */ +struct rte_eth_txq_info { + struct rte_eth_txconf conf; /**< queue config parameters. 
*/ + uint16_t nb_desc; /**< configured number of TXDs. */ +} __rte_cache_aligned; + +/** Maximum name length for extended statistics counters */ +#define RTE_ETH_XSTATS_NAME_SIZE 64 + +/** + * An Ethernet device extended statistic structure + * + * This structure is used by ethdev->eth_xstats_get() to provide + * statistics that are not provided in the generic rte_eth_stats + * structure. + */ +struct rte_eth_xstats { + char name[RTE_ETH_XSTATS_NAME_SIZE]; + uint64_t value; +}; + +#define ETH_DCB_NUM_TCS 8 +#define ETH_MAX_VMDQ_POOL 64 + +/** + * A structure used to get the information of queue and + * TC mapping on both TX and RX paths. + */ +struct rte_eth_dcb_tc_queue_mapping { + /** rx queues assigned to tc per Pool */ + struct { + uint8_t base; + uint8_t nb_queue; + } tc_rxq[ETH_MAX_VMDQ_POOL][ETH_DCB_NUM_TCS]; + /** tx queues assigned to tc per Pool */ + struct { + uint8_t base; + uint8_t nb_queue; + } tc_txq[ETH_MAX_VMDQ_POOL][ETH_DCB_NUM_TCS]; +}; + +/** + * A structure used to get the information of DCB. + * It includes TC UP mapping and queue TC mapping. + */ +struct rte_eth_dcb_info { + uint8_t nb_tcs; /**< number of TCs */ + uint8_t prio_tc[ETH_DCB_NUM_USER_PRIORITIES]; /**< Priority to tc */ + uint8_t tc_bws[ETH_DCB_NUM_TCS]; /**< TX BW percentage for each TC */ + /** rx and tx queues assigned to each tc */ + struct rte_eth_dcb_tc_queue_mapping tc_queue; +}; + +/** + * RX/TX queue states + */ +#define RTE_ETH_QUEUE_STATE_STOPPED 0 +#define RTE_ETH_QUEUE_STATE_STARTED 1 + +struct rte_eth_dev; + +struct rte_eth_dev_callback; +/** @internal Structure to keep track of registered callbacks */ +TAILQ_HEAD(rte_eth_dev_cb_list, rte_eth_dev_callback); + + +#ifdef RTE_LIBRTE_ETHDEV_DEBUG +#define RTE_PMD_DEBUG_TRACE(...) \ + rte_pmd_debug_trace(__func__, __VA_ARGS__) +#else +#define RTE_PMD_DEBUG_TRACE(...) 
+#endif + + +/* Macros that make the calling function return (retval, or nothing) when port_id fails rte_eth_dev_is_valid_port() */ +#define RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, retval) do { \ + if (!rte_eth_dev_is_valid_port(port_id)) { \ + RTE_PMD_DEBUG_TRACE("Invalid port_id=%d\n", port_id); \ + return retval; \ + } \ +} while (0) + +#define RTE_ETH_VALID_PORTID_OR_RET(port_id) do { \ + if (!rte_eth_dev_is_valid_port(port_id)) { \ + RTE_PMD_DEBUG_TRACE("Invalid port_id=%d\n", port_id); \ + return; \ + } \ +} while (0) + +/* + * Definitions of all functions exported by an Ethernet driver through + * the generic structure of type *eth_dev_ops* supplied in the *rte_eth_dev* + * structure associated with an Ethernet device. + */ + +typedef int (*eth_dev_configure_t)(struct rte_eth_dev *dev); +/**< @internal Ethernet device configuration. */ + +typedef int (*eth_dev_start_t)(struct rte_eth_dev *dev); +/**< @internal Function used to start a configured Ethernet device. */ + +typedef void (*eth_dev_stop_t)(struct rte_eth_dev *dev); +/**< @internal Function used to stop a configured Ethernet device. */ + +typedef int (*eth_dev_set_link_up_t)(struct rte_eth_dev *dev); +/**< @internal Function used to link up a configured Ethernet device. */ + +typedef int (*eth_dev_set_link_down_t)(struct rte_eth_dev *dev); +/**< @internal Function used to link down a configured Ethernet device. */ + +typedef void (*eth_dev_close_t)(struct rte_eth_dev *dev); +/**< @internal Function used to close a configured Ethernet device. */ + +typedef void (*eth_promiscuous_enable_t)(struct rte_eth_dev *dev); +/**< @internal Function used to enable the RX promiscuous mode of an Ethernet device. */ + +typedef void (*eth_promiscuous_disable_t)(struct rte_eth_dev *dev); +/**< @internal Function used to disable the RX promiscuous mode of an Ethernet device. */ + +typedef void (*eth_allmulticast_enable_t)(struct rte_eth_dev *dev); +/**< @internal Enable the receipt of all multicast packets by an Ethernet device. 
*/ + +typedef void (*eth_allmulticast_disable_t)(struct rte_eth_dev *dev); +/**< @internal Disable the receipt of all multicast packets by an Ethernet device. */ + +typedef int (*eth_link_update_t)(struct rte_eth_dev *dev, + int wait_to_complete); +/**< @internal Get link speed, duplex mode and state (up/down) of an Ethernet device. */ + +typedef void (*eth_stats_get_t)(struct rte_eth_dev *dev, + struct rte_eth_stats *igb_stats); +/**< @internal Get global I/O statistics of an Ethernet device. */ + +typedef void (*eth_stats_reset_t)(struct rte_eth_dev *dev); +/**< @internal Reset global I/O statistics of an Ethernet device to 0. */ + +typedef int (*eth_xstats_get_t)(struct rte_eth_dev *dev, + struct rte_eth_xstats *stats, unsigned n); +/**< @internal Get extended stats of an Ethernet device. */ + +typedef void (*eth_xstats_reset_t)(struct rte_eth_dev *dev); +/**< @internal Reset extended stats of an Ethernet device. */ + +typedef int (*eth_queue_stats_mapping_set_t)(struct rte_eth_dev *dev, + uint16_t queue_id, + uint8_t stat_idx, + uint8_t is_rx); +/**< @internal Set a queue statistics mapping for a tx/rx queue of an Ethernet device. */ + +typedef void (*eth_dev_infos_get_t)(struct rte_eth_dev *dev, + struct rte_eth_dev_info *dev_info); +/**< @internal Get specific informations of an Ethernet device. */ + +typedef int (*eth_queue_start_t)(struct rte_eth_dev *dev, + uint16_t queue_id); +/**< @internal Start rx and tx of a queue of an Ethernet device. */ + +typedef int (*eth_queue_stop_t)(struct rte_eth_dev *dev, + uint16_t queue_id); +/**< @internal Stop rx and tx of a queue of an Ethernet device. */ + +typedef int (*eth_rx_queue_setup_t)(struct rte_eth_dev *dev, + uint16_t rx_queue_id, + uint16_t nb_rx_desc, + unsigned int socket_id, + const struct rte_eth_rxconf *rx_conf, + struct rte_mempool *mb_pool); +/**< @internal Set up a receive queue of an Ethernet device. 
*/ + +typedef int (*eth_tx_queue_setup_t)(struct rte_eth_dev *dev, + uint16_t tx_queue_id, + uint16_t nb_tx_desc, + unsigned int socket_id, + const struct rte_eth_txconf *tx_conf); +/**< @internal Setup a transmit queue of an Ethernet device. */ + +typedef int (*eth_rx_enable_intr_t)(struct rte_eth_dev *dev, + uint16_t rx_queue_id); +/**< @internal Enable interrupt of a receive queue of an Ethernet device. */ + +typedef int (*eth_rx_disable_intr_t)(struct rte_eth_dev *dev, + uint16_t rx_queue_id); +/**< @internal Disable interrupt of a receive queue of an Ethernet device. */ + +typedef void (*eth_queue_release_t)(void *queue); +/**< @internal Release memory resources allocated by given RX/TX queue. */ + +typedef uint32_t (*eth_rx_queue_count_t)(struct rte_eth_dev *dev, + uint16_t rx_queue_id); +/**< @internal Get number of available descriptors on a receive queue of an Ethernet device. */ + +typedef int (*eth_rx_descriptor_done_t)(void *rxq, uint16_t offset); +/**< @internal Check DD bit of specific RX descriptor */ + +typedef void (*eth_rxq_info_get_t)(struct rte_eth_dev *dev, + uint16_t rx_queue_id, struct rte_eth_rxq_info *qinfo); + +typedef void (*eth_txq_info_get_t)(struct rte_eth_dev *dev, + uint16_t tx_queue_id, struct rte_eth_txq_info *qinfo); + +typedef int (*mtu_set_t)(struct rte_eth_dev *dev, uint16_t mtu); +/**< @internal Set MTU. */ + +typedef int (*vlan_filter_set_t)(struct rte_eth_dev *dev, + uint16_t vlan_id, + int on); +/**< @internal filtering of a VLAN Tag Identifier by an Ethernet device. */ + +typedef void (*vlan_tpid_set_t)(struct rte_eth_dev *dev, + uint16_t tpid); +/**< @internal set the outer VLAN-TPID by an Ethernet device. */ + +typedef void (*vlan_offload_set_t)(struct rte_eth_dev *dev, int mask); +/**< @internal set VLAN offload function by an Ethernet device. */ + +typedef int (*vlan_pvid_set_t)(struct rte_eth_dev *dev, + uint16_t vlan_id, + int on); +/**< @internal set port based TX VLAN insertion by an Ethernet device. 
*/ + +typedef void (*vlan_strip_queue_set_t)(struct rte_eth_dev *dev, + uint16_t rx_queue_id, + int on); +/**< @internal VLAN stripping enable/disable by an queue of Ethernet device. */ + +typedef uint16_t (*eth_rx_burst_t)(void *rxq, + struct rte_mbuf **rx_pkts, + uint16_t nb_pkts); +/**< @internal Retrieve input packets from a receive queue of an Ethernet device. */ + +typedef uint16_t (*eth_tx_burst_t)(void *txq, + struct rte_mbuf **tx_pkts, + uint16_t nb_pkts); +/**< @internal Send output packets on a transmit queue of an Ethernet device. */ + +typedef int (*flow_ctrl_get_t)(struct rte_eth_dev *dev, + struct rte_eth_fc_conf *fc_conf); +/**< @internal Get current flow control parameter on an Ethernet device */ + +typedef int (*flow_ctrl_set_t)(struct rte_eth_dev *dev, + struct rte_eth_fc_conf *fc_conf); +/**< @internal Setup flow control parameter on an Ethernet device */ + +typedef int (*priority_flow_ctrl_set_t)(struct rte_eth_dev *dev, + struct rte_eth_pfc_conf *pfc_conf); +/**< @internal Setup priority flow control parameter on an Ethernet device */ + +typedef int (*reta_update_t)(struct rte_eth_dev *dev, + struct rte_eth_rss_reta_entry64 *reta_conf, + uint16_t reta_size); +/**< @internal Update RSS redirection table on an Ethernet device */ + +typedef int (*reta_query_t)(struct rte_eth_dev *dev, + struct rte_eth_rss_reta_entry64 *reta_conf, + uint16_t reta_size); +/**< @internal Query RSS redirection table on an Ethernet device */ + +typedef int (*rss_hash_update_t)(struct rte_eth_dev *dev, + struct rte_eth_rss_conf *rss_conf); +/**< @internal Update RSS hash configuration of an Ethernet device */ + +typedef int (*rss_hash_conf_get_t)(struct rte_eth_dev *dev, + struct rte_eth_rss_conf *rss_conf); +/**< @internal Get current RSS hash configuration of an Ethernet device */ + +typedef int (*eth_dev_led_on_t)(struct rte_eth_dev *dev); +/**< @internal Turn on SW controllable LED on an Ethernet device */ + +typedef int (*eth_dev_led_off_t)(struct rte_eth_dev 
*dev); +/**< @internal Turn off SW controllable LED on an Ethernet device */ + +typedef void (*eth_mac_addr_remove_t)(struct rte_eth_dev *dev, uint32_t index); +/**< @internal Remove MAC address from receive address register */ + +typedef void (*eth_mac_addr_add_t)(struct rte_eth_dev *dev, + struct ether_addr *mac_addr, + uint32_t index, + uint32_t vmdq); +/**< @internal Set a MAC address into Receive Address Register */ + +typedef void (*eth_mac_addr_set_t)(struct rte_eth_dev *dev, + struct ether_addr *mac_addr); +/**< @internal Set a MAC address into Receive Address Register */ + +typedef int (*eth_uc_hash_table_set_t)(struct rte_eth_dev *dev, + struct ether_addr *mac_addr, + uint8_t on); +/**< @internal Set a Unicast Hash bitmap */ + +typedef int (*eth_uc_all_hash_table_set_t)(struct rte_eth_dev *dev, + uint8_t on); +/**< @internal Set all Unicast Hash bitmap */ + +typedef int (*eth_set_vf_rx_mode_t)(struct rte_eth_dev *dev, + uint16_t vf, + uint16_t rx_mode, + uint8_t on); +/**< @internal Set a VF receive mode */ + +typedef int (*eth_set_vf_rx_t)(struct rte_eth_dev *dev, + uint16_t vf, + uint8_t on); +/**< @internal Enable or disable a VF receive */ + +typedef int (*eth_set_vf_tx_t)(struct rte_eth_dev *dev, + uint16_t vf, + uint8_t on); +/**< @internal Enable or disable a VF transmit */ + +typedef int (*eth_set_vf_vlan_filter_t)(struct rte_eth_dev *dev, + uint16_t vlan, + uint64_t vf_mask, + uint8_t vlan_on); +/**< @internal Set VF VLAN pool filter */ + +typedef int (*eth_set_queue_rate_limit_t)(struct rte_eth_dev *dev, + uint16_t queue_idx, + uint16_t tx_rate); +/**< @internal Set queue TX rate */ + +typedef int (*eth_set_vf_rate_limit_t)(struct rte_eth_dev *dev, + uint16_t vf, + uint16_t tx_rate, + uint64_t q_msk); +/**< @internal Set VF TX rate */ + +typedef int (*eth_mirror_rule_set_t)(struct rte_eth_dev *dev, + struct rte_eth_mirror_conf *mirror_conf, + uint8_t rule_id, + uint8_t on); +/**< @internal Add a traffic mirroring rule on an Ethernet 
device */ + +typedef int (*eth_mirror_rule_reset_t)(struct rte_eth_dev *dev, + uint8_t rule_id); +/**< @internal Remove a traffic mirroring rule on an Ethernet device */ + +typedef int (*eth_udp_tunnel_add_t)(struct rte_eth_dev *dev, + struct rte_eth_udp_tunnel *tunnel_udp); +/**< @internal Add tunneling UDP info */ + +typedef int (*eth_udp_tunnel_del_t)(struct rte_eth_dev *dev, + struct rte_eth_udp_tunnel *tunnel_udp); +/**< @internal Delete tunneling UDP info */ + +typedef int (*eth_set_mc_addr_list_t)(struct rte_eth_dev *dev, + struct ether_addr *mc_addr_set, + uint32_t nb_mc_addr); +/**< @internal set the list of multicast addresses on an Ethernet device */ + +typedef int (*eth_timesync_enable_t)(struct rte_eth_dev *dev); +/**< @internal Function used to enable IEEE1588/802.1AS timestamping. */ + +typedef int (*eth_timesync_disable_t)(struct rte_eth_dev *dev); +/**< @internal Function used to disable IEEE1588/802.1AS timestamping. */ + +typedef int (*eth_timesync_read_rx_timestamp_t)(struct rte_eth_dev *dev, + struct timespec *timestamp, + uint32_t flags); +/**< @internal Function used to read an RX IEEE1588/802.1AS timestamp. */ + +typedef int (*eth_timesync_read_tx_timestamp_t)(struct rte_eth_dev *dev, + struct timespec *timestamp); +/**< @internal Function used to read a TX IEEE1588/802.1AS timestamp. */ + +typedef int (*eth_timesync_adjust_time)(struct rte_eth_dev *dev, int64_t); +/**< @internal Function used to adjust the device clock */ + +typedef int (*eth_timesync_read_time)(struct rte_eth_dev *dev, + struct timespec *timestamp); +/**< @internal Function used to get time from the device clock. 
*/ + +typedef int (*eth_timesync_write_time)(struct rte_eth_dev *dev, + const struct timespec *timestamp); +/**< @internal Function used to set the time of the device clock */ + +typedef int (*eth_get_reg_length_t)(struct rte_eth_dev *dev); +/**< @internal Retrieve device register count */ + +typedef int (*eth_get_reg_t)(struct rte_eth_dev *dev, + struct rte_dev_reg_info *info); +/**< @internal Retrieve registers */ + +typedef int (*eth_get_eeprom_length_t)(struct rte_eth_dev *dev); +/**< @internal Retrieve eeprom size */ + +typedef int (*eth_get_eeprom_t)(struct rte_eth_dev *dev, + struct rte_dev_eeprom_info *info); +/**< @internal Retrieve eeprom data */ + +typedef int (*eth_set_eeprom_t)(struct rte_eth_dev *dev, + struct rte_dev_eeprom_info *info); +/**< @internal Program eeprom data */ + +#ifdef RTE_NIC_BYPASS + +enum { + RTE_BYPASS_MODE_NONE, + RTE_BYPASS_MODE_NORMAL, + RTE_BYPASS_MODE_BYPASS, + RTE_BYPASS_MODE_ISOLATE, + RTE_BYPASS_MODE_NUM, +}; + +#define RTE_BYPASS_MODE_VALID(x) \ + ((x) > RTE_BYPASS_MODE_NONE && (x) < RTE_BYPASS_MODE_NUM) + +enum { + RTE_BYPASS_EVENT_NONE, + RTE_BYPASS_EVENT_START, + RTE_BYPASS_EVENT_OS_ON = RTE_BYPASS_EVENT_START, + RTE_BYPASS_EVENT_POWER_ON, + RTE_BYPASS_EVENT_OS_OFF, + RTE_BYPASS_EVENT_POWER_OFF, + RTE_BYPASS_EVENT_TIMEOUT, + RTE_BYPASS_EVENT_NUM +}; + +#define RTE_BYPASS_EVENT_VALID(x) \ + ((x) > RTE_BYPASS_EVENT_NONE && (x) < RTE_BYPASS_EVENT_NUM) + +enum { + RTE_BYPASS_TMT_OFF, /* timeout disabled. 
*/ + RTE_BYPASS_TMT_1_5_SEC, /* timeout for 1.5 seconds */ + RTE_BYPASS_TMT_2_SEC, /* timeout for 2 seconds */ + RTE_BYPASS_TMT_3_SEC, /* timeout for 3 seconds */ + RTE_BYPASS_TMT_4_SEC, /* timeout for 4 seconds */ + RTE_BYPASS_TMT_8_SEC, /* timeout for 8 seconds */ + RTE_BYPASS_TMT_16_SEC, /* timeout for 16 seconds */ + RTE_BYPASS_TMT_32_SEC, /* timeout for 32 seconds */ + RTE_BYPASS_TMT_NUM +}; + +#define RTE_BYPASS_TMT_VALID(x) \ + ((x) == RTE_BYPASS_TMT_OFF || \ + ((x) > RTE_BYPASS_TMT_OFF && (x) < RTE_BYPASS_TMT_NUM)) + +typedef void (*bypass_init_t)(struct rte_eth_dev *dev); +typedef int32_t (*bypass_state_set_t)(struct rte_eth_dev *dev, uint32_t *new_state); +typedef int32_t (*bypass_state_show_t)(struct rte_eth_dev *dev, uint32_t *state); +typedef int32_t (*bypass_event_set_t)(struct rte_eth_dev *dev, uint32_t state, uint32_t event); +typedef int32_t (*bypass_event_show_t)(struct rte_eth_dev *dev, uint32_t event_shift, uint32_t *event); +typedef int32_t (*bypass_wd_timeout_set_t)(struct rte_eth_dev *dev, uint32_t timeout); +typedef int32_t (*bypass_wd_timeout_show_t)(struct rte_eth_dev *dev, uint32_t *wd_timeout); +typedef int32_t (*bypass_ver_show_t)(struct rte_eth_dev *dev, uint32_t *ver); +typedef int32_t (*bypass_wd_reset_t)(struct rte_eth_dev *dev); +#endif + +typedef int (*eth_filter_ctrl_t)(struct rte_eth_dev *dev, + enum rte_filter_type filter_type, + enum rte_filter_op filter_op, + void *arg); +/**< @internal Take operations to assigned filter type on an Ethernet device */ + +typedef int (*eth_get_dcb_info)(struct rte_eth_dev *dev, + struct rte_eth_dcb_info *dcb_info); +/**< @internal Get dcb information on an Ethernet device */ + +/** + * @internal A structure containing the functions exported by an Ethernet driver. + */ +struct eth_dev_ops { + eth_dev_configure_t dev_configure; /**< Configure device. */ + eth_dev_start_t dev_start; /**< Start device. */ + eth_dev_stop_t dev_stop; /**< Stop device. 
*/ + eth_dev_set_link_up_t dev_set_link_up; /**< Device link up. */ + eth_dev_set_link_down_t dev_set_link_down; /**< Device link down. */ + eth_dev_close_t dev_close; /**< Close device. */ + eth_promiscuous_enable_t promiscuous_enable; /**< Promiscuous ON. */ + eth_promiscuous_disable_t promiscuous_disable;/**< Promiscuous OFF. */ + eth_allmulticast_enable_t allmulticast_enable;/**< RX multicast ON. */ + eth_allmulticast_disable_t allmulticast_disable;/**< RX multicast OF. */ + eth_link_update_t link_update; /**< Get device link state. */ + eth_stats_get_t stats_get; /**< Get generic device statistics. */ + eth_stats_reset_t stats_reset; /**< Reset generic device statistics. */ + eth_xstats_get_t xstats_get; /**< Get extended device statistics. */ + eth_xstats_reset_t xstats_reset; /**< Reset extended device statistics. */ + eth_queue_stats_mapping_set_t queue_stats_mapping_set; + /**< Configure per queue stat counter mapping. */ + eth_dev_infos_get_t dev_infos_get; /**< Get device info. */ + mtu_set_t mtu_set; /**< Set MTU. */ + vlan_filter_set_t vlan_filter_set; /**< Filter VLAN Setup. */ + vlan_tpid_set_t vlan_tpid_set; /**< Outer VLAN TPID Setup. */ + vlan_strip_queue_set_t vlan_strip_queue_set; /**< VLAN Stripping on queue. */ + vlan_offload_set_t vlan_offload_set; /**< Set VLAN Offload. */ + vlan_pvid_set_t vlan_pvid_set; /**< Set port based TX VLAN insertion */ + eth_queue_start_t rx_queue_start;/**< Start RX for a queue.*/ + eth_queue_stop_t rx_queue_stop;/**< Stop RX for a queue.*/ + eth_queue_start_t tx_queue_start;/**< Start TX for a queue.*/ + eth_queue_stop_t tx_queue_stop;/**< Stop TX for a queue.*/ + eth_rx_queue_setup_t rx_queue_setup;/**< Set up device RX queue.*/ + eth_queue_release_t rx_queue_release;/**< Release RX queue.*/ + eth_rx_queue_count_t rx_queue_count; /**< Get Rx queue count. */ + eth_rx_descriptor_done_t rx_descriptor_done; /**< Check rxd DD bit */ + /**< Enable Rx queue interrupt. 
*/ + eth_rx_enable_intr_t rx_queue_intr_enable; + /**< Disable Rx queue interrupt.*/ + eth_rx_disable_intr_t rx_queue_intr_disable; + eth_tx_queue_setup_t tx_queue_setup;/**< Set up device TX queue.*/ + eth_queue_release_t tx_queue_release;/**< Release TX queue.*/ + eth_dev_led_on_t dev_led_on; /**< Turn on LED. */ + eth_dev_led_off_t dev_led_off; /**< Turn off LED. */ + flow_ctrl_get_t flow_ctrl_get; /**< Get flow control. */ + flow_ctrl_set_t flow_ctrl_set; /**< Setup flow control. */ + priority_flow_ctrl_set_t priority_flow_ctrl_set; /**< Setup priority flow control.*/ + eth_mac_addr_remove_t mac_addr_remove; /**< Remove MAC address */ + eth_mac_addr_add_t mac_addr_add; /**< Add a MAC address */ + eth_mac_addr_set_t mac_addr_set; /**< Set a MAC address */ + eth_uc_hash_table_set_t uc_hash_table_set; /**< Set Unicast Table Array */ + eth_uc_all_hash_table_set_t uc_all_hash_table_set; /**< Set Unicast hash bitmap */ + eth_mirror_rule_set_t mirror_rule_set; /**< Add a traffic mirror rule.*/ + eth_mirror_rule_reset_t mirror_rule_reset; /**< reset a traffic mirror rule.*/ + eth_set_vf_rx_mode_t set_vf_rx_mode; /**< Set VF RX mode */ + eth_set_vf_rx_t set_vf_rx; /**< enable/disable a VF receive */ + eth_set_vf_tx_t set_vf_tx; /**< enable/disable a VF transmit */ + eth_set_vf_vlan_filter_t set_vf_vlan_filter; /**< Set VF VLAN filter */ + eth_udp_tunnel_add_t udp_tunnel_add; + eth_udp_tunnel_del_t udp_tunnel_del; + eth_set_queue_rate_limit_t set_queue_rate_limit; /**< Set queue rate limit */ + eth_set_vf_rate_limit_t set_vf_rate_limit; /**< Set VF rate limit */ + /** Update redirection table. */ + reta_update_t reta_update; + /** Query redirection table. 
*/ + reta_query_t reta_query; + + eth_get_reg_length_t get_reg_length; + /**< Get # of registers */ + eth_get_reg_t get_reg; + /**< Get registers */ + eth_get_eeprom_length_t get_eeprom_length; + /**< Get eeprom length */ + eth_get_eeprom_t get_eeprom; + /**< Get eeprom data */ + eth_set_eeprom_t set_eeprom; + /**< Set eeprom */ + /* bypass control */ +#ifdef RTE_NIC_BYPASS + bypass_init_t bypass_init; + bypass_state_set_t bypass_state_set; + bypass_state_show_t bypass_state_show; + bypass_event_set_t bypass_event_set; + bypass_event_show_t bypass_event_show; + bypass_wd_timeout_set_t bypass_wd_timeout_set; + bypass_wd_timeout_show_t bypass_wd_timeout_show; + bypass_ver_show_t bypass_ver_show; + bypass_wd_reset_t bypass_wd_reset; +#endif + + /** Configure RSS hash protocols. */ + rss_hash_update_t rss_hash_update; + /** Get current RSS hash configuration. */ + rss_hash_conf_get_t rss_hash_conf_get; + eth_filter_ctrl_t filter_ctrl; + /**< common filter control. */ + eth_set_mc_addr_list_t set_mc_addr_list; /**< set list of mcast addrs */ + eth_rxq_info_get_t rxq_info_get; + /**< retrieve RX queue information. */ + eth_txq_info_get_t txq_info_get; + /**< retrieve TX queue information. */ + /** Turn IEEE1588/802.1AS timestamping on. */ + eth_timesync_enable_t timesync_enable; + /** Turn IEEE1588/802.1AS timestamping off. */ + eth_timesync_disable_t timesync_disable; + /** Read the IEEE1588/802.1AS RX timestamp. */ + eth_timesync_read_rx_timestamp_t timesync_read_rx_timestamp; + /** Read the IEEE1588/802.1AS TX timestamp. */ + eth_timesync_read_tx_timestamp_t timesync_read_tx_timestamp; + + /** Get DCB information */ + eth_get_dcb_info get_dcb_info; + /** Adjust the device clock.*/ + eth_timesync_adjust_time timesync_adjust_time; + /** Get the device clock time. */ + eth_timesync_read_time timesync_read_time; + /** Set the device clock time. */ + eth_timesync_write_time timesync_write_time; +}; + +/** + * Function type used for RX packet processing packet callbacks. 
+ * + * The callback function is called on RX with a burst of packets that have + * been received on the given port and queue. + * + * @param port + * The Ethernet port on which RX is being performed. + * @param queue + * The queue on the Ethernet port which is being used to receive the packets. + * @param pkts + * The burst of packets that have just been received. + * @param nb_pkts + * The number of packets in the burst pointed to by "pkts". + * @param max_pkts + * The max number of packets that can be stored in the "pkts" array. + * @param user_param + * The arbitrary user parameter passed in by the application when the callback + * was originally configured. + * @return + * The number of packets returned to the user. + */ +typedef uint16_t (*rte_rx_callback_fn)(uint8_t port, uint16_t queue, + struct rte_mbuf *pkts[], uint16_t nb_pkts, uint16_t max_pkts, + void *user_param); + +/** + * Function type used for TX packet processing packet callbacks. + * + * The callback function is called on TX with a burst of packets immediately + * before the packets are put onto the hardware queue for transmission. + * + * @param port + * The Ethernet port on which TX is being performed. + * @param queue + * The queue on the Ethernet port which is being used to transmit the packets. + * @param pkts + * The burst of packets that are about to be transmitted. + * @param nb_pkts + * The number of packets in the burst pointed to by "pkts". + * @param user_param + * The arbitrary user parameter passed in by the application when the callback + * was originally configured. + * @return + * The number of packets to be written to the NIC. + */ +typedef uint16_t (*rte_tx_callback_fn)(uint8_t port, uint16_t queue, + struct rte_mbuf *pkts[], uint16_t nb_pkts, void *user_param); + +/** + * @internal + * Structure used to hold information about the callbacks to be called for a + * queue on RX and TX. 
+ */ +struct rte_eth_rxtx_callback { + struct rte_eth_rxtx_callback *next; + union{ + rte_rx_callback_fn rx; + rte_tx_callback_fn tx; + } fn; + void *param; +}; + +/** + * The eth device type. + */ +enum rte_eth_dev_type { + RTE_ETH_DEV_UNKNOWN, /**< unknown device type */ + RTE_ETH_DEV_PCI, + /**< Physical function and Virtual function of PCI devices */ + RTE_ETH_DEV_VIRTUAL, /**< non hardware device */ + RTE_ETH_DEV_MAX /**< max value of this enum */ +}; + +/** + * @internal + * The generic data structure associated with each ethernet device. + * + * Pointers to burst-oriented packet receive and transmit functions are + * located at the beginning of the structure, along with the pointer to + * where all the data elements for the particular device are stored in shared + * memory. This split allows the function pointer and driver data to be per- + * process, while the actual configuration data for the device is shared. + */ +struct rte_eth_dev { + eth_rx_burst_t rx_pkt_burst; /**< Pointer to PMD receive function. */ + eth_tx_burst_t tx_pkt_burst; /**< Pointer to PMD transmit function. */ + struct rte_eth_dev_data *data; /**< Pointer to device data */ + const struct eth_driver *driver;/**< Driver for this device */ + const struct eth_dev_ops *dev_ops; /**< Functions exported by PMD */ + struct rte_pci_device *pci_dev; /**< PCI info. supplied by probing */ + /** User application callbacks for NIC interrupts */ + struct rte_eth_dev_cb_list link_intr_cbs; + /** + * User-supplied functions called from rx_burst to post-process + * received packets before passing them to the user + */ + struct rte_eth_rxtx_callback *post_rx_burst_cbs[RTE_MAX_QUEUES_PER_PORT]; + /** + * User-supplied functions called from tx_burst to pre-process + * received packets before passing them to the driver for transmission. 
+ */ + struct rte_eth_rxtx_callback *pre_tx_burst_cbs[RTE_MAX_QUEUES_PER_PORT]; + uint8_t attached; /**< Flag indicating the port is attached */ + enum rte_eth_dev_type dev_type; /**< Flag indicating the device type */ +}; + +struct rte_eth_dev_sriov { + uint8_t active; /**< SRIOV is active with 16, 32 or 64 pools */ + uint8_t nb_q_per_pool; /**< rx queue number per pool */ + uint16_t def_vmdq_idx; /**< Default pool num used for PF */ + uint16_t def_pool_q_idx; /**< Default pool queue start reg index */ +}; +#define RTE_ETH_DEV_SRIOV(dev) ((dev)->data->sriov) + +#define RTE_ETH_NAME_MAX_LEN (32) + +/** + * @internal + * The data part, with no function pointers, associated with each ethernet device. + * + * This structure is safe to place in shared memory to be common among different + * processes in a multi-process configuration. + */ +struct rte_eth_dev_data { + char name[RTE_ETH_NAME_MAX_LEN]; /**< Unique identifier name */ + + void **rx_queues; /**< Array of pointers to RX queues. */ + void **tx_queues; /**< Array of pointers to TX queues. */ + uint16_t nb_rx_queues; /**< Number of RX queues. */ + uint16_t nb_tx_queues; /**< Number of TX queues. */ + + struct rte_eth_dev_sriov sriov; /**< SRIOV data */ + + void *dev_private; /**< PMD-specific private data */ + + struct rte_eth_link dev_link; + /**< Link-level information & status */ + + struct rte_eth_conf dev_conf; /**< Configuration applied to device. */ + uint16_t mtu; /**< Maximum Transmission Unit. */ + + uint32_t min_rx_buf_size; + /**< Common rx buffer size handled by all queues */ + + uint64_t rx_mbuf_alloc_failed; /**< RX ring mbuf allocation failures. */ + struct ether_addr* mac_addrs;/**< Device Ethernet Link address. */ + uint64_t mac_pool_sel[ETH_NUM_RECEIVE_MAC_ADDR]; + /** bitmap array of associating Ethernet MAC addresses to pools */ + struct ether_addr* hash_mac_addrs; + /** Device Ethernet MAC addresses of hash filtering. */ + uint8_t port_id; /**< Device [external] port identifier. 
*/ + uint8_t promiscuous : 1, /**< RX promiscuous mode ON(1) / OFF(0). */ + scattered_rx : 1, /**< RX of scattered packets is ON(1) / OFF(0) */ + all_multicast : 1, /**< RX all multicast mode ON(1) / OFF(0). */ + dev_started : 1, /**< Device state: STARTED(1) / STOPPED(0). */ + lro : 1; /**< RX LRO is ON(1) / OFF(0) */ + uint8_t rx_queue_state[RTE_MAX_QUEUES_PER_PORT]; + /** Queues state: STARTED(1) / STOPPED(0) */ + uint8_t tx_queue_state[RTE_MAX_QUEUES_PER_PORT]; + /** Queues state: STARTED(1) / STOPPED(0) */ + uint32_t dev_flags; /**< Capabilities */ + enum rte_kernel_driver kdrv; /**< Kernel driver passthrough */ + int numa_node; /**< NUMA node connection */ + const char *drv_name; /**< Driver name */ +}; + +/** Device supports hotplug detach */ +#define RTE_ETH_DEV_DETACHABLE 0x0001 +/** Device supports link state interrupt */ +#define RTE_ETH_DEV_INTR_LSC 0x0002 + +/** + * @internal + * The pool of *rte_eth_dev* structures. The size of the pool + * is configured at compile-time in the file. + */ +extern struct rte_eth_dev rte_eth_devices[]; + +/** + * Get the total number of Ethernet devices that have been successfully + * initialized by the [matching] Ethernet driver during the PCI probing phase. + * All devices whose port identifier is in the range + * [0, rte_eth_dev_count() - 1] can be operated on by network applications + * immediately after invoking rte_eal_init(). + * If the application unplugs a port using hotplug function, The enabled port + * numbers may be noncontiguous. In the case, the applications need to manage + * enabled port by themselves. + * + * @return + * - The total number of usable Ethernet devices. + */ +extern uint8_t rte_eth_dev_count(void); + +/** + * @internal + * Returns a ethdev slot specified by the unique identifier name. + * + * @param name + * The pointer to the Unique identifier name for each Ethernet device + * @return + * - The pointer to the ethdev slot, on success. 
NULL on error + */ +extern struct rte_eth_dev *rte_eth_dev_allocated(const char *name); + +/** + * @internal + * Allocates a new ethdev slot for an ethernet device and returns the pointer + * to that slot for the driver to use. + * + * @param name Unique identifier name for each Ethernet device + * @param type Device type of this Ethernet device + * @return + * - Slot in the rte_eth_devices array for a new device; + */ +struct rte_eth_dev *rte_eth_dev_allocate(const char *name, + enum rte_eth_dev_type type); + +/** + * @internal + * Release the specified ethdev port. + * + * @param eth_dev + * The *eth_dev* pointer is the address of the *rte_eth_dev* structure. + * @return + * - 0 on success, negative on error + */ +int rte_eth_dev_release_port(struct rte_eth_dev *eth_dev); + +/** + * Attach a new Ethernet device specified by arguments. + * + * @param devargs + * A pointer to a strings array describing the new device + * to be attached. The strings should be a pci address like + * '0000:01:00.0' or virtual device name like 'eth_pcap0'. + * @param port_id + * A pointer to a port identifier actually attached. + * @return + * 0 on success and port_id is filled, negative on error + */ +int rte_eth_dev_attach(const char *devargs, uint8_t *port_id); + +/** + * Detach an Ethernet device specified by port identifier. + * This function must be called when the device is in the + * closed state. + * + * @param port_id + * The port identifier of the device to detach. + * @param devname + * A pointer to a device name actually detached. + * @return + * 0 on success and devname is filled, negative on error + */ +int rte_eth_dev_detach(uint8_t port_id, char *devname); + +struct eth_driver; +/** + * @internal + * Initialization function of an Ethernet driver invoked for each matching + * Ethernet PCI device detected during the PCI probing phase. 
+ * + * @param eth_dev + * The *eth_dev* pointer is the address of the *rte_eth_dev* structure + * associated with the matching device and which have been [automatically] + * allocated in the *rte_eth_devices* array. + * The *eth_dev* structure is supplied to the driver initialization function + * with the following fields already initialized: + * + * - *pci_dev*: Holds the pointers to the *rte_pci_device* structure which + * contains the generic PCI information of the matching device. + * + * - *driver*: Holds the pointer to the *eth_driver* structure. + * + * - *dev_private*: Holds a pointer to the device private data structure. + * + * - *mtu*: Contains the default Ethernet maximum frame length (1500). + * + * - *port_id*: Contains the port index of the device (actually the index + * of the *eth_dev* structure in the *rte_eth_devices* array). + * + * @return + * - 0: Success, the device is properly initialized by the driver. + * In particular, the driver MUST have set up the *dev_ops* pointer + * of the *eth_dev* structure. + * - <0: Error code of the device initialization failure. + */ +typedef int (*eth_dev_init_t)(struct rte_eth_dev *eth_dev); + +/** + * @internal + * Finalization function of an Ethernet driver invoked for each matching + * Ethernet PCI device detected during the PCI closing phase. + * + * @param eth_dev + * The *eth_dev* pointer is the address of the *rte_eth_dev* structure + * associated with the matching device and which have been [automatically] + * allocated in the *rte_eth_devices* array. + * @return + * - 0: Success, the device is properly finalized by the driver. + * In particular, the driver MUST free the *dev_ops* pointer + * of the *eth_dev* structure. + * - <0: Error code of the device initialization failure. + */ +typedef int (*eth_dev_uninit_t)(struct rte_eth_dev *eth_dev); + +/** + * @internal + * The structure associated with a PMD Ethernet driver. 
+ * + * Each Ethernet driver acts as a PCI driver and is represented by a generic + * *eth_driver* structure that holds: + * + * - An *rte_pci_driver* structure (which must be the first field). + * + * - The *eth_dev_init* function invoked for each matching PCI device. + * + * - The *eth_dev_uninit* function invoked for each matching PCI device. + * + * - The size of the private data to allocate for each matching device. + */ +struct eth_driver { + struct rte_pci_driver pci_drv; /**< The PMD is also a PCI driver. */ + eth_dev_init_t eth_dev_init; /**< Device init function. */ + eth_dev_uninit_t eth_dev_uninit; /**< Device uninit function. */ + unsigned int dev_private_size; /**< Size of device private data. */ +}; + +/** + * @internal + * A function invoked by the initialization function of an Ethernet driver + * to simultaneously register itself as a PCI driver and as an Ethernet + * Poll Mode Driver (PMD). + * + * @param eth_drv + * The pointer to the *eth_driver* structure associated with + * the Ethernet driver. + */ +extern void rte_eth_driver_register(struct eth_driver *eth_drv); + +/** + * Configure an Ethernet device. + * This function must be invoked first before any other function in the + * Ethernet API. This function can also be re-invoked when a device is in the + * stopped state. + * + * @param port_id + * The port identifier of the Ethernet device to configure. + * @param nb_rx_queue + * The number of receive queues to set up for the Ethernet device. + * @param nb_tx_queue + * The number of transmit queues to set up for the Ethernet device. + * @param eth_conf + * The pointer to the configuration data to be used for the Ethernet device. + * The *rte_eth_conf* structure includes: + * - the hardware offload features to activate, with dedicated fields for + * each statically configurable offload hardware feature provided by + * Ethernet devices, such as IP checksum or VLAN tag stripping for + * example. 
+ * - the Receive Side Scaling (RSS) configuration when using multiple RX + * queues per port. + * + * Embedding all configuration information in a single data structure + * is the more flexible method that allows the addition of new features + * without changing the syntax of the API. + * @return + * - 0: Success, device configured. + * - <0: Error code returned by the driver configuration function. + */ +extern int rte_eth_dev_configure(uint8_t port_id, + uint16_t nb_rx_queue, + uint16_t nb_tx_queue, + const struct rte_eth_conf *eth_conf); + +/** + * Allocate and set up a receive queue for an Ethernet device. + * + * The function allocates a contiguous block of memory for *nb_rx_desc* + * receive descriptors from a memory zone associated with *socket_id* + * and initializes each receive descriptor with a network buffer allocated + * from the memory pool *mb_pool*. + * + * @param port_id + * The port identifier of the Ethernet device. + * @param rx_queue_id + * The index of the receive queue to set up. + * The value must be in the range [0, nb_rx_queue - 1] previously supplied + * to rte_eth_dev_configure(). + * @param nb_rx_desc + * The number of receive descriptors to allocate for the receive ring. + * @param socket_id + * The *socket_id* argument is the socket identifier in case of NUMA. + * The value can be *SOCKET_ID_ANY* if there is no NUMA constraint for + * the DMA memory allocated for the receive descriptors of the ring. + * @param rx_conf + * The pointer to the configuration data to be used for the receive queue. + * NULL value is allowed, in which case default RX configuration + * will be used. + * The *rx_conf* structure contains an *rx_thresh* structure with the values + * of the Prefetch, Host, and Write-Back threshold registers of the receive + * ring. + * @param mb_pool + * The pointer to the memory pool from which to allocate *rte_mbuf* network + * memory buffers to populate each descriptor of the receive ring. 
+ * @return + * - 0: Success, receive queue correctly set up. + * - -EINVAL: The size of network buffers which can be allocated from the + * memory pool does not fit the various buffer sizes allowed by the + * device controller. + * - -ENOMEM: Unable to allocate the receive ring descriptors or to + * allocate network memory buffers from the memory pool when + * initializing receive descriptors. + */ +extern int rte_eth_rx_queue_setup(uint8_t port_id, uint16_t rx_queue_id, + uint16_t nb_rx_desc, unsigned int socket_id, + const struct rte_eth_rxconf *rx_conf, + struct rte_mempool *mb_pool); + +/** + * Allocate and set up a transmit queue for an Ethernet device. + * + * @param port_id + * The port identifier of the Ethernet device. + * @param tx_queue_id + * The index of the transmit queue to set up. + * The value must be in the range [0, nb_tx_queue - 1] previously supplied + * to rte_eth_dev_configure(). + * @param nb_tx_desc + * The number of transmit descriptors to allocate for the transmit ring. + * @param socket_id + * The *socket_id* argument is the socket identifier in case of NUMA. + * Its value can be *SOCKET_ID_ANY* if there is no NUMA constraint for + * the DMA memory allocated for the transmit descriptors of the ring. + * @param tx_conf + * The pointer to the configuration data to be used for the transmit queue. + * NULL value is allowed, in which case default RX configuration + * will be used. + * The *tx_conf* structure contains the following data: + * - The *tx_thresh* structure with the values of the Prefetch, Host, and + * Write-Back threshold registers of the transmit ring. + * When setting Write-Back threshold to the value greater then zero, + * *tx_rs_thresh* value should be explicitly set to one. + * - The *tx_free_thresh* value indicates the [minimum] number of network + * buffers that must be pending in the transmit ring to trigger their + * [implicit] freeing by the driver transmit function. 
+ * - The *tx_rs_thresh* value indicates the [minimum] number of transmit + * descriptors that must be pending in the transmit ring before setting the + * RS bit on a descriptor by the driver transmit function. + * The *tx_rs_thresh* value should be less or equal then + * *tx_free_thresh* value, and both of them should be less then + * *nb_tx_desc* - 3. + * - The *txq_flags* member contains flags to pass to the TX queue setup + * function to configure the behavior of the TX queue. This should be set + * to 0 if no special configuration is required. + * + * Note that setting *tx_free_thresh* or *tx_rs_thresh* value to 0 forces + * the transmit function to use default values. + * @return + * - 0: Success, the transmit queue is correctly set up. + * - -ENOMEM: Unable to allocate the transmit ring descriptors. + */ +extern int rte_eth_tx_queue_setup(uint8_t port_id, uint16_t tx_queue_id, + uint16_t nb_tx_desc, unsigned int socket_id, + const struct rte_eth_txconf *tx_conf); + +/* + * Return the NUMA socket to which an Ethernet device is connected + * + * @param port_id + * The port identifier of the Ethernet device + * @return + * The NUMA socket id to which the Ethernet device is connected or + * a default of zero if the socket could not be determined. + * -1 is returned if the port_id value is out of range. + */ +extern int rte_eth_dev_socket_id(uint8_t port_id); + +/* + * Check if port_id of device is attached + * + * @param port_id + * The port identifier of the Ethernet device + * @return + * - 0 if port is out of range or not attached + * - 1 if device is attached + */ +extern int rte_eth_dev_is_valid_port(uint8_t port_id); + +/* + * Allocate mbuf from mempool, setup the DMA physical address + * and then start RX for specified queue of a port. It is used + * when rx_deferred_start flag of the specified queue is true. + * + * @param port_id + * The port identifier of the Ethernet device + * @param rx_queue_id + * The index of the rx queue to update the ring. 
+ * The value must be in the range [0, nb_rx_queue - 1] previously supplied + * to rte_eth_dev_configure(). + * @return + * - 0: Success, the transmit queue is correctly set up. + * - -EINVAL: The port_id or the queue_id out of range. + * - -ENOTSUP: The function not supported in PMD driver. + */ +extern int rte_eth_dev_rx_queue_start(uint8_t port_id, uint16_t rx_queue_id); + +/* + * Stop specified RX queue of a port + * + * @param port_id + * The port identifier of the Ethernet device + * @param rx_queue_id + * The index of the rx queue to update the ring. + * The value must be in the range [0, nb_rx_queue - 1] previously supplied + * to rte_eth_dev_configure(). + * @return + * - 0: Success, the receive queue is stopped. + * - -EINVAL: The port_id or the queue_id out of range. + * - -ENOTSUP: The function not supported in PMD driver. + */ +extern int rte_eth_dev_rx_queue_stop(uint8_t port_id, uint16_t rx_queue_id); + +/* + * Start TX for specified queue of a port. It is used when tx_deferred_start + * flag of the specified queue is true. + * + * @param port_id + * The port identifier of the Ethernet device + * @param tx_queue_id + * The index of the tx queue to update the ring. + * The value must be in the range [0, nb_tx_queue - 1] previously supplied + * to rte_eth_dev_configure(). + * @return + * - 0: Success, the transmit queue is started. + * - -EINVAL: The port_id or the queue_id out of range. + * - -ENOTSUP: The function not supported in PMD driver. + */ +extern int rte_eth_dev_tx_queue_start(uint8_t port_id, uint16_t tx_queue_id); + +/* + * Stop specified TX queue of a port + * + * @param port_id + * The port identifier of the Ethernet device + * @param tx_queue_id + * The index of the tx queue to update the ring. + * The value must be in the range [0, nb_tx_queue - 1] previously supplied + * to rte_eth_dev_configure(). + * @return + * - 0: Success, the transmit queue is correctly set up. 
+ * - -EINVAL: The port_id or the queue_id out of range. + * - -ENOTSUP: The function not supported in PMD driver. + */ +extern int rte_eth_dev_tx_queue_stop(uint8_t port_id, uint16_t tx_queue_id); + + + +/** + * Start an Ethernet device. + * + * The device start step is the last one and consists of setting the configured + * offload features and in starting the transmit and the receive units of the + * device. + * On success, all basic functions exported by the Ethernet API (link status, + * receive/transmit, and so on) can be invoked. + * + * @param port_id + * The port identifier of the Ethernet device. + * @return + * - 0: Success, Ethernet device started. + * - <0: Error code of the driver device start function. + */ +extern int rte_eth_dev_start(uint8_t port_id); + +/** + * Stop an Ethernet device. The device can be restarted with a call to + * rte_eth_dev_start() + * + * @param port_id + * The port identifier of the Ethernet device. + */ +extern void rte_eth_dev_stop(uint8_t port_id); + + +/** + * Link up an Ethernet device. + * + * Set device link up will re-enable the device rx/tx + * functionality after it is previously set device linked down. + * + * @param port_id + * The port identifier of the Ethernet device. + * @return + * - 0: Success, Ethernet device linked up. + * - <0: Error code of the driver device link up function. + */ +extern int rte_eth_dev_set_link_up(uint8_t port_id); + +/** + * Link down an Ethernet device. + * The device rx/tx functionality will be disabled if success, + * and it can be re-enabled with a call to + * rte_eth_dev_set_link_up() + * + * @param port_id + * The port identifier of the Ethernet device. + */ +extern int rte_eth_dev_set_link_down(uint8_t port_id); + +/** + * Close a stopped Ethernet device. The device cannot be restarted! + * The function frees all resources except for needed by the + * closed state. To free these resources, call rte_eth_dev_detach(). 
+ * + * @param port_id + * The port identifier of the Ethernet device. + */ +extern void rte_eth_dev_close(uint8_t port_id); + +/** + * Enable receipt in promiscuous mode for an Ethernet device. + * + * @param port_id + * The port identifier of the Ethernet device. + */ +extern void rte_eth_promiscuous_enable(uint8_t port_id); + +/** + * Disable receipt in promiscuous mode for an Ethernet device. + * + * @param port_id + * The port identifier of the Ethernet device. + */ +extern void rte_eth_promiscuous_disable(uint8_t port_id); + +/** + * Return the value of promiscuous mode for an Ethernet device. + * + * @param port_id + * The port identifier of the Ethernet device. + * @return + * - (1) if promiscuous is enabled + * - (0) if promiscuous is disabled. + * - (-1) on error + */ +extern int rte_eth_promiscuous_get(uint8_t port_id); + +/** + * Enable the receipt of any multicast frame by an Ethernet device. + * + * @param port_id + * The port identifier of the Ethernet device. + */ +extern void rte_eth_allmulticast_enable(uint8_t port_id); + +/** + * Disable the receipt of all multicast frames by an Ethernet device. + * + * @param port_id + * The port identifier of the Ethernet device. + */ +extern void rte_eth_allmulticast_disable(uint8_t port_id); + +/** + * Return the value of allmulticast mode for an Ethernet device. + * + * @param port_id + * The port identifier of the Ethernet device. + * @return + * - (1) if allmulticast is enabled + * - (0) if allmulticast is disabled. + * - (-1) on error + */ +extern int rte_eth_allmulticast_get(uint8_t port_id); + +/** + * Retrieve the status (ON/OFF), the speed (in Mbps) and the mode (HALF-DUPLEX + * or FULL-DUPLEX) of the physical link of an Ethernet device. It might need + * to wait up to 9 seconds in it. + * + * @param port_id + * The port identifier of the Ethernet device. + * @param link + * A pointer to an *rte_eth_link* structure to be filled with + * the status, the speed and the mode of the Ethernet device link. 
+ */ +extern void rte_eth_link_get(uint8_t port_id, struct rte_eth_link *link); + +/** + * Retrieve the status (ON/OFF), the speed (in Mbps) and the mode (HALF-DUPLEX + * or FULL-DUPLEX) of the physical link of an Ethernet device. It is a no-wait + * version of rte_eth_link_get(). + * + * @param port_id + * The port identifier of the Ethernet device. + * @param link + * A pointer to an *rte_eth_link* structure to be filled with + * the status, the speed and the mode of the Ethernet device link. + */ +extern void rte_eth_link_get_nowait(uint8_t port_id, + struct rte_eth_link *link); + +/** + * Retrieve the general I/O statistics of an Ethernet device. + * + * @param port_id + * The port identifier of the Ethernet device. + * @param stats + * A pointer to a structure of type *rte_eth_stats* to be filled with + * the values of device counters for the following set of statistics: + * - *ipackets* with the total of successfully received packets. + * - *opackets* with the total of successfully transmitted packets. + * - *ibytes* with the total of successfully received bytes. + * - *obytes* with the total of successfully transmitted bytes. + * - *ierrors* with the total of erroneous received packets. + * - *oerrors* with the total of failed transmitted packets. + * @return + * Zero if successful. Non-zero otherwise. + */ +extern int rte_eth_stats_get(uint8_t port_id, struct rte_eth_stats *stats); + +/** + * Reset the general I/O statistics of an Ethernet device. + * + * @param port_id + * The port identifier of the Ethernet device. + */ +extern void rte_eth_stats_reset(uint8_t port_id); + +/** + * Retrieve extended statistics of an Ethernet device. + * + * @param port_id + * The port identifier of the Ethernet device. + * @param xstats + * A pointer to a table of structure of type *rte_eth_xstats* + * to be filled with device statistics names and values. + * This parameter can be set to NULL if n is 0. 
+ * @param n + * The size of the stats table, which should be large enough to store + * all the statistics of the device. + * @return + * - positive value lower or equal to n: success. The return value + * is the number of entries filled in the stats table. + * - positive value higher than n: error, the given statistics table + * is too small. The return value corresponds to the size that should + * be given to succeed. The entries in the table are not valid and + * shall not be used by the caller. + * - negative value on error (invalid port id) + */ +extern int rte_eth_xstats_get(uint8_t port_id, + struct rte_eth_xstats *xstats, unsigned n); + +/** + * Reset extended statistics of an Ethernet device. + * + * @param port_id + * The port identifier of the Ethernet device. + */ +extern void rte_eth_xstats_reset(uint8_t port_id); + +/** + * Set a mapping for the specified transmit queue to the specified per-queue + * statistics counter. + * + * @param port_id + * The port identifier of the Ethernet device. + * @param tx_queue_id + * The index of the transmit queue for which a queue stats mapping is required. + * The value must be in the range [0, nb_tx_queue - 1] previously supplied + * to rte_eth_dev_configure(). + * @param stat_idx + * The per-queue packet statistics functionality number that the transmit + * queue is to be assigned. + * The value must be in the range [0, RTE_MAX_ETHPORT_QUEUE_STATS_MAPS - 1]. + * @return + * Zero if successful. Non-zero otherwise. + */ +extern int rte_eth_dev_set_tx_queue_stats_mapping(uint8_t port_id, + uint16_t tx_queue_id, + uint8_t stat_idx); + +/** + * Set a mapping for the specified receive queue to the specified per-queue + * statistics counter. + * + * @param port_id + * The port identifier of the Ethernet device. + * @param rx_queue_id + * The index of the receive queue for which a queue stats mapping is required. + * The value must be in the range [0, nb_rx_queue - 1] previously supplied + * to rte_eth_dev_configure(). 
+ * @param stat_idx + * The per-queue packet statistics functionality number that the receive + * queue is to be assigned. + * The value must be in the range [0, RTE_MAX_ETHPORT_QUEUE_STATS_MAPS - 1]. + * @return + * Zero if successful. Non-zero otherwise. + */ +extern int rte_eth_dev_set_rx_queue_stats_mapping(uint8_t port_id, + uint16_t rx_queue_id, + uint8_t stat_idx); + +/** + * Retrieve the Ethernet address of an Ethernet device. + * + * @param port_id + * The port identifier of the Ethernet device. + * @param mac_addr + * A pointer to a structure of type *ether_addr* to be filled with + * the Ethernet address of the Ethernet device. + */ +extern void rte_eth_macaddr_get(uint8_t port_id, struct ether_addr *mac_addr); + +/** + * Retrieve the contextual information of an Ethernet device. + * + * @param port_id + * The port identifier of the Ethernet device. + * @param dev_info + * A pointer to a structure of type *rte_eth_dev_info* to be filled with + * the contextual information of the Ethernet device. + */ +extern void rte_eth_dev_info_get(uint8_t port_id, + struct rte_eth_dev_info *dev_info); + +/** + * Retrieve the MTU of an Ethernet device. + * + * @param port_id + * The port identifier of the Ethernet device. + * @param mtu + * A pointer to a uint16_t where the retrieved MTU is to be stored. + * @return + * - (0) if successful. + * - (-ENODEV) if *port_id* invalid. + */ +extern int rte_eth_dev_get_mtu(uint8_t port_id, uint16_t *mtu); + +/** + * Change the MTU of an Ethernet device. + * + * @param port_id + * The port identifier of the Ethernet device. + * @param mtu + * A uint16_t for the MTU to be applied. + * @return + * - (0) if successful. + * - (-ENOTSUP) if operation is not supported. + * - (-ENODEV) if *port_id* invalid. + * - (-EINVAL) if *mtu* invalid. 
+ */ +extern int rte_eth_dev_set_mtu(uint8_t port_id, uint16_t mtu); + +/** + * Enable/Disable hardware filtering by an Ethernet device of received + * VLAN packets tagged with a given VLAN Tag Identifier. + * + * @param port_id + * The port identifier of the Ethernet device. + * @param vlan_id + * The VLAN Tag Identifier whose filtering must be enabled or disabled. + * @param on + * If > 0, enable VLAN filtering of VLAN packets tagged with *vlan_id*. + * Otherwise, disable VLAN filtering of VLAN packets tagged with *vlan_id*. + * @return + * - (0) if successful. + * - (-ENOTSUP) if hardware-assisted VLAN filtering not configured. + * - (-ENODEV) if *port_id* invalid. + * - (-ENOSYS) if VLAN filtering on *port_id* disabled. + * - (-EINVAL) if *vlan_id* > 4095. + */ +extern int rte_eth_dev_vlan_filter(uint8_t port_id, uint16_t vlan_id , int on); + +/** + * Enable/Disable hardware VLAN Strip by a rx queue of an Ethernet device. + * 82599/X540/X550 can support VLAN stripping at the rx queue level + * + * @param port_id + * The port identifier of the Ethernet device. + * @param rx_queue_id + * The index of the receive queue on which VLAN stripping is to be set. + * The value must be in the range [0, nb_rx_queue - 1] previously supplied + * to rte_eth_dev_configure(). + * @param on + * If 1, Enable VLAN Stripping of the receive queue of the Ethernet port. + * If 0, Disable VLAN Stripping of the receive queue of the Ethernet port. + * @return + * - (0) if successful. + * - (-ENOTSUP) if hardware-assisted VLAN stripping not configured. + * - (-ENODEV) if *port_id* invalid. + * - (-EINVAL) if *rx_queue_id* invalid. + */ +extern int rte_eth_dev_set_vlan_strip_on_queue(uint8_t port_id, + uint16_t rx_queue_id, int on); + +/** + * Set the Outer VLAN Ether Type by an Ethernet device, it can be inserted to + * the VLAN Header. This is a register setup available on some Intel NICs, but + * not all; please check the data sheet for availability.
+ * + * @param port_id + * The port identifier of the Ethernet device. + * @param tag_type + * The Tag Protocol ID + * @return + * - (0) if successful. + * - (-ENOTSUP) if hardware-assisted VLAN TPID setup is not supported. + * - (-ENODEV) if *port_id* invalid. + */ +extern int rte_eth_dev_set_vlan_ether_type(uint8_t port_id, uint16_t tag_type); + +/** + * Set VLAN offload configuration on an Ethernet device + * Enable/Disable Extended VLAN by an Ethernet device, This is a register setup + * available on some Intel NICs, but not all; please check the data sheet for + * availability. + * Enable/Disable VLAN Strip can be done on rx queue for certain NIC, but here + * the configuration is applied on the port level. + * + * @param port_id + * The port identifier of the Ethernet device. + * @param offload_mask + * The VLAN Offload bit mask; the following flags can be combined with "OR" + * ETH_VLAN_STRIP_OFFLOAD + * ETH_VLAN_FILTER_OFFLOAD + * ETH_VLAN_EXTEND_OFFLOAD + * @return + * - (0) if successful. + * - (-ENOTSUP) if hardware-assisted VLAN filtering not configured. + * - (-ENODEV) if *port_id* invalid. + */ +extern int rte_eth_dev_set_vlan_offload(uint8_t port_id, int offload_mask); + +/** + * Read VLAN Offload configuration from an Ethernet device + * + * @param port_id + * The port identifier of the Ethernet device. + * @return + * - (>0) if successful. Bit mask to indicate + * ETH_VLAN_STRIP_OFFLOAD + * ETH_VLAN_FILTER_OFFLOAD + * ETH_VLAN_EXTEND_OFFLOAD + * - (-ENODEV) if *port_id* invalid. + */ +extern int rte_eth_dev_get_vlan_offload(uint8_t port_id); + +/** + * Set port based TX VLAN insertion on or off. + * + * @param port_id + * The port identifier of the Ethernet device. + * @param pvid + * Port based TX VLAN identifier together with user priority. + * @param on + * Turn on or off the port based TX VLAN insertion. + * + * @return + * - (0) if successful. + * - negative if failed.
+ */ +extern int rte_eth_dev_set_vlan_pvid(uint8_t port_id, uint16_t pvid, int on); + +/** + * + * Retrieve a burst of input packets from a receive queue of an Ethernet + * device. The retrieved packets are stored in *rte_mbuf* structures whose + * pointers are supplied in the *rx_pkts* array. + * + * The rte_eth_rx_burst() function loops, parsing the RX ring of the + * receive queue, up to *nb_pkts* packets, and for each completed RX + * descriptor in the ring, it performs the following operations: + * + * - Initialize the *rte_mbuf* data structure associated with the + * RX descriptor according to the information provided by the NIC into + * that RX descriptor. + * + * - Store the *rte_mbuf* data structure into the next entry of the + * *rx_pkts* array. + * + * - Replenish the RX descriptor with a new *rte_mbuf* buffer + * allocated from the memory pool associated with the receive queue at + * initialization time. + * + * When retrieving an input packet that was scattered by the controller + * into multiple receive descriptors, the rte_eth_rx_burst() function + * appends the associated *rte_mbuf* buffers to the first buffer of the + * packet. + * + * The rte_eth_rx_burst() function returns the number of packets + * actually retrieved, which is the number of *rte_mbuf* data structures + * effectively supplied into the *rx_pkts* array. + * A return value equal to *nb_pkts* indicates that the RX queue contained + * at least *nb_pkts* packets, and this is likely to signify that other + * received packets remain in the input queue. Applications implementing + * a "retrieve as many received packets as possible" policy can check this + * specific case and keep invoking the rte_eth_rx_burst() function until + * a value less than *nb_pkts* is returned.
+ * + * This receive method has the following advantages: + * + * - It allows a run-to-completion network stack engine to retrieve and + * to immediately process received packets in a fast burst-oriented + * approach, avoiding the overhead of unnecessary intermediate packet + * queue/dequeue operations. + * + * - Conversely, it also allows an asynchronous-oriented processing + * method to retrieve bursts of received packets and to immediately + * queue them for further parallel processing by another logical core, + * for instance. However, instead of having received packets being + * individually queued by the driver, this approach allows the invoker + * of the rte_eth_rx_burst() function to queue a burst of retrieved + * packets at a time and therefore dramatically reduce the cost of + * enqueue/dequeue operations per packet. + * + * - It allows the rte_eth_rx_burst() function of the driver to take + * advantage of burst-oriented hardware features (CPU cache, + * prefetch instructions, and so on) to minimize the number of CPU + * cycles per packet. + * + * To summarize, the proposed receive API enables many + * burst-oriented optimizations in both synchronous and asynchronous + * packet processing environments with no overhead in both cases. + * + * The rte_eth_rx_burst() function does not provide any error + * notification to avoid the corresponding overhead. As a hint, the + * upper-level application might check the status of the device link once + * being systematically returned a 0 value for a given number of tries. + * + * @param port_id + * The port identifier of the Ethernet device. + * @param queue_id + * The index of the receive queue from which to retrieve input packets. + * The value must be in the range [0, nb_rx_queue - 1] previously supplied + * to rte_eth_dev_configure(). + * @param rx_pkts + * The address of an array of pointers to *rte_mbuf* structures that + * must be large enough to store *nb_pkts* pointers in it. 
+ * @param nb_pkts + * The maximum number of packets to retrieve. + * @return + * The number of packets actually retrieved, which is the number + * of pointers to *rte_mbuf* structures effectively supplied to the + * *rx_pkts* array. + */ +static inline uint16_t +rte_eth_rx_burst(uint8_t port_id, uint16_t queue_id, + struct rte_mbuf **rx_pkts, const uint16_t nb_pkts) +{ + struct rte_eth_dev *dev = &rte_eth_devices[port_id]; + +#ifdef RTE_LIBRTE_ETHDEV_DEBUG + RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, 0); + RTE_FUNC_PTR_OR_ERR_RET(*dev->rx_pkt_burst, 0); + + if (queue_id >= dev->data->nb_rx_queues) { + RTE_PMD_DEBUG_TRACE("Invalid RX queue_id=%d\n", queue_id); + return 0; + } +#endif + uint16_t nb_rx = (*dev->rx_pkt_burst)(dev->data->rx_queues[queue_id], + rx_pkts, nb_pkts); + +#ifdef RTE_ETHDEV_RXTX_CALLBACKS + struct rte_eth_rxtx_callback *cb = dev->post_rx_burst_cbs[queue_id]; + + if (unlikely(cb != NULL)) { + do { + nb_rx = cb->fn.rx(port_id, queue_id, rx_pkts, nb_rx, + nb_pkts, cb->param); + cb = cb->next; + } while (cb != NULL); + } +#endif + + return nb_rx; +} + +/** + * Get the number of used descriptors in a specific queue + * + * @param port_id + * The port identifier of the Ethernet device. + * @param queue_id + * The queue id on the specific port. + * @return + * The number of used descriptors in the specific queue, or: + * (-EINVAL) if *port_id* is invalid + * (-ENOTSUP) if the device does not support this function + */ +static inline int +rte_eth_rx_queue_count(uint8_t port_id, uint16_t queue_id) +{ + struct rte_eth_dev *dev = &rte_eth_devices[port_id]; + RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -EINVAL); + RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->rx_queue_count, -ENOTSUP); + return (*dev->dev_ops->rx_queue_count)(dev, queue_id); +} + +/** + * Check if the DD bit of the specific RX descriptor in the queue has been set + * + * @param port_id + * The port identifier of the Ethernet device. + * @param queue_id + * The queue id on the specific port.
+ * @param offset + * The offset of the descriptor ID from tail. + * @return + * - (1) if the specific DD bit is set. + * - (0) if the specific DD bit is not set. + * - (-ENODEV) if *port_id* invalid. + * - (-ENOTSUP) if the device does not support this function + */ +static inline int +rte_eth_rx_descriptor_done(uint8_t port_id, uint16_t queue_id, uint16_t offset) +{ + struct rte_eth_dev *dev = &rte_eth_devices[port_id]; + RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -ENODEV); + RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->rx_descriptor_done, -ENOTSUP); + return (*dev->dev_ops->rx_descriptor_done)( + dev->data->rx_queues[queue_id], offset); +} + +/** + * Send a burst of output packets on a transmit queue of an Ethernet device. + * + * The rte_eth_tx_burst() function is invoked to transmit output packets + * on the output queue *queue_id* of the Ethernet device designated by its + * *port_id*. + * The *nb_pkts* parameter is the number of packets to send which are + * supplied in the *tx_pkts* array of *rte_mbuf* structures. + * The rte_eth_tx_burst() function loops, sending *nb_pkts* packets, + * up to the number of transmit descriptors available in the TX ring of the + * transmit queue. + * For each packet to send, the rte_eth_tx_burst() function performs + * the following operations: + * + * - Pick up the next available descriptor in the transmit ring. + * + * - Free the network buffer previously sent with that descriptor, if any. + * + * - Initialize the transmit descriptor with the information provided + * in the *rte_mbuf* data structure. + * + * In the case of a segmented packet composed of a list of *rte_mbuf* buffers, + * the rte_eth_tx_burst() function uses several transmit descriptors + * of the ring. + * + * The rte_eth_tx_burst() function returns the number of packets it + * actually sent.
A return value equal to *nb_pkts* means that all packets + * have been sent, and this is likely to signify that other output packets + * could be immediately transmitted again. Applications that implement a + * "send as many packets to transmit as possible" policy can check this + * specific case and keep invoking the rte_eth_tx_burst() function until + * a value less than *nb_pkts* is returned. + * + * It is the responsibility of the rte_eth_tx_burst() function to + * transparently free the memory buffers of packets previously sent. + * This feature is driven by the *tx_free_thresh* value supplied to the + * rte_eth_dev_configure() function at device configuration time. + * When the number of free TX descriptors drops below this threshold, the + * rte_eth_tx_burst() function must [attempt to] free the *rte_mbuf* buffers + * of those packets whose transmission was effectively completed. + * + * @param port_id + * The port identifier of the Ethernet device. + * @param queue_id + * The index of the transmit queue through which output packets must be + * sent. + * The value must be in the range [0, nb_tx_queue - 1] previously supplied + * to rte_eth_dev_configure(). + * @param tx_pkts + * The address of an array of *nb_pkts* pointers to *rte_mbuf* structures + * which contain the output packets. + * @param nb_pkts + * The maximum number of packets to transmit. + * @return + * The number of output packets actually stored in transmit descriptors of + * the transmit ring. The return value can be less than the value of the + * *nb_pkts* parameter when the transmit ring is full or has been filled up.
+ */ +static inline uint16_t +rte_eth_tx_burst(uint8_t port_id, uint16_t queue_id, + struct rte_mbuf **tx_pkts, uint16_t nb_pkts) +{ + struct rte_eth_dev *dev = &rte_eth_devices[port_id]; + +#ifdef RTE_LIBRTE_ETHDEV_DEBUG + RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, 0); + RTE_FUNC_PTR_OR_ERR_RET(*dev->tx_pkt_burst, 0); + + if (queue_id >= dev->data->nb_tx_queues) { + RTE_PMD_DEBUG_TRACE("Invalid TX queue_id=%d\n", queue_id); + return 0; + } +#endif + +#ifdef RTE_ETHDEV_RXTX_CALLBACKS + struct rte_eth_rxtx_callback *cb = dev->pre_tx_burst_cbs[queue_id]; + + if (unlikely(cb != NULL)) { + do { + nb_pkts = cb->fn.tx(port_id, queue_id, tx_pkts, nb_pkts, + cb->param); + cb = cb->next; + } while (cb != NULL); + } +#endif + + return (*dev->tx_pkt_burst)(dev->data->tx_queues[queue_id], tx_pkts, nb_pkts); +} + +/** + * The eth device event type for interrupt, and maybe others in the future. + */ +enum rte_eth_event_type { + RTE_ETH_EVENT_UNKNOWN, /**< unknown event type */ + RTE_ETH_EVENT_INTR_LSC, /**< lsc interrupt event */ + RTE_ETH_EVENT_MAX /**< max value of this enum */ +}; + +typedef void (*rte_eth_dev_cb_fn)(uint8_t port_id, + enum rte_eth_event_type event, void *cb_arg); +/**< user application callback to be registered for interrupts */ + + + +/** + * Register a callback function for specific port id. + * + * @param port_id + * Port id. + * @param event + * Event interested. + * @param cb_fn + * User supplied callback function to be called. + * @param cb_arg + * Pointer to the parameters for the registered callback. + * + * @return + * - On success, zero. + * - On failure, a negative value. + */ +int rte_eth_dev_callback_register(uint8_t port_id, + enum rte_eth_event_type event, + rte_eth_dev_cb_fn cb_fn, void *cb_arg); + +/** + * Unregister a callback function for specific port id. + * + * @param port_id + * Port id. + * @param event + * Event interested. + * @param cb_fn + * User supplied callback function to be called.
+ * @param cb_arg + * Pointer to the parameters for the registered callback. -1 means to + * remove all for the same callback address and same event. + * + * @return + * - On success, zero. + * - On failure, a negative value. + */ +int rte_eth_dev_callback_unregister(uint8_t port_id, + enum rte_eth_event_type event, + rte_eth_dev_cb_fn cb_fn, void *cb_arg); + +/** + * @internal Executes all the user application registered callbacks for + * the specific device. It is for DPDK internal use only. User + * application should not call it directly. + * + * @param dev + * Pointer to struct rte_eth_dev. + * @param event + * Eth device interrupt event type. + * + * @return + * void + */ +void _rte_eth_dev_callback_process(struct rte_eth_dev *dev, + enum rte_eth_event_type event); + +/** + * When there is no rx packet coming in Rx Queue for a long time, we can + * sleep lcore related to RX Queue for power saving, and enable rx interrupt + * to be triggered when rx packet arrives. + * + * The rte_eth_dev_rx_intr_enable() function enables rx queue + * interrupt on specific rx queue of a port. + * + * @param port_id + * The port identifier of the Ethernet device. + * @param queue_id + * The index of the receive queue from which to retrieve input packets. + * The value must be in the range [0, nb_rx_queue - 1] previously supplied + * to rte_eth_dev_configure(). + * @return + * - (0) if successful. + * - (-ENOTSUP) if underlying hardware OR driver doesn't support + * that operation. + * - (-ENODEV) if *port_id* invalid. + */ +int rte_eth_dev_rx_intr_enable(uint8_t port_id, uint16_t queue_id); + +/** + * When lcore wakes up from rx interrupt indicating packet coming, disable rx + * interrupt and return to polling mode. + * + * The rte_eth_dev_rx_intr_disable() function disables rx queue + * interrupt on specific rx queue of a port. + * + * @param port_id + * The port identifier of the Ethernet device.
+ * @param queue_id + * The index of the receive queue from which to retrieve input packets. + * The value must be in the range [0, nb_rx_queue - 1] previously supplied + * to rte_eth_dev_configure(). + * @return + * - (0) if successful. + * - (-ENOTSUP) if underlying hardware OR driver doesn't support + * that operation. + * - (-ENODEV) if *port_id* invalid. + */ +int rte_eth_dev_rx_intr_disable(uint8_t port_id, uint16_t queue_id); + +/** + * RX Interrupt control per port. + * + * @param port_id + * The port identifier of the Ethernet device. + * @param epfd + * Epoll instance fd which the intr vector associated to. + * Using RTE_EPOLL_PER_THREAD allows to use per thread epoll instance. + * @param op + * The operation be performed for the vector. + * Operation type of {RTE_INTR_EVENT_ADD, RTE_INTR_EVENT_DEL}. + * @param data + * User raw data. + * @return + * - On success, zero. + * - On failure, a negative value. + */ +int rte_eth_dev_rx_intr_ctl(uint8_t port_id, int epfd, int op, void *data); + +/** + * RX Interrupt control per queue. + * + * @param port_id + * The port identifier of the Ethernet device. + * @param queue_id + * The index of the receive queue from which to retrieve input packets. + * The value must be in the range [0, nb_rx_queue - 1] previously supplied + * to rte_eth_dev_configure(). + * @param epfd + * Epoll instance fd which the intr vector associated to. + * Using RTE_EPOLL_PER_THREAD allows to use per thread epoll instance. + * @param op + * The operation be performed for the vector. + * Operation type of {RTE_INTR_EVENT_ADD, RTE_INTR_EVENT_DEL}. + * @param data + * User raw data. + * @return + * - On success, zero. + * - On failure, a negative value. + */ +int rte_eth_dev_rx_intr_ctl_q(uint8_t port_id, uint16_t queue_id, + int epfd, int op, void *data); + +/** + * Turn on the LED on the Ethernet device. + * This function turns on the LED on the Ethernet device. + * + * @param port_id + * The port identifier of the Ethernet device. 
+ * @return + * - (0) if successful. + * - (-ENOTSUP) if underlying hardware OR driver doesn't support + * that operation. + * - (-ENODEV) if *port_id* invalid. + */ +int rte_eth_led_on(uint8_t port_id); + +/** + * Turn off the LED on the Ethernet device. + * This function turns off the LED on the Ethernet device. + * + * @param port_id + * The port identifier of the Ethernet device. + * @return + * - (0) if successful. + * - (-ENOTSUP) if underlying hardware OR driver doesn't support + * that operation. + * - (-ENODEV) if *port_id* invalid. + */ +int rte_eth_led_off(uint8_t port_id); + +/** + * Get current status of the Ethernet link flow control for Ethernet device + * + * @param port_id + * The port identifier of the Ethernet device. + * @param fc_conf + * The pointer to the structure where to store the flow control parameters. + * @return + * - (0) if successful. + * - (-ENOTSUP) if hardware doesn't support flow control. + * - (-ENODEV) if *port_id* invalid. + */ +int rte_eth_dev_flow_ctrl_get(uint8_t port_id, + struct rte_eth_fc_conf *fc_conf); + +/** + * Configure the Ethernet link flow control for Ethernet device + * + * @param port_id + * The port identifier of the Ethernet device. + * @param fc_conf + * The pointer to the structure of the flow control parameters. + * @return + * - (0) if successful. + * - (-ENOTSUP) if hardware doesn't support flow control mode. + * - (-ENODEV) if *port_id* invalid. + * - (-EINVAL) if bad parameter + * - (-EIO) if flow control setup failure + */ +int rte_eth_dev_flow_ctrl_set(uint8_t port_id, + struct rte_eth_fc_conf *fc_conf); + +/** + * Configure the Ethernet priority flow control under DCB environment + * for Ethernet device. + * + * @param port_id + * The port identifier of the Ethernet device. + * @param pfc_conf + * The pointer to the structure of the priority flow control parameters. + * @return + * - (0) if successful. + * - (-ENOTSUP) if hardware doesn't support priority flow control mode. 
+ * - (-ENODEV) if *port_id* invalid. + * - (-EINVAL) if bad parameter + * - (-EIO) if flow control setup failure + */ +int rte_eth_dev_priority_flow_ctrl_set(uint8_t port_id, + struct rte_eth_pfc_conf *pfc_conf); + +/** + * Add a MAC address to an internal array of addresses used to enable whitelist + * filtering to accept packets only if the destination MAC address matches. + * + * @param port + * The port identifier of the Ethernet device. + * @param mac_addr + * The MAC address to add. + * @param pool + * VMDq pool index to associate address with (if VMDq is enabled). If VMDq is + * not enabled, this should be set to 0. + * @return + * - (0) if successfully added or *mac_addr* was already added. + * - (-ENOTSUP) if hardware doesn't support this feature. + * - (-ENODEV) if *port* is invalid. + * - (-ENOSPC) if no more MAC addresses can be added. + * - (-EINVAL) if MAC address is invalid. + */ +int rte_eth_dev_mac_addr_add(uint8_t port, struct ether_addr *mac_addr, + uint32_t pool); + +/** + * Remove a MAC address from the internal array of addresses. + * + * @param port + * The port identifier of the Ethernet device. + * @param mac_addr + * MAC address to remove. + * @return + * - (0) if successful, or *mac_addr* didn't exist. + * - (-ENOTSUP) if hardware doesn't support. + * - (-ENODEV) if *port* invalid. + * - (-EADDRINUSE) if attempting to remove the default MAC address + */ +int rte_eth_dev_mac_addr_remove(uint8_t port, struct ether_addr *mac_addr); + +/** + * Set the default MAC address. + * + * @param port + * The port identifier of the Ethernet device. + * @param mac_addr + * New default MAC address. + * @return + * - (0) if successful, or *mac_addr* didn't exist. + * - (-ENOTSUP) if hardware doesn't support. + * - (-ENODEV) if *port* invalid. + * - (-EINVAL) if MAC address is invalid.
+ */ +int rte_eth_dev_default_mac_addr_set(uint8_t port, struct ether_addr *mac_addr); + + +/** + * Update Redirection Table(RETA) of Receive Side Scaling of Ethernet device. + * + * @param port + * The port identifier of the Ethernet device. + * @param reta_conf + * RETA to update. + * @param reta_size + * Redirection table size. The table size can be queried by + * rte_eth_dev_info_get(). + * @return + * - (0) if successful. + * - (-ENOTSUP) if hardware doesn't support. + * - (-EINVAL) if bad parameter. + */ +int rte_eth_dev_rss_reta_update(uint8_t port, + struct rte_eth_rss_reta_entry64 *reta_conf, + uint16_t reta_size); + + /** + * Query Redirection Table(RETA) of Receive Side Scaling of Ethernet device. + * + * @param port + * The port identifier of the Ethernet device. + * @param reta_conf + * RETA to query. + * @param reta_size + * Redirection table size. The table size can be queried by + * rte_eth_dev_info_get(). + * @return + * - (0) if successful. + * - (-ENOTSUP) if hardware doesn't support. + * - (-EINVAL) if bad parameter. + */ +int rte_eth_dev_rss_reta_query(uint8_t port, + struct rte_eth_rss_reta_entry64 *reta_conf, + uint16_t reta_size); + + /** + * Updates unicast hash table for receiving packet with the given destination + * MAC address, and the packet is routed to all VFs for which the RX mode is + * accept packets that match the unicast hash table. + * + * @param port + * The port identifier of the Ethernet device. + * @param addr + * Unicast MAC address. + * @param on + * 1 - Set an unicast hash bit for receiving packets with the MAC address. + * 0 - Clear an unicast hash bit. + * @return + * - (0) if successful. + * - (-ENOTSUP) if hardware doesn't support. + * - (-ENODEV) if *port_id* invalid. + * - (-EINVAL) if bad parameter. 
+ */ +int rte_eth_dev_uc_hash_table_set(uint8_t port,struct ether_addr *addr, + uint8_t on); + + /** + * Updates all unicast hash bitmaps for receiving packet with any Unicast + * Ethernet MAC addresses, the packet is routed to all VFs for which the RX + * mode is accept packets that match the unicast hash table. + * + * @param port + * The port identifier of the Ethernet device. + * @param on + * 1 - Set all unicast hash bitmaps for receiving all the Ethernet + * MAC addresses + * 0 - Clear all unicast hash bitmaps + * @return + * - (0) if successful. + * - (-ENOTSUP) if hardware doesn't support. + * - (-ENODEV) if *port* invalid. + * - (-EINVAL) if bad parameter. + */ +int rte_eth_dev_uc_all_hash_table_set(uint8_t port,uint8_t on); + + /** + * Set RX L2 Filtering mode of a VF of an Ethernet device. + * + * @param port + * The port identifier of the Ethernet device. + * @param vf + * VF id. + * @param rx_mode + * The RX mode mask, which is one or more of accepting Untagged Packets, + * packets that match the PFUTA table, Broadcast and Multicast Promiscuous. + * ETH_VMDQ_ACCEPT_UNTAG,ETH_VMDQ_ACCEPT_HASH_UC, + * ETH_VMDQ_ACCEPT_BROADCAST and ETH_VMDQ_ACCEPT_MULTICAST will be used + * in rx_mode. + * @param on + * 1 - Enable a VF RX mode. + * 0 - Disable a VF RX mode. + * @return + * - (0) if successful. + * - (-ENOTSUP) if hardware doesn't support. + * - (-ENODEV) if *port* invalid. + * - (-EINVAL) if bad parameter. + */ +int rte_eth_dev_set_vf_rxmode(uint8_t port, uint16_t vf, uint16_t rx_mode, + uint8_t on); + +/** +* Enable or disable a VF traffic transmit of the Ethernet device. +* +* @param port +* The port identifier of the Ethernet device. +* @param vf +* VF id. +* @param on +* 1 - Enable a VF traffic transmit. +* 0 - Disable a VF traffic transmit. +* @return +* - (0) if successful. +* - (-ENODEV) if *port* invalid. +* - (-ENOTSUP) if hardware doesn't support. +* - (-EINVAL) if bad parameter.
+*/ +int +rte_eth_dev_set_vf_tx(uint8_t port,uint16_t vf, uint8_t on); + +/** +* Enable or disable a VF traffic receive of an Ethernet device. +* +* @param port +* The port identifier of the Ethernet device. +* @param vf +* VF id. +* @param on +* 1 - Enable a VF traffic receive. +* 0 - Disable a VF traffic receive. +* @return +* - (0) if successful. +* - (-ENOTSUP) if hardware doesn't support. +* - (-ENODEV) if *port* invalid. +* - (-EINVAL) if bad parameter. +*/ +int +rte_eth_dev_set_vf_rx(uint8_t port,uint16_t vf, uint8_t on); + +/** +* Enable/Disable hardware VF VLAN filtering by an Ethernet device of +* received VLAN packets tagged with a given VLAN Tag Identifier. +* +* @param port +* The port identifier of the Ethernet device. +* @param vlan_id +* The VLAN Tag Identifier whose filtering must be enabled or disabled. +* @param vf_mask +* Bitmap listing which VFs participate in the VLAN filtering. +* @param vlan_on +* 1 - Enable VFs VLAN filtering. +* 0 - Disable VFs VLAN filtering. +* @return +* - (0) if successful. +* - (-ENOTSUP) if hardware doesn't support. +* - (-ENODEV) if *port* invalid. +* - (-EINVAL) if bad parameter. +*/ +int +rte_eth_dev_set_vf_vlan_filter(uint8_t port, uint16_t vlan_id, + uint64_t vf_mask, + uint8_t vlan_on); + +/** + * Set a traffic mirroring rule on an Ethernet device + * + * @param port_id + * The port identifier of the Ethernet device. + * @param mirror_conf + * The pointer to the traffic mirroring structure describing the mirroring rule. + * The *rte_eth_mirror_conf* structure includes the type of mirroring rule, + * destination pool and the value of rule if enable vlan or pool mirroring. + * + * @param rule_id + * The index of traffic mirroring rule, we support four separated rules. + * @param on + * 1 - Enable a mirroring rule. + * 0 - Disable a mirroring rule. + * @return + * - (0) if successful. + * - (-ENOTSUP) if hardware doesn't support this feature. + * - (-ENODEV) if *port_id* invalid.
+ * - (-EINVAL) if the mr_conf information is not correct. + */ +int rte_eth_mirror_rule_set(uint8_t port_id, + struct rte_eth_mirror_conf *mirror_conf, + uint8_t rule_id, + uint8_t on); + +/** + * Reset a traffic mirroring rule on an Ethernet device. + * + * @param port_id + * The port identifier of the Ethernet device. + * @param rule_id + * The index of traffic mirroring rule, we support four separated rules. + * @return + * - (0) if successful. + * - (-ENOTSUP) if hardware doesn't support this feature. + * - (-ENODEV) if *port_id* invalid. + * - (-EINVAL) if bad parameter. + */ +int rte_eth_mirror_rule_reset(uint8_t port_id, + uint8_t rule_id); + +/** + * Set the rate limitation for a queue on an Ethernet device. + * + * @param port_id + * The port identifier of the Ethernet device. + * @param queue_idx + * The queue id. + * @param tx_rate + * The tx rate in Mbps. Allocated from the total port link speed. + * @return + * - (0) if successful. + * - (-ENOTSUP) if hardware doesn't support this feature. + * - (-ENODEV) if *port_id* invalid. + * - (-EINVAL) if bad parameter. + */ +int rte_eth_set_queue_rate_limit(uint8_t port_id, uint16_t queue_idx, + uint16_t tx_rate); + +/** + * Set the rate limitation for a vf on an Ethernet device. + * + * @param port_id + * The port identifier of the Ethernet device. + * @param vf + * VF id. + * @param tx_rate + * The tx rate allocated from the total link speed for this VF id. + * @param q_msk + * The queue mask which need to set the rate. + * @return + * - (0) if successful. + * - (-ENOTSUP) if hardware doesn't support this feature. + * - (-ENODEV) if *port_id* invalid. + * - (-EINVAL) if bad parameter. + */ +int rte_eth_set_vf_rate_limit(uint8_t port_id, uint16_t vf, + uint16_t tx_rate, uint64_t q_msk); + +/** + * Initialize bypass logic. This function needs to be called before + * executing any other bypass API. + * + * @param port + * The port identifier of the Ethernet device. + * @return + * - (0) if successful. 
+ * - (-ENOTSUP) if hardware doesn't support. + * - (-EINVAL) if bad parameter. + */ +int rte_eth_dev_bypass_init(uint8_t port); + +/** + * Return bypass state. + * + * @param port + * The port identifier of the Ethernet device. + * @param state + * The return bypass state. + * - (1) Normal mode + * - (2) Bypass mode + * - (3) Isolate mode + * @return + * - (0) if successful. + * - (-ENOTSUP) if hardware doesn't support. + * - (-EINVAL) if bad parameter. + */ +int rte_eth_dev_bypass_state_show(uint8_t port, uint32_t *state); + +/** + * Set bypass state + * + * @param port + * The port identifier of the Ethernet device. + * @param new_state + * The current bypass state. + * - (1) Normal mode + * - (2) Bypass mode + * - (3) Isolate mode + * @return + * - (0) if successful. + * - (-ENOTSUP) if hardware doesn't support. + * - (-EINVAL) if bad parameter. + */ +int rte_eth_dev_bypass_state_set(uint8_t port, uint32_t *new_state); + +/** + * Return bypass state when given event occurs. + * + * @param port + * The port identifier of the Ethernet device. + * @param event + * The bypass event + * - (1) Main power on (power button is pushed) + * - (2) Auxiliary power on (power supply is being plugged) + * - (3) Main power off (system shutdown and power supply is left plugged in) + * - (4) Auxiliary power off (power supply is being unplugged) + * - (5) Display or set the watchdog timer + * @param state + * The bypass state when given event occurred. + * - (1) Normal mode + * - (2) Bypass mode + * - (3) Isolate mode + * @return + * - (0) if successful. + * - (-ENOTSUP) if hardware doesn't support. + * - (-EINVAL) if bad parameter. + */ +int rte_eth_dev_bypass_event_show(uint8_t port, uint32_t event, uint32_t *state); + +/** + * Set bypass state when given event occurs. + * + * @param port + * The port identifier of the Ethernet device. 
+ * @param event + * The bypass event + * - (1) Main power on (power button is pushed) + * - (2) Auxiliary power on (power supply is being plugged) + * - (3) Main power off (system shutdown and power supply is left plugged in) + * - (4) Auxiliary power off (power supply is being unplugged) + * - (5) Display or set the watchdog timer + * @param state + * The assigned state when given event occurs. + * - (1) Normal mode + * - (2) Bypass mode + * - (3) Isolate mode + * @return + * - (0) if successful. + * - (-ENOTSUP) if hardware doesn't support. + * - (-EINVAL) if bad parameter. + */ +int rte_eth_dev_bypass_event_store(uint8_t port, uint32_t event, uint32_t state); + +/** + * Set bypass watchdog timeout count. + * + * @param port + * The port identifier of the Ethernet device. + * @param timeout + * The timeout to be set. + * - (0) 0 seconds (timer is off) + * - (1) 1.5 seconds + * - (2) 2 seconds + * - (3) 3 seconds + * - (4) 4 seconds + * - (5) 8 seconds + * - (6) 16 seconds + * - (7) 32 seconds + * @return + * - (0) if successful. + * - (-ENOTSUP) if hardware doesn't support. + * - (-EINVAL) if bad parameter. + */ +int rte_eth_dev_wd_timeout_store(uint8_t port, uint32_t timeout); + +/** + * Get bypass firmware version. + * + * @param port + * The port identifier of the Ethernet device. + * @param ver + * The firmware version + * @return + * - (0) if successful. + * - (-ENOTSUP) if hardware doesn't support. + * - (-EINVAL) if bad parameter. + */ +int rte_eth_dev_bypass_ver_show(uint8_t port, uint32_t *ver); + +/** + * Return bypass watchdog timeout in seconds + * + * @param port + * The port identifier of the Ethernet device. + * @param wd_timeout + * The return watchdog timeout. "0" represents timer expired + * - (0) 0 seconds (timer is off) + * - (1) 1.5 seconds + * - (2) 2 seconds + * - (3) 3 seconds + * - (4) 4 seconds + * - (5) 8 seconds + * - (6) 16 seconds + * - (7) 32 seconds + * @return + * - (0) if successful. + * - (-ENOTSUP) if hardware doesn't support. 
+ * - (-EINVAL) if bad parameter. + */ +int rte_eth_dev_bypass_wd_timeout_show(uint8_t port, uint32_t *wd_timeout); + +/** + * Reset bypass watchdog timer + * + * @param port + * The port identifier of the Ethernet device. + * @return + * - (0) if successful. + * - (-ENOTSUP) if hardware doesn't support. + * - (-EINVAL) if bad parameter. + */ +int rte_eth_dev_bypass_wd_reset(uint8_t port); + + /** + * Configuration of Receive Side Scaling hash computation of Ethernet device. + * + * @param port_id + * The port identifier of the Ethernet device. + * @param rss_conf + * The new configuration to use for RSS hash computation on the port. + * @return + * - (0) if successful. + * - (-ENODEV) if port identifier is invalid. + * - (-ENOTSUP) if hardware doesn't support. + * - (-EINVAL) if bad parameter. + */ +int rte_eth_dev_rss_hash_update(uint8_t port_id, + struct rte_eth_rss_conf *rss_conf); + + /** + * Retrieve current configuration of Receive Side Scaling hash computation + * of Ethernet device. + * + * @param port_id + * The port identifier of the Ethernet device. + * @param rss_conf + * Where to store the current RSS hash configuration of the Ethernet device. + * @return + * - (0) if successful. + * - (-ENODEV) if port identifier is invalid. + * - (-ENOTSUP) if hardware doesn't support RSS. + */ +int +rte_eth_dev_rss_hash_conf_get(uint8_t port_id, + struct rte_eth_rss_conf *rss_conf); + + /** + * Add UDP tunneling port of an Ethernet device for filtering a specific + * tunneling packet by UDP port number. + * + * @param port_id + * The port identifier of the Ethernet device. + * @param tunnel_udp + * UDP tunneling configuration. + * + * @return + * - (0) if successful. + * - (-ENODEV) if port identifier is invalid. + * - (-ENOTSUP) if hardware doesn't support tunnel type. 
+ */ +int +rte_eth_dev_udp_tunnel_add(uint8_t port_id, + struct rte_eth_udp_tunnel *tunnel_udp); + + /** + * Delete UDP tunneling port configuration of an Ethernet device. + * + * @param port_id + * The port identifier of the Ethernet device. + * @param tunnel_udp + * UDP tunneling configuration. + * + * @return + * - (0) if successful. + * - (-ENODEV) if port identifier is invalid. + * - (-ENOTSUP) if hardware doesn't support tunnel type. + */ +int +rte_eth_dev_udp_tunnel_delete(uint8_t port_id, + struct rte_eth_udp_tunnel *tunnel_udp); + +/** + * Check whether the filter type is supported on an Ethernet device. + * All the supported filter types are defined in 'rte_eth_ctrl.h'. + * + * @param port_id + * The port identifier of the Ethernet device. + * @param filter_type + * Filter type. + * @return + * - (0) if successful. + * - (-ENOTSUP) if hardware doesn't support this filter type. + * - (-ENODEV) if *port_id* invalid. + */ +int rte_eth_dev_filter_supported(uint8_t port_id, enum rte_filter_type filter_type); + +/** + * Perform an operation on the assigned filter type of an Ethernet device. + * All the supported operations and filter types are defined in 'rte_eth_ctrl.h'. + * + * @param port_id + * The port identifier of the Ethernet device. + * @param filter_type + * Filter type. + * @param filter_op + * Type of operation. + * @param arg + * A pointer to arguments defined specifically for the operation. + * @return + * - (0) if successful. + * - (-ENOTSUP) if hardware doesn't support. + * - (-ENODEV) if *port_id* invalid. + * - others depends on the specific operations implementation. + */ +int rte_eth_dev_filter_ctrl(uint8_t port_id, enum rte_filter_type filter_type, + enum rte_filter_op filter_op, void *arg); + +/** + * Get DCB information on an Ethernet device. + * + * @param port_id + * The port identifier of the Ethernet device. + * @param dcb_info + * dcb information. + * @return + * - (0) if successful. + * - (-ENODEV) if port identifier is invalid.
+ * - (-ENOTSUP) if hardware doesn't support. + */ +int rte_eth_dev_get_dcb_info(uint8_t port_id, + struct rte_eth_dcb_info *dcb_info); + +/** + * Add a callback to be called on packet RX on a given port and queue. + * + * This API configures a function to be called for each burst of + * packets received on a given NIC port queue. The return value is a pointer + * that can be used to later remove the callback using + * rte_eth_remove_rx_callback(). + * + * Multiple functions are called in the order that they are added. + * + * @param port_id + * The port identifier of the Ethernet device. + * @param queue_id + * The queue on the Ethernet device on which the callback is to be added. + * @param fn + * The callback function + * @param user_param + * A generic pointer parameter which will be passed to each invocation of the + * callback function on this port and queue. + * + * @return + * NULL on error. + * On success, a pointer value which can later be used to remove the callback. + */ +void *rte_eth_add_rx_callback(uint8_t port_id, uint16_t queue_id, + rte_rx_callback_fn fn, void *user_param); + +/** + * Add a callback to be called on packet TX on a given port and queue. + * + * This API configures a function to be called for each burst of + * packets sent on a given NIC port queue. The return value is a pointer + * that can be used to later remove the callback using + * rte_eth_remove_tx_callback(). + * + * Multiple functions are called in the order that they are added. + * + * @param port_id + * The port identifier of the Ethernet device. + * @param queue_id + * The queue on the Ethernet device on which the callback is to be added. + * @param fn + * The callback function + * @param user_param + * A generic pointer parameter which will be passed to each invocation of the + * callback function on this port and queue. + * + * @return + * NULL on error. + * On success, a pointer value which can later be used to remove the callback. 
+ */ +void *rte_eth_add_tx_callback(uint8_t port_id, uint16_t queue_id, + rte_tx_callback_fn fn, void *user_param); + +/** + * Remove an RX packet callback from a given port and queue. + * + * This function is used to remove callbacks that were added to a NIC port + * queue using rte_eth_add_rx_callback(). + * + * Note: the callback is removed from the callback list but it isn't freed + * since it may still be in use. The memory for the callback can be + * subsequently freed back by the application by calling rte_free(): + * + * - Immediately - if the port is stopped, or the user knows that no + * callbacks are in flight e.g. if called from the thread doing RX/TX + * on that queue. + * + * - After a short delay - where the delay is sufficient to allow any + * in-flight callbacks to complete. + * + * @param port_id + * The port identifier of the Ethernet device. + * @param queue_id + * The queue on the Ethernet device from which the callback is to be removed. + * @param user_cb + * User supplied callback created via rte_eth_add_rx_callback(). + * + * @return + * - 0: Success. Callback was removed. + * - -ENOTSUP: Callback support is not available. + * - -EINVAL: The port_id or the queue_id is out of range, or the callback + * is NULL or not found for the port/queue. + */ +int rte_eth_remove_rx_callback(uint8_t port_id, uint16_t queue_id, + struct rte_eth_rxtx_callback *user_cb); + +/** + * Remove a TX packet callback from a given port and queue. + * + * This function is used to remove callbacks that were added to a NIC port + * queue using rte_eth_add_tx_callback(). + * + * Note: the callback is removed from the callback list but it isn't freed + * since it may still be in use. The memory for the callback can be + * subsequently freed back by the application by calling rte_free(): + * + * - Immediately - if the port is stopped, or the user knows that no + * callbacks are in flight e.g. if called from the thread doing RX/TX + * on that queue.
+ * + * - After a short delay - where the delay is sufficient to allow any + * in-flight callbacks to complete. + * + * @param port_id + * The port identifier of the Ethernet device. + * @param queue_id + * The queue on the Ethernet device from which the callback is to be removed. + * @param user_cb + * User supplied callback created via rte_eth_add_tx_callback(). + * + * @return + * - 0: Success. Callback was removed. + * - -ENOTSUP: Callback support is not available. + * - -EINVAL: The port_id or the queue_id is out of range, or the callback + * is NULL or not found for the port/queue. + */ +int rte_eth_remove_tx_callback(uint8_t port_id, uint16_t queue_id, + struct rte_eth_rxtx_callback *user_cb); + +/** + * Retrieve information about given port's RX queue. + * + * @param port_id + * The port identifier of the Ethernet device. + * @param queue_id + * The RX queue on the Ethernet device for which information + * will be retrieved. + * @param qinfo + * A pointer to a structure of type *rte_eth_rxq_info* to be filled with + * the information of the Ethernet device. + * + * @return + * - 0: Success + * - -ENOTSUP: routine is not supported by the device PMD. + * - -EINVAL: The port_id or the queue_id is out of range. + */ +int rte_eth_rx_queue_info_get(uint8_t port_id, uint16_t queue_id, + struct rte_eth_rxq_info *qinfo); + +/** + * Retrieve information about given port's TX queue. + * + * @param port_id + * The port identifier of the Ethernet device. + * @param queue_id + * The TX queue on the Ethernet device for which information + * will be retrieved. + * @param qinfo + * A pointer to a structure of type *rte_eth_txq_info* to be filled with + * the information of the Ethernet device. + * + * @return + * - 0: Success + * - -ENOTSUP: routine is not supported by the device PMD. + * - -EINVAL: The port_id or the queue_id is out of range.
+ */ +int rte_eth_tx_queue_info_get(uint8_t port_id, uint16_t queue_id, + struct rte_eth_txq_info *qinfo); + +/** + * Retrieve number of available registers for access + * + * @param port_id + * The port identifier of the Ethernet device. + * @return + * - (>=0) number of registers if successful. + * - (-ENOTSUP) if hardware doesn't support. + * - (-ENODEV) if *port_id* invalid. + * - others depends on the specific operations implementation. + */ +int rte_eth_dev_get_reg_length(uint8_t port_id); + +/** + * Retrieve device registers and register attributes + * + * @param port_id + * The port identifier of the Ethernet device. + * @param info + * The template includes buffer for register data and attribute to be filled. + * @return + * - (0) if successful. + * - (-ENOTSUP) if hardware doesn't support. + * - (-ENODEV) if *port_id* invalid. + * - others depends on the specific operations implementation. + */ +int rte_eth_dev_get_reg_info(uint8_t port_id, struct rte_dev_reg_info *info); + +/** + * Retrieve size of device EEPROM + * + * @param port_id + * The port identifier of the Ethernet device. + * @return + * - (>=0) EEPROM size if successful. + * - (-ENOTSUP) if hardware doesn't support. + * - (-ENODEV) if *port_id* invalid. + * - others depends on the specific operations implementation. + */ +int rte_eth_dev_get_eeprom_length(uint8_t port_id); + +/** + * Retrieve EEPROM and EEPROM attribute + * + * @param port_id + * The port identifier of the Ethernet device. + * @param info + * The template includes buffer for return EEPROM data and + * EEPROM attributes to be filled. + * @return + * - (0) if successful. + * - (-ENOTSUP) if hardware doesn't support. + * - (-ENODEV) if *port_id* invalid. + * - others depends on the specific operations implementation. + */ +int rte_eth_dev_get_eeprom(uint8_t port_id, struct rte_dev_eeprom_info *info); + +/** + * Program EEPROM with provided data + * + * @param port_id + * The port identifier of the Ethernet device.
+ * @param info + * The template includes EEPROM data for programming and + * EEPROM attributes to be filled + * @return + * - (0) if successful. + * - (-ENOTSUP) if hardware doesn't support. + * - (-ENODEV) if *port_id* invalid. + * - others depends on the specific operations implementation. + */ +int rte_eth_dev_set_eeprom(uint8_t port_id, struct rte_dev_eeprom_info *info); + +/** + * Set the list of multicast addresses to filter on an Ethernet device. + * + * @param port_id + * The port identifier of the Ethernet device. + * @param mc_addr_set + * The array of multicast addresses to set. Equal to NULL when the function + * is invoked to flush the set of filtered addresses. + * @param nb_mc_addr + * The number of multicast addresses in the *mc_addr_set* array. Equal to 0 + * when the function is invoked to flush the set of filtered addresses. + * @return + * - (0) if successful. + * - (-ENODEV) if *port_id* invalid. + * - (-ENOTSUP) if PMD of *port_id* doesn't support multicast filtering. + * - (-ENOSPC) if *port_id* has not enough multicast filtering resources. + */ +int rte_eth_dev_set_mc_addr_list(uint8_t port_id, + struct ether_addr *mc_addr_set, + uint32_t nb_mc_addr); + +/** + * Enable IEEE1588/802.1AS timestamping for an Ethernet device. + * + * @param port_id + * The port identifier of the Ethernet device. + * + * @return + * - 0: Success. + * - -ENODEV: The port ID is invalid. + * - -ENOTSUP: The function is not supported by the Ethernet driver. + */ +extern int rte_eth_timesync_enable(uint8_t port_id); + +/** + * Disable IEEE1588/802.1AS timestamping for an Ethernet device. + * + * @param port_id + * The port identifier of the Ethernet device. + * + * @return + * - 0: Success. + * - -ENODEV: The port ID is invalid. + * - -ENOTSUP: The function is not supported by the Ethernet driver. + */ +extern int rte_eth_timesync_disable(uint8_t port_id); + +/** + * Read an IEEE1588/802.1AS RX timestamp from an Ethernet device. 
+ * + * @param port_id + * The port identifier of the Ethernet device. + * @param timestamp + * Pointer to the timestamp struct. + * @param flags + * Device specific flags. Used to pass the RX timesync register index to + * i40e. Unused in igb/ixgbe, pass 0 instead. + * + * @return + * - 0: Success. + * - -EINVAL: No timestamp is available. + * - -ENODEV: The port ID is invalid. + * - -ENOTSUP: The function is not supported by the Ethernet driver. + */ +extern int rte_eth_timesync_read_rx_timestamp(uint8_t port_id, + struct timespec *timestamp, + uint32_t flags); + +/** + * Read an IEEE1588/802.1AS TX timestamp from an Ethernet device. + * + * @param port_id + * The port identifier of the Ethernet device. + * @param timestamp + * Pointer to the timestamp struct. + * + * @return + * - 0: Success. + * - -EINVAL: No timestamp is available. + * - -ENODEV: The port ID is invalid. + * - -ENOTSUP: The function is not supported by the Ethernet driver. + */ +extern int rte_eth_timesync_read_tx_timestamp(uint8_t port_id, + struct timespec *timestamp); + +/** + * Adjust the timesync clock on an Ethernet device. + * + * This is usually used in conjunction with other Ethdev timesync functions to + * synchronize the device time using the IEEE1588/802.1AS protocol. + * + * @param port_id + * The port identifier of the Ethernet device. + * @param delta + * The adjustment in nanoseconds. + * + * @return + * - 0: Success. + * - -ENODEV: The port ID is invalid. + * - -ENOTSUP: The function is not supported by the Ethernet driver. + */ +extern int rte_eth_timesync_adjust_time(uint8_t port_id, int64_t delta); + +/** + * Read the time from the timesync clock on an Ethernet device. + * + * This is usually used in conjunction with other Ethdev timesync functions to + * synchronize the device time using the IEEE1588/802.1AS protocol. + * + * @param port_id + * The port identifier of the Ethernet device. + * @param time + * Pointer to the timespec struct that holds the time. 
+ * + * @return + * - 0: Success. + */ +extern int rte_eth_timesync_read_time(uint8_t port_id, struct timespec *time); + +/** + * Set the time of the timesync clock on an Ethernet device. + * + * This is usually used in conjunction with other Ethdev timesync functions to + * synchronize the device time using the IEEE1588/802.1AS protocol. + * + * @param port_id + * The port identifier of the Ethernet device. + * @param time + * Pointer to the timespec struct that holds the time. + * + * @return + * - 0: Success. + * - -EINVAL: No timestamp is available. + * - -ENODEV: The port ID is invalid. + * - -ENOTSUP: The function is not supported by the Ethernet driver. + */ +extern int rte_eth_timesync_write_time(uint8_t port_id, + const struct timespec *time); + +/** + * Copy PCI device info to the Ethernet device data. + * + * @param eth_dev + * The *eth_dev* pointer is the address of the *rte_eth_dev* structure. + * @param pci_dev + * The *pci_dev* pointer is the address of the *rte_pci_device* structure. + */ +extern void rte_eth_copy_pci_info(struct rte_eth_dev *eth_dev, struct rte_pci_device *pci_dev); + + +/** + * Create memzone for HW rings. + * malloc can't be used as the physical address is needed. + * If the memzone is already created, then this function returns a ptr + * to the old one. + * + * @param eth_dev + * The *eth_dev* pointer is the address of the *rte_eth_dev* structure + * @param name + * The name of the memory zone + * @param queue_id + * The index of the queue to add to name + * @param size + * The sizeof of the memory area + * @param align + * Alignment for resulting memzone. Must be a power of 2. + * @param socket_id + * The *socket_id* argument is the socket identifier in case of NUMA.
+ */ +const struct rte_memzone * +rte_eth_dma_zone_reserve(const struct rte_eth_dev *eth_dev, const char *name, + uint16_t queue_id, size_t size, + unsigned align, int socket_id); + +#ifdef __cplusplus +} +#endif + +#endif /* _RTE_ETHDEV_H_ */ diff --git a/src/dpdk22/lib/librte_ether/rte_ether.h b/src/dpdk22/lib/librte_ether/rte_ether.h new file mode 100644 index 00000000..07c17d7e --- /dev/null +++ b/src/dpdk22/lib/librte_ether/rte_ether.h @@ -0,0 +1,416 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef _RTE_ETHER_H_ +#define _RTE_ETHER_H_ + +/** + * @file + * + * Ethernet Helpers in RTE + */ + +#ifdef __cplusplus +extern "C" { +#endif + +#include <stdint.h> +#include <stdio.h> + +#include <rte_memcpy.h> +#include <rte_random.h> +#include <rte_mbuf.h> +#include <rte_byteorder.h> + +#define ETHER_ADDR_LEN 6 /**< Length of Ethernet address. */ +#define ETHER_TYPE_LEN 2 /**< Length of Ethernet type field. */ +#define ETHER_CRC_LEN 4 /**< Length of Ethernet CRC. */ +#define ETHER_HDR_LEN \ + (ETHER_ADDR_LEN * 2 + ETHER_TYPE_LEN) /**< Length of Ethernet header. */ +#define ETHER_MIN_LEN 64 /**< Minimum frame len, including CRC. */ +#define ETHER_MAX_LEN 1518 /**< Maximum frame len, including CRC. */ +#define ETHER_MTU \ + (ETHER_MAX_LEN - ETHER_HDR_LEN - ETHER_CRC_LEN) /**< Ethernet MTU. */ + +#define ETHER_MAX_VLAN_FRAME_LEN \ + (ETHER_MAX_LEN + 4) /**< Maximum VLAN frame length, including CRC. */ + +#define ETHER_MAX_JUMBO_FRAME_LEN \ + 0x3F00 /**< Maximum Jumbo frame length, including CRC. */ + +#define ETHER_MAX_VLAN_ID 4095 /**< Maximum VLAN ID. */ + +#define ETHER_MIN_MTU 68 /**< Minimum MTU for IPv4 packets, see RFC 791. */ + +/** + * Ethernet address: + * A universally administered address is uniquely assigned to a device by its + * manufacturer. The first three octets (in transmission order) contain the + * Organizationally Unique Identifier (OUI). The following three (MAC-48 and + * EUI-48) octets are assigned by that organization with the only constraint + * of uniqueness.
+ * A locally administered address is assigned to a device by a network + * administrator and does not contain OUIs. + * See http://standards.ieee.org/regauth/groupmac/tutorial.html + */ +struct ether_addr { + uint8_t addr_bytes[ETHER_ADDR_LEN]; /**< Address bytes in transmission order */ +} __attribute__((__packed__)); + +#define ETHER_LOCAL_ADMIN_ADDR 0x02 /**< Locally assigned Eth. address. */ +#define ETHER_GROUP_ADDR 0x01 /**< Multicast or broadcast Eth. address. */ + +/** + * Check if two Ethernet addresses are the same. + * + * @param ea1 + * A pointer to the first ether_addr structure containing + * the ethernet address. + * @param ea2 + * A pointer to the second ether_addr structure containing + * the ethernet address. + * + * @return + * True (1) if the given two ethernet address are the same; + * False (0) otherwise. + */ +static inline int is_same_ether_addr(const struct ether_addr *ea1, + const struct ether_addr *ea2) +{ + int i; + for (i = 0; i < ETHER_ADDR_LEN; i++) + if (ea1->addr_bytes[i] != ea2->addr_bytes[i]) + return 0; + return 1; +} + +/** + * Check if an Ethernet address is filled with zeros. + * + * @param ea + * A pointer to a ether_addr structure containing the ethernet address + * to check. + * @return + * True (1) if the given ethernet address is filled with zeros; + * false (0) otherwise. + */ +static inline int is_zero_ether_addr(const struct ether_addr *ea) +{ + int i; + for (i = 0; i < ETHER_ADDR_LEN; i++) + if (ea->addr_bytes[i] != 0x00) + return 0; + return 1; +} + +/** + * Check if an Ethernet address is a unicast address. + * + * @param ea + * A pointer to a ether_addr structure containing the ethernet address + * to check. + * @return + * True (1) if the given ethernet address is a unicast address; + * false (0) otherwise. + */ +static inline int is_unicast_ether_addr(const struct ether_addr *ea) +{ + return ((ea->addr_bytes[0] & ETHER_GROUP_ADDR) == 0); +} + +/** + * Check if an Ethernet address is a multicast address. 
+ * + * @param ea + * A pointer to a ether_addr structure containing the ethernet address + * to check. + * @return + * True (1) if the given ethernet address is a multicast address; + * false (0) otherwise. + */ +static inline int is_multicast_ether_addr(const struct ether_addr *ea) +{ + return (ea->addr_bytes[0] & ETHER_GROUP_ADDR); +} + +/** + * Check if an Ethernet address is a broadcast address. + * + * @param ea + * A pointer to a ether_addr structure containing the ethernet address + * to check. + * @return + * True (1) if the given ethernet address is a broadcast address; + * false (0) otherwise. + */ +static inline int is_broadcast_ether_addr(const struct ether_addr *ea) +{ + const unaligned_uint16_t *ea_words = (const unaligned_uint16_t *)ea; + + return (ea_words[0] == 0xFFFF && ea_words[1] == 0xFFFF && + ea_words[2] == 0xFFFF); +} + +/** + * Check if an Ethernet address is a universally assigned address. + * + * @param ea + * A pointer to a ether_addr structure containing the ethernet address + * to check. + * @return + * True (1) if the given ethernet address is a universally assigned address; + * false (0) otherwise. + */ +static inline int is_universal_ether_addr(const struct ether_addr *ea) +{ + return ((ea->addr_bytes[0] & ETHER_LOCAL_ADMIN_ADDR) == 0); +} + +/** + * Check if an Ethernet address is a locally assigned address. + * + * @param ea + * A pointer to a ether_addr structure containing the ethernet address + * to check. + * @return + * True (1) if the given ethernet address is a locally assigned address; + * false (0) otherwise. + */ +static inline int is_local_admin_ether_addr(const struct ether_addr *ea) +{ + return ((ea->addr_bytes[0] & ETHER_LOCAL_ADMIN_ADDR) != 0); +} + +/** + * Check if an Ethernet address is a valid address. Checks that the address is a + * unicast address and is not filled with zeros. + * + * @param ea + * A pointer to a ether_addr structure containing the ethernet address + * to check. 
+ * @return + * True (1) if the given ethernet address is valid; + * false (0) otherwise. + */ +static inline int is_valid_assigned_ether_addr(const struct ether_addr *ea) +{ + return (is_unicast_ether_addr(ea) && (! is_zero_ether_addr(ea))); +} + +/** + * Generate a random Ethernet address that is locally administered + * and not multicast. + * @param addr + * A pointer to Ethernet address. + */ +static inline void eth_random_addr(uint8_t *addr) +{ + uint64_t rand = rte_rand(); + uint8_t *p = (uint8_t*)&rand; + + rte_memcpy(addr, p, ETHER_ADDR_LEN); + addr[0] &= ~ETHER_GROUP_ADDR; /* clear multicast bit */ + addr[0] |= ETHER_LOCAL_ADMIN_ADDR; /* set local assignment bit */ +} + +/** + * Fast copy an Ethernet address. + * + * @param ea_from + * A pointer to a ether_addr structure holding the Ethernet address to copy. + * @param ea_to + * A pointer to a ether_addr structure where to copy the Ethernet address. + */ +static inline void ether_addr_copy(const struct ether_addr *ea_from, + struct ether_addr *ea_to) +{ +#ifdef __INTEL_COMPILER + uint16_t *from_words = (uint16_t *)(ea_from->addr_bytes); + uint16_t *to_words = (uint16_t *)(ea_to->addr_bytes); + + to_words[0] = from_words[0]; + to_words[1] = from_words[1]; + to_words[2] = from_words[2]; +#else + /* + * Use the common way, because of a strange gcc warning. + */ + *ea_to = *ea_from; +#endif +} + +#define ETHER_ADDR_FMT_SIZE 18 +/** + * Format 48bits Ethernet address in pattern xx:xx:xx:xx:xx:xx. + * + * @param buf + * A pointer to buffer contains the formatted MAC address. + * @param size + * The format buffer size. + * @param eth_addr + * A pointer to a ether_addr structure. 
+ *
+ * The string is produced with snprintf(), so it is truncated when size is
+ * too small for the 17 characters plus terminator that the pattern needs
+ * (ETHER_ADDR_FMT_SIZE is 18).
+ */
+static inline void
+ether_format_addr(char *buf, uint16_t size,
+		  const struct ether_addr *eth_addr)
+{
+	snprintf(buf, size, "%02X:%02X:%02X:%02X:%02X:%02X", /* upper-case hex */
+		 eth_addr->addr_bytes[0],
+		 eth_addr->addr_bytes[1],
+		 eth_addr->addr_bytes[2],
+		 eth_addr->addr_bytes[3],
+		 eth_addr->addr_bytes[4],
+		 eth_addr->addr_bytes[5]);
+}
+
+/**
+ * Ethernet header: Contains the destination address, source address
+ * and frame type.
+ *
+ * The structure is packed so that it can be overlaid directly on packet data.
+ */
+struct ether_hdr {
+	struct ether_addr d_addr; /**< Destination address. */
+	struct ether_addr s_addr; /**< Source address. */
+	uint16_t ether_type;      /**< Frame type (big-endian, as on the wire). */
+} __attribute__((__packed__));
+
+/**
+ * Ethernet VLAN Header.
+ * Contains the 16-bit VLAN Tag Control Identifier and the Ethernet type
+ * of the encapsulated frame.
+ */
+struct vlan_hdr {
+	uint16_t vlan_tci; /**< Priority (3) + CFI (1) + Identifier Code (12) */
+	uint16_t eth_proto;/**< Ethernet type of encapsulated frame. */
+} __attribute__((__packed__));
+
+/**
+ * VXLAN protocol header.
+ * Contains the 8-bit flag, 24-bit VXLAN Network Identifier and
+ * Reserved fields (24 bits and 8 bits)
+ */
+struct vxlan_hdr {
+	uint32_t vx_flags; /**< flag (8) + Reserved (24). */
+	uint32_t vx_vni;   /**< VNI (24) + Reserved (8). */
+} __attribute__((__packed__));
+
+/*
+ * Ethernet frame types.
+ * The values are in host byte order; the VLAN helpers in this header convert
+ * them with rte_cpu_to_be_16() before comparing against or storing into
+ * ether_hdr.ether_type.
+ */
+#define ETHER_TYPE_IPv4 0x0800 /**< IPv4 Protocol. */
+#define ETHER_TYPE_IPv6 0x86DD /**< IPv6 Protocol. */
+#define ETHER_TYPE_ARP  0x0806 /**< Arp Protocol. */
+#define ETHER_TYPE_RARP 0x8035 /**< Reverse Arp Protocol. */
+#define ETHER_TYPE_VLAN 0x8100 /**< IEEE 802.1Q VLAN tagging. */
+#define ETHER_TYPE_1588 0x88F7 /**< IEEE 802.1AS 1588 Precise Time Protocol. */
+#define ETHER_TYPE_SLOW 0x8809 /**< Slow protocols (LACP and Marker). */
+#define ETHER_TYPE_TEB  0x6558 /**< Transparent Ethernet Bridging. */
+
+#define ETHER_VXLAN_HLEN (sizeof(struct udp_hdr) + sizeof(struct vxlan_hdr))
+/**< VXLAN tunnel header length (UDP header plus VXLAN header).
*/
+
+/**
+ * Extract VLAN tag information into mbuf
+ *
+ * Software version of VLAN stripping: when the Ethernet type is
+ * ETHER_TYPE_VLAN, the TCI is saved in m->vlan_tci (host byte order),
+ * PKT_RX_VLAN_PKT is set in m->ol_flags and the VLAN header is removed
+ * from the packet data.
+ *
+ * @param m
+ *   The packet mbuf.
+ * @return
+ *   - 0: Success
+ *   - -1: not a vlan packet (the mbuf is left untouched)
+ */
+static inline int rte_vlan_strip(struct rte_mbuf *m)
+{
+	struct ether_hdr *eh
+		 = rte_pktmbuf_mtod(m, struct ether_hdr *);
+
+	if (eh->ether_type != rte_cpu_to_be_16(ETHER_TYPE_VLAN))
+		return -1;
+
+	struct vlan_hdr *vh = (struct vlan_hdr *)(eh + 1);
+	m->ol_flags |= PKT_RX_VLAN_PKT;
+	m->vlan_tci = rte_be_to_cpu_16(vh->vlan_tci);
+
+	/*
+	 * Copy ether header over rather than moving whole packet: only the
+	 * two MAC addresses are shifted forward by the size of the VLAN
+	 * header. NOTE(review): the rte_pktmbuf_adj() result is used without
+	 * a NULL check - confirm callers guarantee a long enough first segment.
+	 */
+	memmove(rte_pktmbuf_adj(m, sizeof(struct vlan_hdr)),
+		eh, 2 * ETHER_ADDR_LEN);
+
+	return 0;
+}
+
+/**
+ * Insert VLAN tag into mbuf.
+ *
+ * Software version of VLAN unstripping: a VLAN header carrying
+ * (*m)->vlan_tci is inserted after the MAC addresses and the Ethernet type
+ * is set to ETHER_TYPE_VLAN.
+ *
+ * @param m
+ *   Pointer to the packet mbuf pointer. When the mbuf is shared (reference
+ *   count above 1) it is cloned, the original is freed and *m is updated to
+ *   point to the clone, also when -ENOSPC is returned afterwards.
+ * @return
+ *   - 0: On success
+ *   -ENOMEM: mbuf is shared and could not be cloned
+ *   -ENOSPC: not enough headroom in mbuf
+ */
+static inline int rte_vlan_insert(struct rte_mbuf **m)
+{
+	struct ether_hdr *oh, *nh;
+	struct vlan_hdr *vh;
+
+	/* Can't insert header if mbuf is shared */
+	if (rte_mbuf_refcnt_read(*m) > 1) {
+		struct rte_mbuf *copy;
+
+		copy = rte_pktmbuf_clone(*m, (*m)->pool);
+		if (unlikely(copy == NULL))
+			return -ENOMEM;
+		rte_pktmbuf_free(*m);
+		*m = copy;
+	}
+
+	oh = rte_pktmbuf_mtod(*m, struct ether_hdr *);
+	nh = (struct ether_hdr *)
+		rte_pktmbuf_prepend(*m, sizeof(struct vlan_hdr));
+	if (nh == NULL)
+		return -ENOSPC;
+
+	memmove(nh, oh, 2 * ETHER_ADDR_LEN); /* regions overlap, hence memmove */
+	nh->ether_type = rte_cpu_to_be_16(ETHER_TYPE_VLAN);
+
+	vh = (struct vlan_hdr *) (nh + 1);
+	vh->vlan_tci = rte_cpu_to_be_16((*m)->vlan_tci);
+
+	return 0;
+}
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _RTE_ETHER_H_ */
diff --git a/src/dpdk22/lib/librte_hash/rte_cmp_arm64.h b/src/dpdk22/lib/librte_hash/rte_cmp_arm64.h
new file mode 100644
index 00000000..6fd937b1
--- /dev/null
+++ b/src/dpdk22/lib/librte_hash/rte_cmp_arm64.h
@@ -0,0 +1,114 @@
+/*-
+ *   BSD LICENSE
+ *
+ *
Copyright(c) 2015 Cavium networks. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Cavium networks nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */
+
+/*
+ * Functions to compare multiple of 16 byte keys (up to 128 bytes).
+ *
+ * All of them follow the memcmp() convention used by the hash library:
+ * they return 0 when the keys are equal and non-zero when they differ.
+ * key_len is accepted only to match the comparison callback signature.
+ */
+static int
+rte_hash_k16_cmp_eq(const void *key1, const void *key2,
+		    size_t key_len __rte_unused)
+{
+	uint64_t x0, x1, y0, y1;
+
+	/*
+	 * The loads read memory that is reachable only through a register
+	 * operand, so the "memory" clobber is needed to tell the compiler
+	 * that the asm depends on the current contents of the keys.
+	 */
+	asm volatile(
+		"ldp %x[x1], %x[x0], [%x[p1]]"
+		: [x1]"=r"(x1), [x0]"=r"(x0)
+		: [p1]"r"(key1)
+		: "memory"
+		);
+	asm volatile(
+		"ldp %x[y1], %x[y0], [%x[p2]]"
+		: [y1]"=r"(y1), [y0]"=r"(y0)
+		: [p2]"r"(key2)
+		: "memory"
+		);
+	/* Any differing bit leaves a non-zero XOR result. */
+	x0 ^= y0;
+	x1 ^= y1;
+	return !(x0 == 0 && x1 == 0);
+}
+
+/* 32-byte keys: two 16-byte compares, stopping at the first difference. */
+static int
+rte_hash_k32_cmp_eq(const void *key1, const void *key2, size_t key_len)
+{
+	return rte_hash_k16_cmp_eq(key1, key2, key_len) ||
+		rte_hash_k16_cmp_eq((const char *) key1 + 16,
+				(const char *) key2 + 16, key_len);
+}
+
+/* 48-byte keys: three 16-byte compares. */
+static int
+rte_hash_k48_cmp_eq(const void *key1, const void *key2, size_t key_len)
+{
+	return rte_hash_k16_cmp_eq(key1, key2, key_len) ||
+		rte_hash_k16_cmp_eq((const char *) key1 + 16,
+				(const char *) key2 + 16, key_len) ||
+		rte_hash_k16_cmp_eq((const char *) key1 + 32,
+				(const char *) key2 + 32, key_len);
+}
+
+/* 64-byte keys: two 32-byte compares. */
+static int
+rte_hash_k64_cmp_eq(const void *key1, const void *key2, size_t key_len)
+{
+	return rte_hash_k32_cmp_eq(key1, key2, key_len) ||
+		rte_hash_k32_cmp_eq((const char *) key1 + 32,
+				(const char *) key2 + 32, key_len);
+}
+
+/* 80-byte keys: 64 + 16 bytes. */
+static int
+rte_hash_k80_cmp_eq(const void *key1, const void *key2, size_t key_len)
+{
+	return rte_hash_k64_cmp_eq(key1, key2, key_len) ||
+		rte_hash_k16_cmp_eq((const char *) key1 + 64,
+				(const char *) key2 + 64, key_len);
+}
+
+/* 96-byte keys: 64 + 32 bytes. */
+static int
+rte_hash_k96_cmp_eq(const void *key1, const void *key2, size_t key_len)
+{
+	return rte_hash_k64_cmp_eq(key1, key2, key_len) ||
+		rte_hash_k32_cmp_eq((const char *) key1 + 64,
+				(const char *) key2 + 64, key_len);
+}
+
+/* 112-byte keys: 64 + 32 + 16 bytes. */
+static int
+rte_hash_k112_cmp_eq(const void *key1, const void *key2, size_t key_len)
+{
+	return rte_hash_k64_cmp_eq(key1, key2, key_len) ||
+		rte_hash_k32_cmp_eq((const char *) key1 + 64,
+				(const char *) key2 + 64, key_len) ||
+		rte_hash_k16_cmp_eq((const char *) key1 + 96,
+				(const char *) key2 + 96, key_len);
+}
+
+/* 128-byte keys: two 64-byte compares. */
+static int
+rte_hash_k128_cmp_eq(const void *key1, const void *key2, size_t key_len)
+{
+	return rte_hash_k64_cmp_eq(key1, key2, key_len) ||
+		rte_hash_k64_cmp_eq((const char *) key1 + 64,
+				(const char *) key2 + 64, key_len);
+}
diff --git a/src/dpdk22/lib/librte_hash/rte_cmp_x86.h b/src/dpdk22/lib/librte_hash/rte_cmp_x86.h
new file mode 100644
index 00000000..7f79bace
--- /dev/null
+++ b/src/dpdk22/lib/librte_hash/rte_cmp_x86.h
@@ -0,0 +1,109 @@
+/*-
+ *   BSD LICENSE
+ *
+ *   Copyright(c) 2015 Intel Corporation. All rights reserved.
+ *   All rights reserved.
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in
+ *       the documentation and/or other materials provided with the
+ *       distribution.
+ *     * Neither the name of Intel Corporation nor the names of its
+ *       contributors may be used to endorse or promote products derived
+ *       from this software without specific prior written permission.
+ *
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED.
IN NO EVENT SHALL THE COPYRIGHT
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/*
+ * Functions to compare multiple of 16 byte keys (up to 128 bytes).
+ * Each returns 0 when the two keys match and 1 when they differ.
+ */
+static int
+rte_hash_k16_cmp_eq(const void *key1, const void *key2, size_t key_len __rte_unused)
+{
+	const __m128i a = _mm_loadu_si128((const __m128i *) key1);
+	const __m128i b = _mm_loadu_si128((const __m128i *) key2);
+#ifdef RTE_MACHINE_CPUFLAG_SSE4_1
+	/* XOR is all-zero only for identical inputs. */
+	const __m128i diff = _mm_xor_si128(a, b);
+
+	return _mm_test_all_zeros(diff, diff) == 0;
+#else
+	/* All 16 mask bits are set only when every 32-bit lane matched. */
+	return _mm_movemask_epi8(_mm_cmpeq_epi32(a, b)) != 0xffff;
+#endif
+}
+
+static int
+rte_hash_k32_cmp_eq(const void *key1, const void *key2, size_t key_len)
+{
+	const char *a = key1;
+	const char *b = key2;
+
+	if (rte_hash_k16_cmp_eq(a, b, key_len))
+		return 1;
+	return rte_hash_k16_cmp_eq(a + 16, b + 16, key_len) != 0;
+}
+
+static int
+rte_hash_k48_cmp_eq(const void *key1, const void *key2, size_t key_len)
+{
+	const char *a = key1;
+	const char *b = key2;
+
+	if (rte_hash_k16_cmp_eq(a, b, key_len))
+		return 1;
+	if (rte_hash_k16_cmp_eq(a + 16, b + 16, key_len))
+		return 1;
+	return rte_hash_k16_cmp_eq(a + 32, b + 32, key_len) != 0;
+}
+
+static int
+rte_hash_k64_cmp_eq(const void *key1, const void *key2, size_t key_len)
+{
+	const char *a = key1;
+	const char *b = key2;
+
+	if (rte_hash_k32_cmp_eq(a, b, key_len))
+		return 1;
+	return rte_hash_k32_cmp_eq(a + 32, b + 32, key_len) != 0;
+}
+
+static int
+rte_hash_k80_cmp_eq(const void *key1, const void *key2, size_t key_len)
+{
+	const char *a = key1;
+	const char *b = key2;
+
+	if (rte_hash_k64_cmp_eq(a, b, key_len))
+		return 1;
+	return rte_hash_k16_cmp_eq(a + 64, b + 64, key_len) != 0;
+}
+
+static int
+rte_hash_k96_cmp_eq(const void *key1, const void *key2, size_t key_len)
+{
+	const char *a = key1;
+	const char *b = key2;
+
+	if (rte_hash_k64_cmp_eq(a, b, key_len))
+		return 1;
+	return rte_hash_k32_cmp_eq(a + 64, b + 64, key_len) != 0;
+}
+
+static int
+rte_hash_k112_cmp_eq(const void *key1, const void *key2, size_t key_len)
+{
+	const char *a = key1;
+	const char *b = key2;
+
+	if (rte_hash_k64_cmp_eq(a, b, key_len))
+		return 1;
+	if (rte_hash_k32_cmp_eq(a + 64, b + 64, key_len))
+		return 1;
+	return rte_hash_k16_cmp_eq(a + 96, b + 96, key_len) != 0;
+}
+
+static int
+rte_hash_k128_cmp_eq(const void *key1, const void *key2, size_t key_len)
+{
+	const char *a = key1;
+	const char *b = key2;
+
+	if (rte_hash_k64_cmp_eq(a, b, key_len))
+		return 1;
+	return rte_hash_k64_cmp_eq(a + 64, b + 64, key_len) != 0;
+}
diff --git a/src/dpdk22/lib/librte_hash/rte_crc_arm64.h b/src/dpdk22/lib/librte_hash/rte_crc_arm64.h
new file mode 100644
index 00000000..02e26bca
--- /dev/null
+++ b/src/dpdk22/lib/librte_hash/rte_crc_arm64.h
@@ -0,0 +1,151 @@
+/*-
+ *   BSD LICENSE
+ *
+ *   Copyright(c) 2015 Cavium networks. All rights reserved.
+ *   All rights reserved.
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in
+ *       the documentation and/or other materials provided with the
+ *       distribution.
+ *     * Neither the name of Cavium networks nor the names of its
+ *       contributors may be used to endorse or promote products derived
+ *       from this software without specific prior written permission.
+ *
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef _RTE_CRC_ARM64_H_
+#define _RTE_CRC_ARM64_H_
+
+/**
+ * @file
+ *
+ * RTE CRC arm64 Hash
+ */
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include <stdint.h>
+#include <rte_cpuflags.h>
+#include <rte_branch_prediction.h>
+#include <rte_common.h>
+
+/**
+ * Fold a 32-bit word into a running CRC32-C value with the crc32cw
+ * instruction.
+ *
+ * @param data
+ *   Word to add to the checksum.
+ * @param init_val
+ *   Current CRC value.
+ * @return
+ *   Updated CRC value.
+ */
+static inline uint32_t
+crc32c_arm64_u32(uint32_t data, uint32_t init_val)
+{
+	/* Let the assembler accept CRC instructions for the code below. */
+	asm(".arch armv8-a+crc");
+	__asm__ volatile(
+			"crc32cw %w[crc], %w[crc], %w[value]"
+			: [crc] "+r" (init_val)
+			: [value] "r" (data));
+	return init_val;
+}
+
+/**
+ * Fold a 64-bit word into a running CRC32-C value with the crc32cx
+ * instruction.
+ *
+ * @param data
+ *   Double word to add to the checksum.
+ * @param init_val
+ *   Current CRC value.
+ * @return
+ *   Updated CRC value.
+ */
+static inline uint32_t
+crc32c_arm64_u64(uint64_t data, uint32_t init_val)
+{
+	/* Let the assembler accept CRC instructions for the code below. */
+	asm(".arch armv8-a+crc");
+	__asm__ volatile(
+			"crc32cx %w[crc], %w[crc], %x[value]"
+			: [crc] "+r" (init_val)
+			: [value] "r" (data));
+	return init_val;
+}
+
+/**
+ * Allow or disallow use of arm64 SIMD intrinsics for CRC32 hash
+ * calculation.
+ *
+ * @param alg
+ *   One of the following values (any other value, including an OR of both,
+ *   is ignored and leaves the current setting unchanged):
+ *   - (CRC32_SW) Don't use arm64 crc intrinsics
+ *   - (CRC32_ARM64) Use ARMv8 CRC intrinsic if available; when the CPU does
+ *     not report RTE_CPUFLAG_CRC32 the software implementation is selected
+ *     instead
+ *
+ */
+static inline void
+rte_hash_crc_set_alg(uint8_t alg)
+{
+	switch (alg) {
+	case CRC32_ARM64:
+		if (!rte_cpu_get_flag_enabled(RTE_CPUFLAG_CRC32))
+			alg = CRC32_SW;
+		/* fall through: store the (possibly downgraded) choice */
+	case CRC32_SW:
+		crc32_alg = alg;
+		break;
+	default:
+		break;
+	}
+}
+
+/*
+ * Setting the best available algorithm.
+ * Declared as a constructor, so it runs automatically at load time.
+ */
+static inline void __attribute__((constructor))
+rte_hash_crc_init_alg(void)
+{
+	rte_hash_crc_set_alg(CRC32_ARM64);
+}
+
+/**
+ * Use single crc32 instruction to perform a hash on a 4 byte value.
+ * Fall back to software crc32 implementation in case arm64 crc intrinsics is
+ * not supported
+ *
+ * @param data
+ *   Data to perform hash on.
+ * @param init_val
+ *   Value to initialise hash generator.
+ * @return
+ *   32bit calculated hash value.
+ */
+static inline uint32_t
+rte_hash_crc_4byte(uint32_t data, uint32_t init_val)
+{
+	/*
+	 * NOTE(review): this tests the flag with '&' while the 8-byte variant
+	 * uses '=='; both agree as long as crc32_alg only ever holds CRC32_SW
+	 * or CRC32_ARM64, which is all rte_hash_crc_set_alg() stores.
+	 */
+	if (likely(crc32_alg & CRC32_ARM64))
+		return crc32c_arm64_u32(data, init_val);
+
+	return crc32c_1word(data, init_val);
+}
+
+/**
+ * Use single crc32 instruction to perform a hash on a 8 byte value.
+ * Fall back to software crc32 implementation in case arm64 crc intrinsics is
+ * not supported
+ *
+ * @param data
+ *   Data to perform hash on.
+ * @param init_val
+ *   Value to initialise hash generator.
+ * @return
+ *   32bit calculated hash value.
+ */
+static inline uint32_t
+rte_hash_crc_8byte(uint64_t data, uint32_t init_val)
+{
+	/* Hardware CRC when selected, two-word software CRC otherwise. */
+	return likely(crc32_alg == CRC32_ARM64) ?
+		crc32c_arm64_u64(data, init_val) :
+		crc32c_2words(data, init_val);
+}
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _RTE_CRC_ARM64_H_ */
diff --git a/src/dpdk22/lib/librte_hash/rte_cuckoo_hash.c b/src/dpdk22/lib/librte_hash/rte_cuckoo_hash.c
new file mode 100644
index 00000000..3e3167c5
--- /dev/null
+++ b/src/dpdk22/lib/librte_hash/rte_cuckoo_hash.c
@@ -0,0 +1,1243 @@
+/*-
+ *   BSD LICENSE
+ *
+ *   Copyright(c) 2010-2015 Intel Corporation. All rights reserved.
+ *   All rights reserved.
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in
+ *       the documentation and/or other materials provided with the
+ *       distribution.
+ *     * Neither the name of Intel Corporation nor the names of its
+ *       contributors may be used to endorse or promote products derived
+ *       from this software without specific prior written permission.
+ *
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED.
IN NO EVENT SHALL THE COPYRIGHT
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <string.h>
+#include <stdint.h>
+#include <errno.h>
+#include <stdio.h>
+#include <stdarg.h>
+#include <sys/queue.h>
+
+#include <rte_common.h>
+#include <rte_memory.h>         /* for definition of RTE_CACHE_LINE_SIZE */
+#include <rte_log.h>
+#include <rte_memcpy.h>
+#include <rte_prefetch.h>
+#include <rte_branch_prediction.h>
+#include <rte_memzone.h>
+#include <rte_malloc.h>
+#include <rte_eal.h>
+#include <rte_eal_memconfig.h>
+#include <rte_per_lcore.h>
+#include <rte_errno.h>
+#include <rte_string_fns.h>
+#include <rte_cpuflags.h>
+#include <rte_log.h>
+#include <rte_rwlock.h>
+#include <rte_spinlock.h>
+#include <rte_ring.h>
+#include <rte_compat.h>
+
+#include "rte_hash.h"
+#if defined(RTE_ARCH_X86_64) || defined(RTE_ARCH_I686) || defined(RTE_ARCH_X86_X32)
+#include "rte_cmp_x86.h"
+#endif
+
+#if defined(RTE_ARCH_ARM64)
+#include "rte_cmp_arm64.h"
+#endif
+
+TAILQ_HEAD(rte_hash_list, rte_tailq_entry);
+
+/* Tail queue on which every created hash table is registered by name. */
+static struct rte_tailq_elem rte_hash_tailq = {
+	.name = "RTE_HASH",
+};
+EAL_REGISTER_TAILQ(rte_hash_tailq)
+
+/*
+ * Macro to enable/disable run-time checking of function parameters.
+ * Without RTE_LIBRTE_HASH_DEBUG it expands to nothing, so the public
+ * functions then trust their arguments.
+ */
+#if defined(RTE_LIBRTE_HASH_DEBUG)
+#define RETURN_IF_TRUE(cond, retval) do { \
+	if (cond) \
+		return retval; \
+} while (0)
+#else
+#define RETURN_IF_TRUE(cond, retval)
+#endif
+
+/* Hash function used if none is specified */
+#if defined(RTE_MACHINE_CPUFLAG_SSE4_2) || defined(RTE_MACHINE_CPUFLAG_CRC32)
+#include <rte_hash_crc.h>
+#define DEFAULT_HASH_FUNC       rte_hash_crc
+#else
+#include <rte_jhash.h>
+#define DEFAULT_HASH_FUNC       rte_jhash
+#endif
+
+/** Number of items per bucket.
*/ +#define RTE_HASH_BUCKET_ENTRIES 4 + +#define NULL_SIGNATURE 0 + +#define KEY_ALIGNMENT 16 + +#define LCORE_CACHE_SIZE 8 + +struct lcore_cache { + unsigned len; /**< Cache len */ + void *objs[LCORE_CACHE_SIZE]; /**< Cache objects */ +} __rte_cache_aligned; + +/** A hash table structure. */ +struct rte_hash { + char name[RTE_HASH_NAMESIZE]; /**< Name of the hash. */ + uint32_t entries; /**< Total table entries. */ + uint32_t num_buckets; /**< Number of buckets in table. */ + uint32_t key_len; /**< Length of hash key. */ + rte_hash_function hash_func; /**< Function used to calculate hash. */ + uint32_t hash_func_init_val; /**< Init value used by hash_func. */ + rte_hash_cmp_eq_t rte_hash_cmp_eq; /**< Function used to compare keys. */ + uint32_t bucket_bitmask; /**< Bitmask for getting bucket index + from hash signature. */ + uint32_t key_entry_size; /**< Size of each key entry. */ + + struct rte_ring *free_slots; /**< Ring that stores all indexes + of the free slots in the key table */ + void *key_store; /**< Table storing all keys and data */ + struct rte_hash_bucket *buckets; /**< Table with buckets storing all the + hash values and key indexes + to the key table*/ + uint8_t hw_trans_mem_support; /**< Hardware transactional + memory support */ + struct lcore_cache *local_free_slots; + /**< Local cache per lcore, storing some indexes of the free slots */ +} __rte_cache_aligned; + +/* Structure storing both primary and secondary hashes */ +struct rte_hash_signatures { + union { + struct { + hash_sig_t current; + hash_sig_t alt; + }; + uint64_t sig; + }; +}; + +/* Structure that stores key-value pair */ +struct rte_hash_key { + union { + uintptr_t idata; + void *pdata; + }; + /* Variable key size */ + char key[0]; +} __attribute__((aligned(KEY_ALIGNMENT))); + +/** Bucket structure */ +struct rte_hash_bucket { + struct rte_hash_signatures signatures[RTE_HASH_BUCKET_ENTRIES]; + /* Includes dummy key index that always contains index 0 */ + uint32_t 
key_idx[RTE_HASH_BUCKET_ENTRIES + 1]; + uint8_t flag[RTE_HASH_BUCKET_ENTRIES]; +} __rte_cache_aligned; + +struct rte_hash * +rte_hash_find_existing(const char *name) +{ + struct rte_hash *h = NULL; + struct rte_tailq_entry *te; + struct rte_hash_list *hash_list; + + hash_list = RTE_TAILQ_CAST(rte_hash_tailq.head, rte_hash_list); + + rte_rwlock_read_lock(RTE_EAL_TAILQ_RWLOCK); + TAILQ_FOREACH(te, hash_list, next) { + h = (struct rte_hash *) te->data; + if (strncmp(name, h->name, RTE_HASH_NAMESIZE) == 0) + break; + } + rte_rwlock_read_unlock(RTE_EAL_TAILQ_RWLOCK); + + if (te == NULL) { + rte_errno = ENOENT; + return NULL; + } + return h; +} + +void rte_hash_set_cmp_func(struct rte_hash *h, rte_hash_cmp_eq_t func) +{ + h->rte_hash_cmp_eq = func; +} + +struct rte_hash * +rte_hash_create(const struct rte_hash_parameters *params) +{ + struct rte_hash *h = NULL; + struct rte_tailq_entry *te = NULL; + struct rte_hash_list *hash_list; + struct rte_ring *r = NULL; + char hash_name[RTE_HASH_NAMESIZE]; + void *k = NULL; + void *buckets = NULL; + char ring_name[RTE_RING_NAMESIZE]; + unsigned num_key_slots; + unsigned hw_trans_mem_support = 0; + unsigned i; + + hash_list = RTE_TAILQ_CAST(rte_hash_tailq.head, rte_hash_list); + + if (params == NULL) { + RTE_LOG(ERR, HASH, "rte_hash_create has no parameters\n"); + return NULL; + } + + /* Check for valid parameters */ + if ((params->entries > RTE_HASH_ENTRIES_MAX) || + (params->entries < RTE_HASH_BUCKET_ENTRIES) || + !rte_is_power_of_2(RTE_HASH_BUCKET_ENTRIES) || + (params->key_len == 0)) { + rte_errno = EINVAL; + RTE_LOG(ERR, HASH, "rte_hash_create has invalid parameters\n"); + return NULL; + } + + /* Check extra flags field to check extra options. 
*/ + if (params->extra_flag & RTE_HASH_EXTRA_FLAGS_TRANS_MEM_SUPPORT) + hw_trans_mem_support = 1; + + snprintf(hash_name, sizeof(hash_name), "HT_%s", params->name); + + /* Guarantee there's no existing */ + h = rte_hash_find_existing(params->name); + if (h != NULL) + return h; + + te = rte_zmalloc("HASH_TAILQ_ENTRY", sizeof(*te), 0); + if (te == NULL) { + RTE_LOG(ERR, HASH, "tailq entry allocation failed\n"); + goto err; + } + + h = (struct rte_hash *)rte_zmalloc_socket(hash_name, sizeof(struct rte_hash), + RTE_CACHE_LINE_SIZE, params->socket_id); + + if (h == NULL) { + RTE_LOG(ERR, HASH, "memory allocation failed\n"); + goto err; + } + + const uint32_t num_buckets = rte_align32pow2(params->entries) + / RTE_HASH_BUCKET_ENTRIES; + + buckets = rte_zmalloc_socket(NULL, + num_buckets * sizeof(struct rte_hash_bucket), + RTE_CACHE_LINE_SIZE, params->socket_id); + + if (buckets == NULL) { + RTE_LOG(ERR, HASH, "memory allocation failed\n"); + goto err; + } + + const uint32_t key_entry_size = sizeof(struct rte_hash_key) + params->key_len; + + /* Store all keys and leave the first entry as a dummy entry for lookup_bulk */ + if (hw_trans_mem_support) + /* + * Increase number of slots by total number of indices + * that can be stored in the lcore caches + * except for the first cache + */ + num_key_slots = params->entries + (RTE_MAX_LCORE - 1) * + LCORE_CACHE_SIZE + 1; + else + num_key_slots = params->entries + 1; + + const uint64_t key_tbl_size = (uint64_t) key_entry_size * num_key_slots; + + k = rte_zmalloc_socket(NULL, key_tbl_size, + RTE_CACHE_LINE_SIZE, params->socket_id); + + if (k == NULL) { + RTE_LOG(ERR, HASH, "memory allocation failed\n"); + goto err; + } + +/* + * If x86 architecture is used, select appropriate compare function, + * which may use x86 instrinsics, otherwise use memcmp + */ +#if defined(RTE_ARCH_X86_64) || defined(RTE_ARCH_I686) ||\ + defined(RTE_ARCH_X86_X32) || defined(RTE_ARCH_ARM64) + /* Select function to compare keys */ + switch 
(params->key_len) { + case 16: + h->rte_hash_cmp_eq = rte_hash_k16_cmp_eq; + break; + case 32: + h->rte_hash_cmp_eq = rte_hash_k32_cmp_eq; + break; + case 48: + h->rte_hash_cmp_eq = rte_hash_k48_cmp_eq; + break; + case 64: + h->rte_hash_cmp_eq = rte_hash_k64_cmp_eq; + break; + case 80: + h->rte_hash_cmp_eq = rte_hash_k80_cmp_eq; + break; + case 96: + h->rte_hash_cmp_eq = rte_hash_k96_cmp_eq; + break; + case 112: + h->rte_hash_cmp_eq = rte_hash_k112_cmp_eq; + break; + case 128: + h->rte_hash_cmp_eq = rte_hash_k128_cmp_eq; + break; + default: + /* If key is not multiple of 16, use generic memcmp */ + h->rte_hash_cmp_eq = memcmp; + } +#else + h->rte_hash_cmp_eq = memcmp; +#endif + + snprintf(ring_name, sizeof(ring_name), "HT_%s", params->name); + r = rte_ring_create(ring_name, rte_align32pow2(num_key_slots), + params->socket_id, 0); + if (r == NULL) { + RTE_LOG(ERR, HASH, "memory allocation failed\n"); + goto err; + } + + if (hw_trans_mem_support) { + h->local_free_slots = rte_zmalloc_socket(NULL, + sizeof(struct lcore_cache) * RTE_MAX_LCORE, + RTE_CACHE_LINE_SIZE, params->socket_id); + } + + /* Setup hash context */ + snprintf(h->name, sizeof(h->name), "%s", params->name); + h->entries = params->entries; + h->key_len = params->key_len; + h->key_entry_size = key_entry_size; + h->hash_func_init_val = params->hash_func_init_val; + + h->num_buckets = num_buckets; + h->bucket_bitmask = h->num_buckets - 1; + h->buckets = buckets; + h->hash_func = (params->hash_func == NULL) ? + DEFAULT_HASH_FUNC : params->hash_func; + h->key_store = k; + h->free_slots = r; + h->hw_trans_mem_support = hw_trans_mem_support; + + /* populate the free slots ring. 
Entry zero is reserved for key misses */ + for (i = 1; i < params->entries + 1; i++) + rte_ring_sp_enqueue(r, (void *)((uintptr_t) i)); + + rte_rwlock_write_lock(RTE_EAL_TAILQ_RWLOCK); + te->data = (void *) h; + TAILQ_INSERT_TAIL(hash_list, te, next); + rte_rwlock_write_unlock(RTE_EAL_TAILQ_RWLOCK); + + return h; +err: + rte_free(te); + rte_free(h); + rte_free(buckets); + rte_free(k); + return NULL; +} + +void +rte_hash_free(struct rte_hash *h) +{ + struct rte_tailq_entry *te; + struct rte_hash_list *hash_list; + + if (h == NULL) + return; + + hash_list = RTE_TAILQ_CAST(rte_hash_tailq.head, rte_hash_list); + + rte_rwlock_write_lock(RTE_EAL_TAILQ_RWLOCK); + + /* find out tailq entry */ + TAILQ_FOREACH(te, hash_list, next) { + if (te->data == (void *) h) + break; + } + + if (te == NULL) { + rte_rwlock_write_unlock(RTE_EAL_TAILQ_RWLOCK); + return; + } + + TAILQ_REMOVE(hash_list, te, next); + + rte_rwlock_write_unlock(RTE_EAL_TAILQ_RWLOCK); + + if (h->hw_trans_mem_support) + rte_free(h->local_free_slots); + + rte_ring_free(h->free_slots); + rte_free(h->key_store); + rte_free(h->buckets); + rte_free(h); + rte_free(te); +} + +hash_sig_t +rte_hash_hash(const struct rte_hash *h, const void *key) +{ + /* calc hash result by key */ + return h->hash_func(key, h->key_len, h->hash_func_init_val); +} + +/* Calc the secondary hash value from the primary hash value of a given key */ +static inline hash_sig_t +rte_hash_secondary_hash(const hash_sig_t primary_hash) +{ + static const unsigned all_bits_shift = 12; + static const unsigned alt_bits_xor = 0x5bd1e995; + + uint32_t tag = primary_hash >> all_bits_shift; + + return (primary_hash ^ ((tag + 1) * alt_bits_xor)); +} + +void +rte_hash_reset(struct rte_hash *h) +{ + void *ptr; + unsigned i; + + if (h == NULL) + return; + + memset(h->buckets, 0, h->num_buckets * sizeof(struct rte_hash_bucket)); + memset(h->key_store, 0, h->key_entry_size * (h->entries + 1)); + + /* clear the free ring */ + while (rte_ring_dequeue(h->free_slots, 
&ptr) == 0) + rte_pause(); + + /* Repopulate the free slots ring. Entry zero is reserved for key misses */ + for (i = 1; i < h->entries + 1; i++) + rte_ring_sp_enqueue(h->free_slots, (void *)((uintptr_t) i)); + + if (h->hw_trans_mem_support) { + /* Reset local caches per lcore */ + for (i = 0; i < RTE_MAX_LCORE; i++) + h->local_free_slots[i].len = 0; + } +} + +/* Search for an entry that can be pushed to its alternative location */ +static inline int +make_space_bucket(const struct rte_hash *h, struct rte_hash_bucket *bkt) +{ + unsigned i, j; + int ret; + uint32_t next_bucket_idx; + struct rte_hash_bucket *next_bkt[RTE_HASH_BUCKET_ENTRIES]; + + /* + * Push existing item (search for bucket with space in + * alternative locations) to its alternative location + */ + for (i = 0; i < RTE_HASH_BUCKET_ENTRIES; i++) { + /* Search for space in alternative locations */ + next_bucket_idx = bkt->signatures[i].alt & h->bucket_bitmask; + next_bkt[i] = &h->buckets[next_bucket_idx]; + for (j = 0; j < RTE_HASH_BUCKET_ENTRIES; j++) { + if (next_bkt[i]->signatures[j].sig == NULL_SIGNATURE) + break; + } + + if (j != RTE_HASH_BUCKET_ENTRIES) + break; + } + + /* Alternative location has spare room (end of recursive function) */ + if (i != RTE_HASH_BUCKET_ENTRIES) { + next_bkt[i]->signatures[j].alt = bkt->signatures[i].current; + next_bkt[i]->signatures[j].current = bkt->signatures[i].alt; + next_bkt[i]->key_idx[j] = bkt->key_idx[i]; + return i; + } + + /* Pick entry that has not been pushed yet */ + for (i = 0; i < RTE_HASH_BUCKET_ENTRIES; i++) + if (bkt->flag[i] == 0) + break; + + /* All entries have been pushed, so entry cannot be added */ + if (i == RTE_HASH_BUCKET_ENTRIES) + return -ENOSPC; + + /* Set flag to indicate that this entry is going to be pushed */ + bkt->flag[i] = 1; + /* Need room in alternative bucket to insert the pushed entry */ + ret = make_space_bucket(h, next_bkt[i]); + /* + * After recursive function. 
+ * Clear flags and insert the pushed entry + * in its alternative location if successful, + * or return error + */ + bkt->flag[i] = 0; + if (ret >= 0) { + next_bkt[i]->signatures[ret].alt = bkt->signatures[i].current; + next_bkt[i]->signatures[ret].current = bkt->signatures[i].alt; + next_bkt[i]->key_idx[ret] = bkt->key_idx[i]; + return i; + } else + return ret; + +} + +/* + * Function called to enqueue back an index in the cache/ring, + * as slot has not being used and it can be used in the + * next addition attempt. + */ +static inline void +enqueue_slot_back(const struct rte_hash *h, + struct lcore_cache *cached_free_slots, + void *slot_id) +{ + if (h->hw_trans_mem_support) { + cached_free_slots->objs[cached_free_slots->len] = slot_id; + cached_free_slots->len++; + } else + rte_ring_sp_enqueue(h->free_slots, slot_id); +} + +static inline int32_t +__rte_hash_add_key_with_hash(const struct rte_hash *h, const void *key, + hash_sig_t sig, void *data) +{ + hash_sig_t alt_hash; + uint32_t prim_bucket_idx, sec_bucket_idx; + unsigned i; + struct rte_hash_bucket *prim_bkt, *sec_bkt; + struct rte_hash_key *new_k, *k, *keys = h->key_store; + void *slot_id = NULL; + uint32_t new_idx; + int ret; + unsigned n_slots; + unsigned lcore_id; + struct lcore_cache *cached_free_slots = NULL; + + prim_bucket_idx = sig & h->bucket_bitmask; + prim_bkt = &h->buckets[prim_bucket_idx]; + rte_prefetch0(prim_bkt); + + alt_hash = rte_hash_secondary_hash(sig); + sec_bucket_idx = alt_hash & h->bucket_bitmask; + sec_bkt = &h->buckets[sec_bucket_idx]; + rte_prefetch0(sec_bkt); + + /* Get a new slot for storing the new key */ + if (h->hw_trans_mem_support) { + lcore_id = rte_lcore_id(); + cached_free_slots = &h->local_free_slots[lcore_id]; + /* Try to get a free slot from the local cache */ + if (cached_free_slots->len == 0) { + /* Need to get another burst of free slots from global ring */ + n_slots = rte_ring_mc_dequeue_burst(h->free_slots, + cached_free_slots->objs, LCORE_CACHE_SIZE); + if 
(n_slots == 0) + return -ENOSPC; + + cached_free_slots->len += n_slots; + } + + /* Get a free slot from the local cache */ + cached_free_slots->len--; + slot_id = cached_free_slots->objs[cached_free_slots->len]; + } else { + if (rte_ring_sc_dequeue(h->free_slots, &slot_id) != 0) + return -ENOSPC; + } + + new_k = RTE_PTR_ADD(keys, (uintptr_t)slot_id * h->key_entry_size); + rte_prefetch0(new_k); + new_idx = (uint32_t)((uintptr_t) slot_id); + + /* Check if key is already inserted in primary location */ + for (i = 0; i < RTE_HASH_BUCKET_ENTRIES; i++) { + if (prim_bkt->signatures[i].current == sig && + prim_bkt->signatures[i].alt == alt_hash) { + k = (struct rte_hash_key *) ((char *)keys + + prim_bkt->key_idx[i] * h->key_entry_size); + if (h->rte_hash_cmp_eq(key, k->key, h->key_len) == 0) { + /* Enqueue index of free slot back in the ring. */ + enqueue_slot_back(h, cached_free_slots, slot_id); + /* Update data */ + k->pdata = data; + /* + * Return index where key is stored, + * substracting the first dummy index + */ + return (prim_bkt->key_idx[i] - 1); + } + } + } + + /* Check if key is already inserted in secondary location */ + for (i = 0; i < RTE_HASH_BUCKET_ENTRIES; i++) { + if (sec_bkt->signatures[i].alt == sig && + sec_bkt->signatures[i].current == alt_hash) { + k = (struct rte_hash_key *) ((char *)keys + + sec_bkt->key_idx[i] * h->key_entry_size); + if (h->rte_hash_cmp_eq(key, k->key, h->key_len) == 0) { + /* Enqueue index of free slot back in the ring. 
*/ + enqueue_slot_back(h, cached_free_slots, slot_id); + /* Update data */ + k->pdata = data; + /* + * Return index where key is stored, + * substracting the first dummy index + */ + return (sec_bkt->key_idx[i] - 1); + } + } + } + + /* Copy key */ + rte_memcpy(new_k->key, key, h->key_len); + new_k->pdata = data; + + /* Insert new entry is there is room in the primary bucket */ + for (i = 0; i < RTE_HASH_BUCKET_ENTRIES; i++) { + /* Check if slot is available */ + if (likely(prim_bkt->signatures[i].sig == NULL_SIGNATURE)) { + prim_bkt->signatures[i].current = sig; + prim_bkt->signatures[i].alt = alt_hash; + prim_bkt->key_idx[i] = new_idx; + return new_idx - 1; + } + } + + /* Primary bucket is full, so we need to make space for new entry */ + ret = make_space_bucket(h, prim_bkt); + /* + * After recursive function. + * Insert the new entry in the position of the pushed entry + * if successful or return error and + * store the new slot back in the ring + */ + if (ret >= 0) { + prim_bkt->signatures[ret].current = sig; + prim_bkt->signatures[ret].alt = alt_hash; + prim_bkt->key_idx[ret] = new_idx; + return (new_idx - 1); + } + + /* Error in addition, store new slot back in the ring and return error */ + enqueue_slot_back(h, cached_free_slots, (void *)((uintptr_t) new_idx)); + + return ret; +} + +int32_t +rte_hash_add_key_with_hash(const struct rte_hash *h, + const void *key, hash_sig_t sig) +{ + RETURN_IF_TRUE(((h == NULL) || (key == NULL)), -EINVAL); + return __rte_hash_add_key_with_hash(h, key, sig, 0); +} + +int32_t +rte_hash_add_key(const struct rte_hash *h, const void *key) +{ + RETURN_IF_TRUE(((h == NULL) || (key == NULL)), -EINVAL); + return __rte_hash_add_key_with_hash(h, key, rte_hash_hash(h, key), 0); +} + +int +rte_hash_add_key_with_hash_data(const struct rte_hash *h, + const void *key, hash_sig_t sig, void *data) +{ + int ret; + + RETURN_IF_TRUE(((h == NULL) || (key == NULL)), -EINVAL); + ret = __rte_hash_add_key_with_hash(h, key, sig, data); + if (ret >= 0) + 
return 0; + else + return ret; +} + +int +rte_hash_add_key_data(const struct rte_hash *h, const void *key, void *data) +{ + int ret; + + RETURN_IF_TRUE(((h == NULL) || (key == NULL)), -EINVAL); + + ret = __rte_hash_add_key_with_hash(h, key, rte_hash_hash(h, key), data); + if (ret >= 0) + return 0; + else + return ret; +} +static inline int32_t +__rte_hash_lookup_with_hash(const struct rte_hash *h, const void *key, + hash_sig_t sig, void **data) +{ + uint32_t bucket_idx; + hash_sig_t alt_hash; + unsigned i; + struct rte_hash_bucket *bkt; + struct rte_hash_key *k, *keys = h->key_store; + + bucket_idx = sig & h->bucket_bitmask; + bkt = &h->buckets[bucket_idx]; + + /* Check if key is in primary location */ + for (i = 0; i < RTE_HASH_BUCKET_ENTRIES; i++) { + if (bkt->signatures[i].current == sig && + bkt->signatures[i].sig != NULL_SIGNATURE) { + k = (struct rte_hash_key *) ((char *)keys + + bkt->key_idx[i] * h->key_entry_size); + if (h->rte_hash_cmp_eq(key, k->key, h->key_len) == 0) { + if (data != NULL) + *data = k->pdata; + /* + * Return index where key is stored, + * substracting the first dummy index + */ + return (bkt->key_idx[i] - 1); + } + } + } + + /* Calculate secondary hash */ + alt_hash = rte_hash_secondary_hash(sig); + bucket_idx = alt_hash & h->bucket_bitmask; + bkt = &h->buckets[bucket_idx]; + + /* Check if key is in secondary location */ + for (i = 0; i < RTE_HASH_BUCKET_ENTRIES; i++) { + if (bkt->signatures[i].current == alt_hash && + bkt->signatures[i].alt == sig) { + k = (struct rte_hash_key *) ((char *)keys + + bkt->key_idx[i] * h->key_entry_size); + if (h->rte_hash_cmp_eq(key, k->key, h->key_len) == 0) { + if (data != NULL) + *data = k->pdata; + /* + * Return index where key is stored, + * substracting the first dummy index + */ + return (bkt->key_idx[i] - 1); + } + } + } + + return -ENOENT; +} + +int32_t +rte_hash_lookup_with_hash(const struct rte_hash *h, + const void *key, hash_sig_t sig) +{ + RETURN_IF_TRUE(((h == NULL) || (key == NULL)), 
-EINVAL); + return __rte_hash_lookup_with_hash(h, key, sig, NULL); +} + +int32_t +rte_hash_lookup(const struct rte_hash *h, const void *key) +{ + RETURN_IF_TRUE(((h == NULL) || (key == NULL)), -EINVAL); + return __rte_hash_lookup_with_hash(h, key, rte_hash_hash(h, key), NULL); +} + +int +rte_hash_lookup_with_hash_data(const struct rte_hash *h, + const void *key, hash_sig_t sig, void **data) +{ + RETURN_IF_TRUE(((h == NULL) || (key == NULL)), -EINVAL); + return __rte_hash_lookup_with_hash(h, key, sig, data); +} + +int +rte_hash_lookup_data(const struct rte_hash *h, const void *key, void **data) +{ + RETURN_IF_TRUE(((h == NULL) || (key == NULL)), -EINVAL); + return __rte_hash_lookup_with_hash(h, key, rte_hash_hash(h, key), data); +} + +static inline void +remove_entry(const struct rte_hash *h, struct rte_hash_bucket *bkt, unsigned i) +{ + unsigned lcore_id, n_slots; + struct lcore_cache *cached_free_slots; + + bkt->signatures[i].sig = NULL_SIGNATURE; + if (h->hw_trans_mem_support) { + lcore_id = rte_lcore_id(); + cached_free_slots = &h->local_free_slots[lcore_id]; + /* Cache full, need to free it. */ + if (cached_free_slots->len == LCORE_CACHE_SIZE) { + /* Need to enqueue the free slots in global ring. */ + n_slots = rte_ring_mp_enqueue_burst(h->free_slots, + cached_free_slots->objs, + LCORE_CACHE_SIZE); + cached_free_slots->len -= n_slots; + } + /* Put index of new free slot in cache. 
*/ + cached_free_slots->objs[cached_free_slots->len] = + (void *)((uintptr_t)bkt->key_idx[i]); + cached_free_slots->len++; + } else { + rte_ring_sp_enqueue(h->free_slots, + (void *)((uintptr_t)bkt->key_idx[i])); + } +} + +static inline int32_t +__rte_hash_del_key_with_hash(const struct rte_hash *h, const void *key, + hash_sig_t sig) +{ + uint32_t bucket_idx; + hash_sig_t alt_hash; + unsigned i; + struct rte_hash_bucket *bkt; + struct rte_hash_key *k, *keys = h->key_store; + + bucket_idx = sig & h->bucket_bitmask; + bkt = &h->buckets[bucket_idx]; + + /* Check if key is in primary location */ + for (i = 0; i < RTE_HASH_BUCKET_ENTRIES; i++) { + if (bkt->signatures[i].current == sig && + bkt->signatures[i].sig != NULL_SIGNATURE) { + k = (struct rte_hash_key *) ((char *)keys + + bkt->key_idx[i] * h->key_entry_size); + if (h->rte_hash_cmp_eq(key, k->key, h->key_len) == 0) { + remove_entry(h, bkt, i); + + /* + * Return index where key is stored, + * substracting the first dummy index + */ + return (bkt->key_idx[i] - 1); + } + } + } + + /* Calculate secondary hash */ + alt_hash = rte_hash_secondary_hash(sig); + bucket_idx = alt_hash & h->bucket_bitmask; + bkt = &h->buckets[bucket_idx]; + + /* Check if key is in secondary location */ + for (i = 0; i < RTE_HASH_BUCKET_ENTRIES; i++) { + if (bkt->signatures[i].current == alt_hash && + bkt->signatures[i].sig != NULL_SIGNATURE) { + k = (struct rte_hash_key *) ((char *)keys + + bkt->key_idx[i] * h->key_entry_size); + if (h->rte_hash_cmp_eq(key, k->key, h->key_len) == 0) { + remove_entry(h, bkt, i); + + /* + * Return index where key is stored, + * substracting the first dummy index + */ + return (bkt->key_idx[i] - 1); + } + } + } + + return -ENOENT; +} + +int32_t +rte_hash_del_key_with_hash(const struct rte_hash *h, + const void *key, hash_sig_t sig) +{ + RETURN_IF_TRUE(((h == NULL) || (key == NULL)), -EINVAL); + return __rte_hash_del_key_with_hash(h, key, sig); +} + +int32_t +rte_hash_del_key(const struct rte_hash *h, const void 
*key) +{ + RETURN_IF_TRUE(((h == NULL) || (key == NULL)), -EINVAL); + return __rte_hash_del_key_with_hash(h, key, rte_hash_hash(h, key)); +} + +/* Lookup bulk stage 0: Pick the lowest pending key index from lookup_mask (0 if none is left), prefetch that key and clear its mask bit */ +static inline void +lookup_stage0(unsigned *idx, uint64_t *lookup_mask, + const void * const *keys) +{ + *idx = 0; + if (*lookup_mask != 0) + *idx = __builtin_ctzll(*lookup_mask); + + rte_prefetch0(keys[*idx]); + *lookup_mask &= ~(1llu << *idx); +} + +/* + * Lookup bulk stage 1: Calculate primary/secondary hashes + * and prefetch primary/secondary buckets + */ +static inline void +lookup_stage1(unsigned idx, hash_sig_t *prim_hash, hash_sig_t *sec_hash, + const struct rte_hash_bucket **primary_bkt, + const struct rte_hash_bucket **secondary_bkt, + hash_sig_t *hash_vals, const void * const *keys, + const struct rte_hash *h) +{ + *prim_hash = rte_hash_hash(h, keys[idx]); + hash_vals[idx] = *prim_hash; + *sec_hash = rte_hash_secondary_hash(*prim_hash); + + *primary_bkt = &h->buckets[*prim_hash & h->bucket_bitmask]; + *secondary_bkt = &h->buckets[*sec_hash & h->bucket_bitmask]; + + rte_prefetch0(*primary_bkt); + rte_prefetch0(*secondary_bkt); +} + +/* + * Lookup bulk stage 2: Search for match hashes in primary/secondary locations + * and prefetch first key slot + */ +static inline void +lookup_stage2(unsigned idx, hash_sig_t prim_hash, hash_sig_t sec_hash, + const struct rte_hash_bucket *prim_bkt, + const struct rte_hash_bucket *sec_bkt, + const struct rte_hash_key **key_slot, int32_t *positions, + uint64_t *extra_hits_mask, const void *keys, + const struct rte_hash *h) +{ + unsigned prim_hash_matches, sec_hash_matches, key_idx, i; + unsigned total_hash_matches; + + prim_hash_matches = 1 << RTE_HASH_BUCKET_ENTRIES; + sec_hash_matches = 1 << RTE_HASH_BUCKET_ENTRIES; + for (i = 0; i < RTE_HASH_BUCKET_ENTRIES; i++) { + prim_hash_matches |= ((prim_hash == prim_bkt->signatures[i].current) << i); + sec_hash_matches |= ((sec_hash == sec_bkt->signatures[i].current) << i); + } + + key_idx =
prim_bkt->key_idx[__builtin_ctzl(prim_hash_matches)]; + if (key_idx == 0) + key_idx = sec_bkt->key_idx[__builtin_ctzl(sec_hash_matches)]; + + total_hash_matches = (prim_hash_matches | + (sec_hash_matches << (RTE_HASH_BUCKET_ENTRIES + 1))); + *key_slot = (const struct rte_hash_key *) ((const char *)keys + + key_idx * h->key_entry_size); + + rte_prefetch0(*key_slot); + /* + * Return index where key is stored, + * substracting the first dummy index + */ + positions[idx] = (key_idx - 1); + + *extra_hits_mask |= (uint64_t)(__builtin_popcount(total_hash_matches) > 3) << idx; + +} + + +/* Lookup bulk stage 3: Check if key matches, update hit mask and return data */ +static inline void +lookup_stage3(unsigned idx, const struct rte_hash_key *key_slot, const void * const *keys, + const int32_t *positions, void *data[], uint64_t *hits, + const struct rte_hash *h) +{ + unsigned hit; + unsigned key_idx; + + hit = !h->rte_hash_cmp_eq(key_slot->key, keys[idx], h->key_len); + if (data != NULL) + data[idx] = key_slot->pdata; + + key_idx = positions[idx] + 1; + /* + * If key index is 0, force hit to be 0, in case key to be looked up + * is all zero (as in the dummy slot), which would result in a wrong hit + */ + *hits |= (uint64_t)(hit && !!key_idx) << idx; +} + +static inline void +__rte_hash_lookup_bulk(const struct rte_hash *h, const void **keys, + uint32_t num_keys, int32_t *positions, + uint64_t *hit_mask, void *data[]) +{ + uint64_t hits = 0; + uint64_t extra_hits_mask = 0; + uint64_t lookup_mask, miss_mask; + unsigned idx; + const void *key_store = h->key_store; + int ret; + hash_sig_t hash_vals[RTE_HASH_LOOKUP_BULK_MAX]; + + unsigned idx00, idx01, idx10, idx11, idx20, idx21, idx30, idx31; + const struct rte_hash_bucket *primary_bkt10, *primary_bkt11; + const struct rte_hash_bucket *secondary_bkt10, *secondary_bkt11; + const struct rte_hash_bucket *primary_bkt20, *primary_bkt21; + const struct rte_hash_bucket *secondary_bkt20, *secondary_bkt21; + const struct rte_hash_key 
*k_slot20, *k_slot21, *k_slot30, *k_slot31; + hash_sig_t primary_hash10, primary_hash11; + hash_sig_t secondary_hash10, secondary_hash11; + hash_sig_t primary_hash20, primary_hash21; + hash_sig_t secondary_hash20, secondary_hash21; + + lookup_mask = (uint64_t) -1 >> (64 - num_keys); + miss_mask = lookup_mask; + + lookup_stage0(&idx00, &lookup_mask, keys); + lookup_stage0(&idx01, &lookup_mask, keys); + + idx10 = idx00, idx11 = idx01; + + lookup_stage0(&idx00, &lookup_mask, keys); + lookup_stage0(&idx01, &lookup_mask, keys); + lookup_stage1(idx10, &primary_hash10, &secondary_hash10, + &primary_bkt10, &secondary_bkt10, hash_vals, keys, h); + lookup_stage1(idx11, &primary_hash11, &secondary_hash11, + &primary_bkt11, &secondary_bkt11, hash_vals, keys, h); + + primary_bkt20 = primary_bkt10; + primary_bkt21 = primary_bkt11; + secondary_bkt20 = secondary_bkt10; + secondary_bkt21 = secondary_bkt11; + primary_hash20 = primary_hash10; + primary_hash21 = primary_hash11; + secondary_hash20 = secondary_hash10; + secondary_hash21 = secondary_hash11; + idx20 = idx10, idx21 = idx11; + idx10 = idx00, idx11 = idx01; + + lookup_stage0(&idx00, &lookup_mask, keys); + lookup_stage0(&idx01, &lookup_mask, keys); + lookup_stage1(idx10, &primary_hash10, &secondary_hash10, + &primary_bkt10, &secondary_bkt10, hash_vals, keys, h); + lookup_stage1(idx11, &primary_hash11, &secondary_hash11, + &primary_bkt11, &secondary_bkt11, hash_vals, keys, h); + lookup_stage2(idx20, primary_hash20, secondary_hash20, primary_bkt20, + secondary_bkt20, &k_slot20, positions, &extra_hits_mask, + key_store, h); + lookup_stage2(idx21, primary_hash21, secondary_hash21, primary_bkt21, + secondary_bkt21, &k_slot21, positions, &extra_hits_mask, + key_store, h); + + while (lookup_mask) { + k_slot30 = k_slot20, k_slot31 = k_slot21; + idx30 = idx20, idx31 = idx21; + primary_bkt20 = primary_bkt10; + primary_bkt21 = primary_bkt11; + secondary_bkt20 = secondary_bkt10; + secondary_bkt21 = secondary_bkt11; + primary_hash20 = 
primary_hash10; + primary_hash21 = primary_hash11; + secondary_hash20 = secondary_hash10; + secondary_hash21 = secondary_hash11; + idx20 = idx10, idx21 = idx11; + idx10 = idx00, idx11 = idx01; + + lookup_stage0(&idx00, &lookup_mask, keys); + lookup_stage0(&idx01, &lookup_mask, keys); + lookup_stage1(idx10, &primary_hash10, &secondary_hash10, + &primary_bkt10, &secondary_bkt10, hash_vals, keys, h); + lookup_stage1(idx11, &primary_hash11, &secondary_hash11, + &primary_bkt11, &secondary_bkt11, hash_vals, keys, h); + lookup_stage2(idx20, primary_hash20, secondary_hash20, + primary_bkt20, secondary_bkt20, &k_slot20, positions, + &extra_hits_mask, key_store, h); + lookup_stage2(idx21, primary_hash21, secondary_hash21, + primary_bkt21, secondary_bkt21, &k_slot21, positions, + &extra_hits_mask, key_store, h); + lookup_stage3(idx30, k_slot30, keys, positions, data, &hits, h); + lookup_stage3(idx31, k_slot31, keys, positions, data, &hits, h); + } + + k_slot30 = k_slot20, k_slot31 = k_slot21; + idx30 = idx20, idx31 = idx21; + primary_bkt20 = primary_bkt10; + primary_bkt21 = primary_bkt11; + secondary_bkt20 = secondary_bkt10; + secondary_bkt21 = secondary_bkt11; + primary_hash20 = primary_hash10; + primary_hash21 = primary_hash11; + secondary_hash20 = secondary_hash10; + secondary_hash21 = secondary_hash11; + idx20 = idx10, idx21 = idx11; + idx10 = idx00, idx11 = idx01; + + lookup_stage1(idx10, &primary_hash10, &secondary_hash10, + &primary_bkt10, &secondary_bkt10, hash_vals, keys, h); + lookup_stage1(idx11, &primary_hash11, &secondary_hash11, + &primary_bkt11, &secondary_bkt11, hash_vals, keys, h); + lookup_stage2(idx20, primary_hash20, secondary_hash20, primary_bkt20, + secondary_bkt20, &k_slot20, positions, &extra_hits_mask, + key_store, h); + lookup_stage2(idx21, primary_hash21, secondary_hash21, primary_bkt21, + secondary_bkt21, &k_slot21, positions, &extra_hits_mask, + key_store, h); + lookup_stage3(idx30, k_slot30, keys, positions, data, &hits, h); + 
lookup_stage3(idx31, k_slot31, keys, positions, data, &hits, h); + + k_slot30 = k_slot20, k_slot31 = k_slot21; + idx30 = idx20, idx31 = idx21; + primary_bkt20 = primary_bkt10; + primary_bkt21 = primary_bkt11; + secondary_bkt20 = secondary_bkt10; + secondary_bkt21 = secondary_bkt11; + primary_hash20 = primary_hash10; + primary_hash21 = primary_hash11; + secondary_hash20 = secondary_hash10; + secondary_hash21 = secondary_hash11; + idx20 = idx10, idx21 = idx11; + + lookup_stage2(idx20, primary_hash20, secondary_hash20, primary_bkt20, + secondary_bkt20, &k_slot20, positions, &extra_hits_mask, + key_store, h); + lookup_stage2(idx21, primary_hash21, secondary_hash21, primary_bkt21, + secondary_bkt21, &k_slot21, positions, &extra_hits_mask, + key_store, h); + lookup_stage3(idx30, k_slot30, keys, positions, data, &hits, h); + lookup_stage3(idx31, k_slot31, keys, positions, data, &hits, h); + + k_slot30 = k_slot20, k_slot31 = k_slot21; + idx30 = idx20, idx31 = idx21; + + lookup_stage3(idx30, k_slot30, keys, positions, data, &hits, h); + lookup_stage3(idx31, k_slot31, keys, positions, data, &hits, h); + + /* ignore any items we have already found */ + extra_hits_mask &= ~hits; + + if (unlikely(extra_hits_mask)) { + /* run a single search for each remaining item (masks are 64-bit, so use the long long builtin) */ + do { + idx = __builtin_ctzll(extra_hits_mask); + if (data != NULL) { + ret = rte_hash_lookup_with_hash_data(h, + keys[idx], hash_vals[idx], &data[idx]); + if (ret >= 0) + hits |= 1ULL << idx; + } else { + positions[idx] = rte_hash_lookup_with_hash(h, + keys[idx], hash_vals[idx]); + if (positions[idx] >= 0) + hits |= 1llu << idx; + } + extra_hits_mask &= ~(1llu << idx); + } while (extra_hits_mask); + } + + miss_mask &= ~hits; + if (unlikely(miss_mask)) { + do { + idx = __builtin_ctzll(miss_mask); + positions[idx] = -ENOENT; + miss_mask &= ~(1llu << idx); + } while (miss_mask); + } + + if (hit_mask != NULL) + *hit_mask = hits; +} + +int +rte_hash_lookup_bulk(const struct rte_hash *h, const void **keys, + uint32_t
num_keys, int32_t *positions) +{ + RETURN_IF_TRUE(((h == NULL) || (keys == NULL) || (num_keys == 0) || + (num_keys > RTE_HASH_LOOKUP_BULK_MAX) || + (positions == NULL)), -EINVAL); + + __rte_hash_lookup_bulk(h, keys, num_keys, positions, NULL, NULL); + return 0; +} + +int +rte_hash_lookup_bulk_data(const struct rte_hash *h, const void **keys, + uint32_t num_keys, uint64_t *hit_mask, void *data[]) +{ + RETURN_IF_TRUE(((h == NULL) || (keys == NULL) || (num_keys == 0) || + (num_keys > RTE_HASH_LOOKUP_BULK_MAX) || + (hit_mask == NULL)), -EINVAL); + + int32_t positions[num_keys]; + + __rte_hash_lookup_bulk(h, keys, num_keys, positions, hit_mask, data); + + /* Return number of hits (hit_mask is 64 bits wide, so count with the long long builtin) */ + return __builtin_popcountll(*hit_mask); +} + +int32_t +rte_hash_iterate(const struct rte_hash *h, const void **key, void **data, uint32_t *next) +{ + uint32_t bucket_idx, idx, position; + struct rte_hash_key *next_key; + + RETURN_IF_TRUE(((h == NULL) || (next == NULL)), -EINVAL); + + const uint32_t total_entries = h->num_buckets * RTE_HASH_BUCKET_ENTRIES; + /* Out of bounds */ + if (*next >= total_entries) + return -ENOENT; + + /* Calculate bucket and index of current iterator */ + bucket_idx = *next / RTE_HASH_BUCKET_ENTRIES; + idx = *next % RTE_HASH_BUCKET_ENTRIES; + + /* If current position is empty, go to the next one */ + while (h->buckets[bucket_idx].signatures[idx].sig == NULL_SIGNATURE) { + (*next)++; + /* End of table */ + if (*next == total_entries) + return -ENOENT; + bucket_idx = *next / RTE_HASH_BUCKET_ENTRIES; + idx = *next % RTE_HASH_BUCKET_ENTRIES; + } + + /* Get position of entry in key table */ + position = h->buckets[bucket_idx].key_idx[idx]; + next_key = (struct rte_hash_key *) ((char *)h->key_store + + position * h->key_entry_size); + /* Return key and data */ + *key = next_key->key; + *data = next_key->pdata; + + /* Increment iterator */ + (*next)++; + + return (position - 1); +} diff --git a/src/dpdk22/lib/librte_hash/rte_fbk_hash.h
b/src/dpdk22/lib/librte_hash/rte_fbk_hash.h new file mode 100644 index 00000000..a430961d --- /dev/null +++ b/src/dpdk22/lib/librte_hash/rte_fbk_hash.h @@ -0,0 +1,396 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef _RTE_FBK_HASH_H_ +#define _RTE_FBK_HASH_H_ + +/** + * @file + * + * This is a hash table implementation for four byte keys (fbk). 
+ * + * Note that the return value of the add function should always be checked as, + * if a bucket is full, the key is not added even if there is space in other + * buckets. This keeps the lookup function very simple and therefore fast. + */ + +#include +#include +#include + +#ifdef __cplusplus +extern "C" { +#endif + +#include + +#ifndef RTE_FBK_HASH_FUNC_DEFAULT +#if defined(RTE_MACHINE_CPUFLAG_SSE4_2) || defined(RTE_MACHINE_CPUFLAG_CRC32) +#include +/** Default four-byte key hash function if none is specified. */ +#define RTE_FBK_HASH_FUNC_DEFAULT rte_hash_crc_4byte +#else +#include +#define RTE_FBK_HASH_FUNC_DEFAULT rte_jhash_1word +#endif +#endif + +#ifndef RTE_FBK_HASH_INIT_VAL_DEFAULT +/** Initialising value used when calculating hash. */ +#define RTE_FBK_HASH_INIT_VAL_DEFAULT 0xFFFFFFFF +#endif + +/** The maximum number of entries in the hash table that is supported. */ +#define RTE_FBK_HASH_ENTRIES_MAX (1 << 20) + +/** The maximum number of entries in each bucket that is supported. */ +#define RTE_FBK_HASH_ENTRIES_PER_BUCKET_MAX 256 + +/** Maximum size of string for naming the hash. */ +#define RTE_FBK_HASH_NAMESIZE 32 + +/** Type of function that can be used for calculating the hash value. */ +typedef uint32_t (*rte_fbk_hash_fn)(uint32_t key, uint32_t init_val); + +/** Parameters used when creating four-byte key hash table. */ +struct rte_fbk_hash_params { + const char *name; /**< Name of the hash table. */ + uint32_t entries; /**< Total number of entries. */ + uint32_t entries_per_bucket; /**< Number of entries in a bucket. */ + int socket_id; /**< Socket to allocate memory on. */ + rte_fbk_hash_fn hash_func; /**< The hash function. */ + uint32_t init_val; /**< For initialising hash function. */ +}; + +/** Individual entry in the four-byte key hash table. */ +union rte_fbk_hash_entry { + uint64_t whole_entry; /**< For accessing entire entry. */ + struct { + uint16_t is_entry; /**< Non-zero if entry is active. 
*/ + uint16_t value; /**< Value returned by lookup. */ + uint32_t key; /**< Key used to find value. */ + } entry; /**< For accessing each entry part. */ +}; + + +/** The four-byte key hash table structure. */ +struct rte_fbk_hash_table { + char name[RTE_FBK_HASH_NAMESIZE]; /**< Name of the hash. */ + uint32_t entries; /**< Total number of entries. */ + uint32_t entries_per_bucket; /**< Number of entries in a bucket. */ + uint32_t used_entries; /**< How many entries are used. */ + uint32_t bucket_mask; /**< To find which bucket the key is in. */ + uint32_t bucket_shift; /**< Convert bucket to table offset. */ + rte_fbk_hash_fn hash_func; /**< The hash function. */ + uint32_t init_val; /**< For initialising hash function. */ + + /** A flat table of all buckets. */ + union rte_fbk_hash_entry t[0]; +}; + +/** + * Find the offset into hash table of the bucket containing a particular key. + * + * @param ht + * Pointer to hash table. + * @param key + * Key to calculate bucket for. + * @return + * Offset into hash table. + */ +static inline uint32_t +rte_fbk_hash_get_bucket(const struct rte_fbk_hash_table *ht, uint32_t key) +{ + return (ht->hash_func(key, ht->init_val) & ht->bucket_mask) << + ht->bucket_shift; +} + +/** + * Add a key to an existing hash table with bucket id. + * This operation is not multi-thread safe + * and should only be called from one thread. + * + * @param ht + * Hash table to add the key to. + * @param key + * Key to add to the hash table. + * @param value + * Value to associate with key. + * @param bucket + * Bucket to associate with key. + * @return + * 0 if ok, or negative value on error. + */ +static inline int +rte_fbk_hash_add_key_with_bucket(struct rte_fbk_hash_table *ht, + uint32_t key, uint16_t value, uint32_t bucket) +{ + /* + * The writing of a new value to the hash table is done as a single + * 64bit operation. 
This should help prevent individual entries being + * corrupted due to race conditions, but it's still possible to + * overwrite entries that have just been made valid. + */ + const uint64_t new_entry = ((uint64_t)(key) << 32) | + ((uint64_t)(value) << 16) | + 1; /* 1 = is_entry bit. */ + uint32_t i; + + for (i = 0; i < ht->entries_per_bucket; i++) { + /* Set entry if unused. */ + if (! ht->t[bucket + i].entry.is_entry) { + ht->t[bucket + i].whole_entry = new_entry; + ht->used_entries++; + return 0; + } + /* Change value if key already exists. */ + if (ht->t[bucket + i].entry.key == key) { + ht->t[bucket + i].entry.value = value; + return 0; + } + } + + return -ENOSPC; /* No space in bucket. */ +} + +/** + * Add a key to an existing hash table. This operation is not multi-thread safe + * and should only be called from one thread. + * + * @param ht + * Hash table to add the key to. + * @param key + * Key to add to the hash table. + * @param value + * Value to associate with key. + * @return + * 0 if ok, or negative value on error. + */ +static inline int +rte_fbk_hash_add_key(struct rte_fbk_hash_table *ht, + uint32_t key, uint16_t value) +{ + return rte_fbk_hash_add_key_with_bucket(ht, + key, value, rte_fbk_hash_get_bucket(ht, key)); +} + +/** + * Remove a key with a given bucket id from an existing hash table. + * This operation is not multi-thread + * safe and should only be called from one thread. + * + * @param ht + * Hash table to remove the key from. + * @param key + * Key to remove from the hash table. + * @param bucket + * Bucket id associate with key. + * @return + * 0 if ok, or negative value on error. + */ +static inline int +rte_fbk_hash_delete_key_with_bucket(struct rte_fbk_hash_table *ht, + uint32_t key, uint32_t bucket) +{ + uint32_t last_entry = ht->entries_per_bucket - 1; + uint32_t i, j; + + for (i = 0; i < ht->entries_per_bucket; i++) { + if (ht->t[bucket + i].entry.key == key) { + /* Find last key in bucket. 
*/ + for (j = ht->entries_per_bucket - 1; j > i; j-- ) { + if (! ht->t[bucket + j].entry.is_entry) { + last_entry = j - 1; + } + } + /* + * Move the last key to the deleted key's position, and + * delete the last key. lastEntry and i may be same but + * it doesn't matter. + */ + ht->t[bucket + i].whole_entry = + ht->t[bucket + last_entry].whole_entry; + ht->t[bucket + last_entry].whole_entry = 0; + + ht->used_entries--; + return 0; + } + } + + return -ENOENT; /* Key didn't exist. */ +} + +/** + * Remove a key from an existing hash table. This operation is not multi-thread + * safe and should only be called from one thread. + * + * @param ht + * Hash table to remove the key from. + * @param key + * Key to remove from the hash table. + * @return + * 0 if ok, or negative value on error. + */ +static inline int +rte_fbk_hash_delete_key(struct rte_fbk_hash_table *ht, uint32_t key) +{ + return rte_fbk_hash_delete_key_with_bucket(ht, + key, rte_fbk_hash_get_bucket(ht, key)); +} + +/** + * Find a key in the hash table with a given bucketid. + * This operation is multi-thread safe. + * + * @param ht + * Hash table to look in. + * @param key + * Key to find. + * @param bucket + * Bucket associate to the key. + * @return + * The value that was associated with the key, or negative value on error. + */ +static inline int +rte_fbk_hash_lookup_with_bucket(const struct rte_fbk_hash_table *ht, + uint32_t key, uint32_t bucket) +{ + union rte_fbk_hash_entry current_entry; + uint32_t i; + + for (i = 0; i < ht->entries_per_bucket; i++) { + /* Single read of entry, which should be atomic. */ + current_entry.whole_entry = ht->t[bucket + i].whole_entry; + if (! current_entry.entry.is_entry) { + return -ENOENT; /* Error once we hit an empty field. */ + } + if (current_entry.entry.key == key) { + return current_entry.entry.value; + } + } + return -ENOENT; /* Key didn't exist. */ +} + +/** + * Find a key in the hash table. This operation is multi-thread safe. 
+ * + * @param ht + * Hash table to look in. + * @param key + * Key to find. + * @return + * The value that was associated with the key, or negative value on error. + */ +static inline int +rte_fbk_hash_lookup(const struct rte_fbk_hash_table *ht, uint32_t key) +{ + return rte_fbk_hash_lookup_with_bucket(ht, + key, rte_fbk_hash_get_bucket(ht, key)); +} + +/** + * Delete all entries in a hash table. This operation is not multi-thread + * safe and should only be called from one thread. + * + * @param ht + * Hash table to delete entries in. + */ +static inline void +rte_fbk_hash_clear_all(struct rte_fbk_hash_table *ht) +{ + memset(ht->t, 0, sizeof(ht->t[0]) * ht->entries); + ht->used_entries = 0; +} + +/** + * Find what fraction of entries are being used. + * + * @param ht + * Hash table to find how many entries are being used in. + * @return + * Load factor of the hash table, or negative value on error. + */ +static inline double +rte_fbk_hash_get_load_factor(struct rte_fbk_hash_table *ht) +{ + return (double)ht->used_entries / (double)ht->entries; +} + +/** + * Performs a lookup for an existing hash table, and returns a pointer to + * the table if found. + * + * @param name + * Name of the hash table to find + * + * @return + * pointer to hash table structure or NULL on error with rte_errno + * set appropriately. Possible rte_errno values include: + * - ENOENT - required entry not available to return. + */ +struct rte_fbk_hash_table *rte_fbk_hash_find_existing(const char *name); + +/** + * Create a new hash table for use with four byte keys. + * + * @param params + * Parameters used in creation of hash table. + * + * @return + * Pointer to hash table structure that is used in future hash table + * operations, or NULL on error with rte_errno set appropriately. 
+ * Possible rte_errno error values include: + * - E_RTE_NO_CONFIG - function could not get pointer to rte_config structure + * - E_RTE_SECONDARY - function was called from a secondary process instance + * - EINVAL - invalid parameter value passed to function + * - ENOSPC - the maximum number of memzones has already been allocated + * - EEXIST - a memzone with the same name already exists + * - ENOMEM - no appropriate memory area found in which to create memzone + */ +struct rte_fbk_hash_table * \ +rte_fbk_hash_create(const struct rte_fbk_hash_params *params); + +/** + * Free all memory used by a hash table. + * Has no effect on hash tables allocated in memory zones + * + * @param ht + * Hash table to deallocate. + */ +void rte_fbk_hash_free(struct rte_fbk_hash_table *ht); + +#ifdef __cplusplus +} +#endif + +#endif /* _RTE_FBK_HASH_H_ */ diff --git a/src/dpdk22/lib/librte_hash/rte_hash.h b/src/dpdk22/lib/librte_hash/rte_hash.h new file mode 100644 index 00000000..85fc4162 --- /dev/null +++ b/src/dpdk22/lib/librte_hash/rte_hash.h @@ -0,0 +1,436 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2015 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef _RTE_HASH_H_ +#define _RTE_HASH_H_ + +/** + * @file + * + * RTE Hash Table + */ + +#include <stdint.h> +#include <stddef.h> + +#ifdef __cplusplus +extern "C" { +#endif + +/** Maximum size of hash table that can be created. */ +#define RTE_HASH_ENTRIES_MAX (1 << 30) + +/** Maximum number of characters in hash name.*/ +#define RTE_HASH_NAMESIZE 32 + +/** Maximum number of keys that can be searched for using rte_hash_lookup_bulk. */ +#define RTE_HASH_LOOKUP_BULK_MAX 64 +#define RTE_HASH_LOOKUP_MULTI_MAX RTE_HASH_LOOKUP_BULK_MAX + +/** Enable Hardware transactional memory support. */ +#define RTE_HASH_EXTRA_FLAGS_TRANS_MEM_SUPPORT 0x01 + +/** Signature of key that is stored internally. */ +typedef uint32_t hash_sig_t; + +/** Type of function that can be used for calculating the hash value. */ +typedef uint32_t (*rte_hash_function)(const void *key, uint32_t key_len, + uint32_t init_val); + +/** Type of function used to compare the hash key. */ +typedef int (*rte_hash_cmp_eq_t)(const void *key1, const void *key2, size_t key_len); + +/** + * Parameters used when creating the hash table. + */ +struct rte_hash_parameters { + const char *name; /**< Name of the hash.
*/ + uint32_t entries; /**< Total hash table entries. */ + uint32_t reserved; /**< Unused field. Should be set to 0 */ + uint32_t key_len; /**< Length of hash key. */ + rte_hash_function hash_func; /**< Primary Hash function used to calculate hash. */ + uint32_t hash_func_init_val; /**< Init value used by hash_func. */ + int socket_id; /**< NUMA Socket ID for memory. */ + uint8_t extra_flag; /**< Indicate if additional parameters are present. */ +}; + +/** @internal A hash table structure. */ +struct rte_hash; + +/** + * Create a new hash table. + * + * @param params + * Parameters used to create and initialise the hash table. + * @return + * Pointer to hash table structure that is used in future hash table + * operations, or NULL on error, with error code set in rte_errno. + * Possible rte_errno errors include: + * - E_RTE_NO_CONFIG - function could not get pointer to rte_config structure + * - E_RTE_SECONDARY - function was called from a secondary process instance + * - ENOENT - missing entry + * - EINVAL - invalid parameter passed to function + * - ENOSPC - the maximum number of memzones has already been allocated + * - EEXIST - a memzone with the same name already exists + * - ENOMEM - no appropriate memory area found in which to create memzone + */ +struct rte_hash * +rte_hash_create(const struct rte_hash_parameters *params); + +/** + * Set a new hash compare function other than the default one. + * + * @note Function pointer does not work with multi-process, so do not use it + * in multi-process mode. + * + * @param h + * Hash table to reset + * @param func + * New compare function + */ +void rte_hash_set_cmp_func(struct rte_hash *h, rte_hash_cmp_eq_t func); + +/** + * Find an existing hash table object and return a pointer to it. + * + * @param name + * Name of the hash table as passed to rte_hash_create() + * @return + * Pointer to hash table or NULL if object not found + * with rte_errno set appropriately. 
Possible rte_errno values include: + * - ENOENT - value not available for return + */ +struct rte_hash * +rte_hash_find_existing(const char *name); + +/** + * De-allocate all memory used by hash table. + * @param h + * Hash table to free + */ +void +rte_hash_free(struct rte_hash *h); + +/** + * Reset all hash structure, by zeroing all entries + * @param h + * Hash table to reset + */ +void +rte_hash_reset(struct rte_hash *h); + +/** + * Add a key-value pair to an existing hash table. + * This operation is not multi-thread safe + * and should only be called from one thread. + * + * @param h + * Hash table to add the key to. + * @param key + * Key to add to the hash table. + * @param data + * Data to add to the hash table. + * @return + * - 0 if added successfully + * - -EINVAL if the parameters are invalid. + * - -ENOSPC if there is no space in the hash for this key. + */ +int +rte_hash_add_key_data(const struct rte_hash *h, const void *key, void *data); + +/** + * Add a key-value pair with a pre-computed hash value + * to an existing hash table. + * This operation is not multi-thread safe + * and should only be called from one thread. + * + * @param h + * Hash table to add the key to. + * @param key + * Key to add to the hash table. + * @param sig + * Precomputed hash value for 'key' + * @param data + * Data to add to the hash table. + * @return + * - 0 if added successfully + * - -EINVAL if the parameters are invalid. + * - -ENOSPC if there is no space in the hash for this key. + */ +int32_t +rte_hash_add_key_with_hash_data(const struct rte_hash *h, const void *key, + hash_sig_t sig, void *data); + +/** + * Add a key to an existing hash table. This operation is not multi-thread safe + * and should only be called from one thread. + * + * @param h + * Hash table to add the key to. + * @param key + * Key to add to the hash table. + * @return + * - -EINVAL if the parameters are invalid. + * - -ENOSPC if there is no space in the hash for this key. 
+ * - A positive value that can be used by the caller as an offset into an + * array of user data. This value is unique for this key. + */ +int32_t +rte_hash_add_key(const struct rte_hash *h, const void *key); + +/** + * Add a key to an existing hash table. + * This operation is not multi-thread safe + * and should only be called from one thread. + * + * @param h + * Hash table to add the key to. + * @param key + * Key to add to the hash table. + * @param sig + * Precomputed hash value for 'key'. + * @return + * - -EINVAL if the parameters are invalid. + * - -ENOSPC if there is no space in the hash for this key. + * - A positive value that can be used by the caller as an offset into an + * array of user data. This value is unique for this key. + */ +int32_t +rte_hash_add_key_with_hash(const struct rte_hash *h, const void *key, hash_sig_t sig); + +/** + * Remove a key from an existing hash table. + * This operation is not multi-thread safe + * and should only be called from one thread. + * + * @param h + * Hash table to remove the key from. + * @param key + * Key to remove from the hash table. + * @return + * - -EINVAL if the parameters are invalid. + * - -ENOENT if the key is not found. + * - A positive value that can be used by the caller as an offset into an + * array of user data. This value is unique for this key, and is the same + * value that was returned when the key was added. + */ +int32_t +rte_hash_del_key(const struct rte_hash *h, const void *key); + +/** + * Remove a key from an existing hash table. + * This operation is not multi-thread safe + * and should only be called from one thread. + * + * @param h + * Hash table to remove the key from. + * @param key + * Key to remove from the hash table. + * @param sig + * Precomputed hash value for 'key'. + * @return + * - -EINVAL if the parameters are invalid. + * - -ENOENT if the key is not found. + * - A positive value that can be used by the caller as an offset into an + * array of user data. 
This value is unique for this key, and is the same + * value that was returned when the key was added. + */ +int32_t +rte_hash_del_key_with_hash(const struct rte_hash *h, const void *key, hash_sig_t sig); + + +/** + * Find a key-value pair in the hash table. + * This operation is multi-thread safe. + * + * @param h + * Hash table to look in. + * @param key + * Key to find. + * @param data + * Output with pointer to data returned from the hash table. + * @return + * 0 if successful lookup + * - -EINVAL if the parameters are invalid. + * - -ENOENT if the key is not found. + */ +int +rte_hash_lookup_data(const struct rte_hash *h, const void *key, void **data); + +/** + * Find a key-value pair with a pre-computed hash value + * in an existing hash table. + * This operation is multi-thread safe. + * + * @param h + * Hash table to look in. + * @param key + * Key to find. + * @param sig + * Precomputed hash value for 'key'. + * @param data + * Output with pointer to data returned from the hash table. + * @return + * 0 if successful lookup + * - -EINVAL if the parameters are invalid. + * - -ENOENT if the key is not found. + */ +int +rte_hash_lookup_with_hash_data(const struct rte_hash *h, const void *key, + hash_sig_t sig, void **data); + +/** + * Find a key in the hash table. + * This operation is multi-thread safe. + * + * @param h + * Hash table to look in. + * @param key + * Key to find. + * @return + * - -EINVAL if the parameters are invalid. + * - -ENOENT if the key is not found. + * - A positive value that can be used by the caller as an offset into an + * array of user data. This value is unique for this key, and is the same + * value that was returned when the key was added. + */ +int32_t +rte_hash_lookup(const struct rte_hash *h, const void *key); + +/** + * Find a key in the hash table. + * This operation is multi-thread safe. + * + * @param h + * Hash table to look in. + * @param key + * Key to find. + * @param sig + * Precomputed hash value for 'key'.
+ * @return + * - -EINVAL if the parameters are invalid. + * - -ENOENT if the key is not found. + * - A positive value that can be used by the caller as an offset into an + * array of user data. This value is unique for this key, and is the same + * value that was returned when the key was added. + */ +int32_t +rte_hash_lookup_with_hash(const struct rte_hash *h, + const void *key, hash_sig_t sig); + +/** + * Calc a hash value by key. + * This operation is not multi-thread safe. + * + * @param h + * Hash table to look in. + * @param key + * Key to find. + * @return + * - hash value + */ +hash_sig_t +rte_hash_hash(const struct rte_hash *h, const void *key); + +#define rte_hash_lookup_multi rte_hash_lookup_bulk +#define rte_hash_lookup_multi_data rte_hash_lookup_bulk_data +/** + * Find multiple keys in the hash table. + * This operation is multi-thread safe. + * + * @param h + * Hash table to look in. + * @param keys + * A pointer to a list of keys to look for. + * @param num_keys + * How many keys are in the keys list (less than RTE_HASH_LOOKUP_BULK_MAX). + * @param hit_mask + * Output containing a bitmask with all successful lookups. + * @param data + * Output containing array of data returned from all the successful lookups. + * @return + * -EINVAL if there's an error, otherwise number of successful lookups. + */ +int +rte_hash_lookup_bulk_data(const struct rte_hash *h, const void **keys, + uint32_t num_keys, uint64_t *hit_mask, void *data[]); + +/** + * Find multiple keys in the hash table. + * This operation is multi-thread safe. + * + * @param h + * Hash table to look in. + * @param keys + * A pointer to a list of keys to look for. + * @param num_keys + * How many keys are in the keys list (less than RTE_HASH_LOOKUP_BULK_MAX). + * @param positions + * Output containing a list of values, corresponding to the list of keys that + * can be used by the caller as an offset into an array of user data. 
These + * values are unique for each key, and are the same values that were returned + * when each key was added. If a key in the list was not found, then -ENOENT + * will be the value. + * @return + * -EINVAL if there's an error, otherwise 0. + */ +int +rte_hash_lookup_bulk(const struct rte_hash *h, const void **keys, + uint32_t num_keys, int32_t *positions); + +/** + * Iterate through the hash table, returning key-value pairs. + * + * @param h + * Hash table to iterate + * @param key + * Output containing the key where current iterator + * was pointing at + * @param data + * Output containing the data associated with key. + * Returns NULL if data was not stored. + * @param next + * Pointer to iterator. Should be 0 to start iterating the hash table. + * Iterator is incremented after each call of this function. + * @return + * Position where key was stored, if successful. + * - -EINVAL if the parameters are invalid. + * - -ENOENT if end of the hash table. + */ +int32_t +rte_hash_iterate(const struct rte_hash *h, const void **key, void **data, uint32_t *next); +#ifdef __cplusplus +} +#endif + +#endif /* _RTE_HASH_H_ */ diff --git a/src/dpdk22/lib/librte_hash/rte_hash_crc.h b/src/dpdk22/lib/librte_hash/rte_hash_crc.h new file mode 100644 index 00000000..78a34b76 --- /dev/null +++ b/src/dpdk22/lib/librte_hash/rte_hash_crc.h @@ -0,0 +1,568 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. 
+ * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#ifndef _RTE_HASH_CRC_H_ +#define _RTE_HASH_CRC_H_ + +/** + * @file + * + * RTE CRC Hash + */ + +#ifdef __cplusplus +extern "C" { +#endif + +#include +#include +#include +#include + +/* Lookup tables for software implementation of CRC32C */ +static const uint32_t crc32c_tables[8][256] = {{ + 0x00000000, 0xF26B8303, 0xE13B70F7, 0x1350F3F4, 0xC79A971F, 0x35F1141C, 0x26A1E7E8, 0xD4CA64EB, + 0x8AD958CF, 0x78B2DBCC, 0x6BE22838, 0x9989AB3B, 0x4D43CFD0, 0xBF284CD3, 0xAC78BF27, 0x5E133C24, + 0x105EC76F, 0xE235446C, 0xF165B798, 0x030E349B, 0xD7C45070, 0x25AFD373, 0x36FF2087, 0xC494A384, + 0x9A879FA0, 0x68EC1CA3, 0x7BBCEF57, 0x89D76C54, 0x5D1D08BF, 0xAF768BBC, 0xBC267848, 0x4E4DFB4B, + 0x20BD8EDE, 0xD2D60DDD, 0xC186FE29, 0x33ED7D2A, 0xE72719C1, 0x154C9AC2, 0x061C6936, 0xF477EA35, + 0xAA64D611, 0x580F5512, 0x4B5FA6E6, 0xB93425E5, 0x6DFE410E, 0x9F95C20D, 0x8CC531F9, 0x7EAEB2FA, + 0x30E349B1, 0xC288CAB2, 0xD1D83946, 0x23B3BA45, 0xF779DEAE, 0x05125DAD, 0x1642AE59, 0xE4292D5A, + 0xBA3A117E, 0x4851927D, 0x5B016189, 0xA96AE28A, 0x7DA08661, 0x8FCB0562, 0x9C9BF696, 0x6EF07595, + 0x417B1DBC, 0xB3109EBF, 0xA0406D4B, 0x522BEE48, 0x86E18AA3, 0x748A09A0, 0x67DAFA54, 0x95B17957, + 0xCBA24573, 0x39C9C670, 0x2A993584, 0xD8F2B687, 0x0C38D26C, 0xFE53516F, 0xED03A29B, 0x1F682198, + 0x5125DAD3, 0xA34E59D0, 0xB01EAA24, 0x42752927, 0x96BF4DCC, 0x64D4CECF, 0x77843D3B, 0x85EFBE38, + 0xDBFC821C, 0x2997011F, 0x3AC7F2EB, 0xC8AC71E8, 0x1C661503, 0xEE0D9600, 0xFD5D65F4, 0x0F36E6F7, + 0x61C69362, 0x93AD1061, 0x80FDE395, 0x72966096, 0xA65C047D, 0x5437877E, 0x4767748A, 0xB50CF789, + 0xEB1FCBAD, 0x197448AE, 0x0A24BB5A, 0xF84F3859, 0x2C855CB2, 0xDEEEDFB1, 0xCDBE2C45, 0x3FD5AF46, + 0x7198540D, 0x83F3D70E, 0x90A324FA, 0x62C8A7F9, 0xB602C312, 0x44694011, 0x5739B3E5, 0xA55230E6, + 0xFB410CC2, 0x092A8FC1, 0x1A7A7C35, 0xE811FF36, 0x3CDB9BDD, 0xCEB018DE, 0xDDE0EB2A, 0x2F8B6829, + 0x82F63B78, 0x709DB87B, 0x63CD4B8F, 0x91A6C88C, 0x456CAC67, 0xB7072F64, 0xA457DC90, 0x563C5F93, + 0x082F63B7, 0xFA44E0B4, 
0xE9141340, 0x1B7F9043, 0xCFB5F4A8, 0x3DDE77AB, 0x2E8E845F, 0xDCE5075C, + 0x92A8FC17, 0x60C37F14, 0x73938CE0, 0x81F80FE3, 0x55326B08, 0xA759E80B, 0xB4091BFF, 0x466298FC, + 0x1871A4D8, 0xEA1A27DB, 0xF94AD42F, 0x0B21572C, 0xDFEB33C7, 0x2D80B0C4, 0x3ED04330, 0xCCBBC033, + 0xA24BB5A6, 0x502036A5, 0x4370C551, 0xB11B4652, 0x65D122B9, 0x97BAA1BA, 0x84EA524E, 0x7681D14D, + 0x2892ED69, 0xDAF96E6A, 0xC9A99D9E, 0x3BC21E9D, 0xEF087A76, 0x1D63F975, 0x0E330A81, 0xFC588982, + 0xB21572C9, 0x407EF1CA, 0x532E023E, 0xA145813D, 0x758FE5D6, 0x87E466D5, 0x94B49521, 0x66DF1622, + 0x38CC2A06, 0xCAA7A905, 0xD9F75AF1, 0x2B9CD9F2, 0xFF56BD19, 0x0D3D3E1A, 0x1E6DCDEE, 0xEC064EED, + 0xC38D26C4, 0x31E6A5C7, 0x22B65633, 0xD0DDD530, 0x0417B1DB, 0xF67C32D8, 0xE52CC12C, 0x1747422F, + 0x49547E0B, 0xBB3FFD08, 0xA86F0EFC, 0x5A048DFF, 0x8ECEE914, 0x7CA56A17, 0x6FF599E3, 0x9D9E1AE0, + 0xD3D3E1AB, 0x21B862A8, 0x32E8915C, 0xC083125F, 0x144976B4, 0xE622F5B7, 0xF5720643, 0x07198540, + 0x590AB964, 0xAB613A67, 0xB831C993, 0x4A5A4A90, 0x9E902E7B, 0x6CFBAD78, 0x7FAB5E8C, 0x8DC0DD8F, + 0xE330A81A, 0x115B2B19, 0x020BD8ED, 0xF0605BEE, 0x24AA3F05, 0xD6C1BC06, 0xC5914FF2, 0x37FACCF1, + 0x69E9F0D5, 0x9B8273D6, 0x88D28022, 0x7AB90321, 0xAE7367CA, 0x5C18E4C9, 0x4F48173D, 0xBD23943E, + 0xF36E6F75, 0x0105EC76, 0x12551F82, 0xE03E9C81, 0x34F4F86A, 0xC69F7B69, 0xD5CF889D, 0x27A40B9E, + 0x79B737BA, 0x8BDCB4B9, 0x988C474D, 0x6AE7C44E, 0xBE2DA0A5, 0x4C4623A6, 0x5F16D052, 0xAD7D5351 +}, +{ + 0x00000000, 0x13A29877, 0x274530EE, 0x34E7A899, 0x4E8A61DC, 0x5D28F9AB, 0x69CF5132, 0x7A6DC945, + 0x9D14C3B8, 0x8EB65BCF, 0xBA51F356, 0xA9F36B21, 0xD39EA264, 0xC03C3A13, 0xF4DB928A, 0xE7790AFD, + 0x3FC5F181, 0x2C6769F6, 0x1880C16F, 0x0B225918, 0x714F905D, 0x62ED082A, 0x560AA0B3, 0x45A838C4, + 0xA2D13239, 0xB173AA4E, 0x859402D7, 0x96369AA0, 0xEC5B53E5, 0xFFF9CB92, 0xCB1E630B, 0xD8BCFB7C, + 0x7F8BE302, 0x6C297B75, 0x58CED3EC, 0x4B6C4B9B, 0x310182DE, 0x22A31AA9, 0x1644B230, 0x05E62A47, + 0xE29F20BA, 0xF13DB8CD, 0xC5DA1054, 0xD6788823, 
0xAC154166, 0xBFB7D911, 0x8B507188, 0x98F2E9FF, + 0x404E1283, 0x53EC8AF4, 0x670B226D, 0x74A9BA1A, 0x0EC4735F, 0x1D66EB28, 0x298143B1, 0x3A23DBC6, + 0xDD5AD13B, 0xCEF8494C, 0xFA1FE1D5, 0xE9BD79A2, 0x93D0B0E7, 0x80722890, 0xB4958009, 0xA737187E, + 0xFF17C604, 0xECB55E73, 0xD852F6EA, 0xCBF06E9D, 0xB19DA7D8, 0xA23F3FAF, 0x96D89736, 0x857A0F41, + 0x620305BC, 0x71A19DCB, 0x45463552, 0x56E4AD25, 0x2C896460, 0x3F2BFC17, 0x0BCC548E, 0x186ECCF9, + 0xC0D23785, 0xD370AFF2, 0xE797076B, 0xF4359F1C, 0x8E585659, 0x9DFACE2E, 0xA91D66B7, 0xBABFFEC0, + 0x5DC6F43D, 0x4E646C4A, 0x7A83C4D3, 0x69215CA4, 0x134C95E1, 0x00EE0D96, 0x3409A50F, 0x27AB3D78, + 0x809C2506, 0x933EBD71, 0xA7D915E8, 0xB47B8D9F, 0xCE1644DA, 0xDDB4DCAD, 0xE9537434, 0xFAF1EC43, + 0x1D88E6BE, 0x0E2A7EC9, 0x3ACDD650, 0x296F4E27, 0x53028762, 0x40A01F15, 0x7447B78C, 0x67E52FFB, + 0xBF59D487, 0xACFB4CF0, 0x981CE469, 0x8BBE7C1E, 0xF1D3B55B, 0xE2712D2C, 0xD69685B5, 0xC5341DC2, + 0x224D173F, 0x31EF8F48, 0x050827D1, 0x16AABFA6, 0x6CC776E3, 0x7F65EE94, 0x4B82460D, 0x5820DE7A, + 0xFBC3FAF9, 0xE861628E, 0xDC86CA17, 0xCF245260, 0xB5499B25, 0xA6EB0352, 0x920CABCB, 0x81AE33BC, + 0x66D73941, 0x7575A136, 0x419209AF, 0x523091D8, 0x285D589D, 0x3BFFC0EA, 0x0F186873, 0x1CBAF004, + 0xC4060B78, 0xD7A4930F, 0xE3433B96, 0xF0E1A3E1, 0x8A8C6AA4, 0x992EF2D3, 0xADC95A4A, 0xBE6BC23D, + 0x5912C8C0, 0x4AB050B7, 0x7E57F82E, 0x6DF56059, 0x1798A91C, 0x043A316B, 0x30DD99F2, 0x237F0185, + 0x844819FB, 0x97EA818C, 0xA30D2915, 0xB0AFB162, 0xCAC27827, 0xD960E050, 0xED8748C9, 0xFE25D0BE, + 0x195CDA43, 0x0AFE4234, 0x3E19EAAD, 0x2DBB72DA, 0x57D6BB9F, 0x447423E8, 0x70938B71, 0x63311306, + 0xBB8DE87A, 0xA82F700D, 0x9CC8D894, 0x8F6A40E3, 0xF50789A6, 0xE6A511D1, 0xD242B948, 0xC1E0213F, + 0x26992BC2, 0x353BB3B5, 0x01DC1B2C, 0x127E835B, 0x68134A1E, 0x7BB1D269, 0x4F567AF0, 0x5CF4E287, + 0x04D43CFD, 0x1776A48A, 0x23910C13, 0x30339464, 0x4A5E5D21, 0x59FCC556, 0x6D1B6DCF, 0x7EB9F5B8, + 0x99C0FF45, 0x8A626732, 0xBE85CFAB, 0xAD2757DC, 0xD74A9E99, 0xC4E806EE, 0xF00FAE77, 
0xE3AD3600, + 0x3B11CD7C, 0x28B3550B, 0x1C54FD92, 0x0FF665E5, 0x759BACA0, 0x663934D7, 0x52DE9C4E, 0x417C0439, + 0xA6050EC4, 0xB5A796B3, 0x81403E2A, 0x92E2A65D, 0xE88F6F18, 0xFB2DF76F, 0xCFCA5FF6, 0xDC68C781, + 0x7B5FDFFF, 0x68FD4788, 0x5C1AEF11, 0x4FB87766, 0x35D5BE23, 0x26772654, 0x12908ECD, 0x013216BA, + 0xE64B1C47, 0xF5E98430, 0xC10E2CA9, 0xD2ACB4DE, 0xA8C17D9B, 0xBB63E5EC, 0x8F844D75, 0x9C26D502, + 0x449A2E7E, 0x5738B609, 0x63DF1E90, 0x707D86E7, 0x0A104FA2, 0x19B2D7D5, 0x2D557F4C, 0x3EF7E73B, + 0xD98EEDC6, 0xCA2C75B1, 0xFECBDD28, 0xED69455F, 0x97048C1A, 0x84A6146D, 0xB041BCF4, 0xA3E32483 +}, +{ + 0x00000000, 0xA541927E, 0x4F6F520D, 0xEA2EC073, 0x9EDEA41A, 0x3B9F3664, 0xD1B1F617, 0x74F06469, + 0x38513EC5, 0x9D10ACBB, 0x773E6CC8, 0xD27FFEB6, 0xA68F9ADF, 0x03CE08A1, 0xE9E0C8D2, 0x4CA15AAC, + 0x70A27D8A, 0xD5E3EFF4, 0x3FCD2F87, 0x9A8CBDF9, 0xEE7CD990, 0x4B3D4BEE, 0xA1138B9D, 0x045219E3, + 0x48F3434F, 0xEDB2D131, 0x079C1142, 0xA2DD833C, 0xD62DE755, 0x736C752B, 0x9942B558, 0x3C032726, + 0xE144FB14, 0x4405696A, 0xAE2BA919, 0x0B6A3B67, 0x7F9A5F0E, 0xDADBCD70, 0x30F50D03, 0x95B49F7D, + 0xD915C5D1, 0x7C5457AF, 0x967A97DC, 0x333B05A2, 0x47CB61CB, 0xE28AF3B5, 0x08A433C6, 0xADE5A1B8, + 0x91E6869E, 0x34A714E0, 0xDE89D493, 0x7BC846ED, 0x0F382284, 0xAA79B0FA, 0x40577089, 0xE516E2F7, + 0xA9B7B85B, 0x0CF62A25, 0xE6D8EA56, 0x43997828, 0x37691C41, 0x92288E3F, 0x78064E4C, 0xDD47DC32, + 0xC76580D9, 0x622412A7, 0x880AD2D4, 0x2D4B40AA, 0x59BB24C3, 0xFCFAB6BD, 0x16D476CE, 0xB395E4B0, + 0xFF34BE1C, 0x5A752C62, 0xB05BEC11, 0x151A7E6F, 0x61EA1A06, 0xC4AB8878, 0x2E85480B, 0x8BC4DA75, + 0xB7C7FD53, 0x12866F2D, 0xF8A8AF5E, 0x5DE93D20, 0x29195949, 0x8C58CB37, 0x66760B44, 0xC337993A, + 0x8F96C396, 0x2AD751E8, 0xC0F9919B, 0x65B803E5, 0x1148678C, 0xB409F5F2, 0x5E273581, 0xFB66A7FF, + 0x26217BCD, 0x8360E9B3, 0x694E29C0, 0xCC0FBBBE, 0xB8FFDFD7, 0x1DBE4DA9, 0xF7908DDA, 0x52D11FA4, + 0x1E704508, 0xBB31D776, 0x511F1705, 0xF45E857B, 0x80AEE112, 0x25EF736C, 0xCFC1B31F, 0x6A802161, + 0x56830647, 
0xF3C29439, 0x19EC544A, 0xBCADC634, 0xC85DA25D, 0x6D1C3023, 0x8732F050, 0x2273622E, + 0x6ED23882, 0xCB93AAFC, 0x21BD6A8F, 0x84FCF8F1, 0xF00C9C98, 0x554D0EE6, 0xBF63CE95, 0x1A225CEB, + 0x8B277743, 0x2E66E53D, 0xC448254E, 0x6109B730, 0x15F9D359, 0xB0B84127, 0x5A968154, 0xFFD7132A, + 0xB3764986, 0x1637DBF8, 0xFC191B8B, 0x595889F5, 0x2DA8ED9C, 0x88E97FE2, 0x62C7BF91, 0xC7862DEF, + 0xFB850AC9, 0x5EC498B7, 0xB4EA58C4, 0x11ABCABA, 0x655BAED3, 0xC01A3CAD, 0x2A34FCDE, 0x8F756EA0, + 0xC3D4340C, 0x6695A672, 0x8CBB6601, 0x29FAF47F, 0x5D0A9016, 0xF84B0268, 0x1265C21B, 0xB7245065, + 0x6A638C57, 0xCF221E29, 0x250CDE5A, 0x804D4C24, 0xF4BD284D, 0x51FCBA33, 0xBBD27A40, 0x1E93E83E, + 0x5232B292, 0xF77320EC, 0x1D5DE09F, 0xB81C72E1, 0xCCEC1688, 0x69AD84F6, 0x83834485, 0x26C2D6FB, + 0x1AC1F1DD, 0xBF8063A3, 0x55AEA3D0, 0xF0EF31AE, 0x841F55C7, 0x215EC7B9, 0xCB7007CA, 0x6E3195B4, + 0x2290CF18, 0x87D15D66, 0x6DFF9D15, 0xC8BE0F6B, 0xBC4E6B02, 0x190FF97C, 0xF321390F, 0x5660AB71, + 0x4C42F79A, 0xE90365E4, 0x032DA597, 0xA66C37E9, 0xD29C5380, 0x77DDC1FE, 0x9DF3018D, 0x38B293F3, + 0x7413C95F, 0xD1525B21, 0x3B7C9B52, 0x9E3D092C, 0xEACD6D45, 0x4F8CFF3B, 0xA5A23F48, 0x00E3AD36, + 0x3CE08A10, 0x99A1186E, 0x738FD81D, 0xD6CE4A63, 0xA23E2E0A, 0x077FBC74, 0xED517C07, 0x4810EE79, + 0x04B1B4D5, 0xA1F026AB, 0x4BDEE6D8, 0xEE9F74A6, 0x9A6F10CF, 0x3F2E82B1, 0xD50042C2, 0x7041D0BC, + 0xAD060C8E, 0x08479EF0, 0xE2695E83, 0x4728CCFD, 0x33D8A894, 0x96993AEA, 0x7CB7FA99, 0xD9F668E7, + 0x9557324B, 0x3016A035, 0xDA386046, 0x7F79F238, 0x0B899651, 0xAEC8042F, 0x44E6C45C, 0xE1A75622, + 0xDDA47104, 0x78E5E37A, 0x92CB2309, 0x378AB177, 0x437AD51E, 0xE63B4760, 0x0C158713, 0xA954156D, + 0xE5F54FC1, 0x40B4DDBF, 0xAA9A1DCC, 0x0FDB8FB2, 0x7B2BEBDB, 0xDE6A79A5, 0x3444B9D6, 0x91052BA8 +}, +{ + 0x00000000, 0xDD45AAB8, 0xBF672381, 0x62228939, 0x7B2231F3, 0xA6679B4B, 0xC4451272, 0x1900B8CA, + 0xF64463E6, 0x2B01C95E, 0x49234067, 0x9466EADF, 0x8D665215, 0x5023F8AD, 0x32017194, 0xEF44DB2C, + 0xE964B13D, 0x34211B85, 0x560392BC, 
0x8B463804, 0x924680CE, 0x4F032A76, 0x2D21A34F, 0xF06409F7, + 0x1F20D2DB, 0xC2657863, 0xA047F15A, 0x7D025BE2, 0x6402E328, 0xB9474990, 0xDB65C0A9, 0x06206A11, + 0xD725148B, 0x0A60BE33, 0x6842370A, 0xB5079DB2, 0xAC072578, 0x71428FC0, 0x136006F9, 0xCE25AC41, + 0x2161776D, 0xFC24DDD5, 0x9E0654EC, 0x4343FE54, 0x5A43469E, 0x8706EC26, 0xE524651F, 0x3861CFA7, + 0x3E41A5B6, 0xE3040F0E, 0x81268637, 0x5C632C8F, 0x45639445, 0x98263EFD, 0xFA04B7C4, 0x27411D7C, + 0xC805C650, 0x15406CE8, 0x7762E5D1, 0xAA274F69, 0xB327F7A3, 0x6E625D1B, 0x0C40D422, 0xD1057E9A, + 0xABA65FE7, 0x76E3F55F, 0x14C17C66, 0xC984D6DE, 0xD0846E14, 0x0DC1C4AC, 0x6FE34D95, 0xB2A6E72D, + 0x5DE23C01, 0x80A796B9, 0xE2851F80, 0x3FC0B538, 0x26C00DF2, 0xFB85A74A, 0x99A72E73, 0x44E284CB, + 0x42C2EEDA, 0x9F874462, 0xFDA5CD5B, 0x20E067E3, 0x39E0DF29, 0xE4A57591, 0x8687FCA8, 0x5BC25610, + 0xB4868D3C, 0x69C32784, 0x0BE1AEBD, 0xD6A40405, 0xCFA4BCCF, 0x12E11677, 0x70C39F4E, 0xAD8635F6, + 0x7C834B6C, 0xA1C6E1D4, 0xC3E468ED, 0x1EA1C255, 0x07A17A9F, 0xDAE4D027, 0xB8C6591E, 0x6583F3A6, + 0x8AC7288A, 0x57828232, 0x35A00B0B, 0xE8E5A1B3, 0xF1E51979, 0x2CA0B3C1, 0x4E823AF8, 0x93C79040, + 0x95E7FA51, 0x48A250E9, 0x2A80D9D0, 0xF7C57368, 0xEEC5CBA2, 0x3380611A, 0x51A2E823, 0x8CE7429B, + 0x63A399B7, 0xBEE6330F, 0xDCC4BA36, 0x0181108E, 0x1881A844, 0xC5C402FC, 0xA7E68BC5, 0x7AA3217D, + 0x52A0C93F, 0x8FE56387, 0xEDC7EABE, 0x30824006, 0x2982F8CC, 0xF4C75274, 0x96E5DB4D, 0x4BA071F5, + 0xA4E4AAD9, 0x79A10061, 0x1B838958, 0xC6C623E0, 0xDFC69B2A, 0x02833192, 0x60A1B8AB, 0xBDE41213, + 0xBBC47802, 0x6681D2BA, 0x04A35B83, 0xD9E6F13B, 0xC0E649F1, 0x1DA3E349, 0x7F816A70, 0xA2C4C0C8, + 0x4D801BE4, 0x90C5B15C, 0xF2E73865, 0x2FA292DD, 0x36A22A17, 0xEBE780AF, 0x89C50996, 0x5480A32E, + 0x8585DDB4, 0x58C0770C, 0x3AE2FE35, 0xE7A7548D, 0xFEA7EC47, 0x23E246FF, 0x41C0CFC6, 0x9C85657E, + 0x73C1BE52, 0xAE8414EA, 0xCCA69DD3, 0x11E3376B, 0x08E38FA1, 0xD5A62519, 0xB784AC20, 0x6AC10698, + 0x6CE16C89, 0xB1A4C631, 0xD3864F08, 0x0EC3E5B0, 0x17C35D7A, 0xCA86F7C2, 
0xA8A47EFB, 0x75E1D443, + 0x9AA50F6F, 0x47E0A5D7, 0x25C22CEE, 0xF8878656, 0xE1873E9C, 0x3CC29424, 0x5EE01D1D, 0x83A5B7A5, + 0xF90696D8, 0x24433C60, 0x4661B559, 0x9B241FE1, 0x8224A72B, 0x5F610D93, 0x3D4384AA, 0xE0062E12, + 0x0F42F53E, 0xD2075F86, 0xB025D6BF, 0x6D607C07, 0x7460C4CD, 0xA9256E75, 0xCB07E74C, 0x16424DF4, + 0x106227E5, 0xCD278D5D, 0xAF050464, 0x7240AEDC, 0x6B401616, 0xB605BCAE, 0xD4273597, 0x09629F2F, + 0xE6264403, 0x3B63EEBB, 0x59416782, 0x8404CD3A, 0x9D0475F0, 0x4041DF48, 0x22635671, 0xFF26FCC9, + 0x2E238253, 0xF36628EB, 0x9144A1D2, 0x4C010B6A, 0x5501B3A0, 0x88441918, 0xEA669021, 0x37233A99, + 0xD867E1B5, 0x05224B0D, 0x6700C234, 0xBA45688C, 0xA345D046, 0x7E007AFE, 0x1C22F3C7, 0xC167597F, + 0xC747336E, 0x1A0299D6, 0x782010EF, 0xA565BA57, 0xBC65029D, 0x6120A825, 0x0302211C, 0xDE478BA4, + 0x31035088, 0xEC46FA30, 0x8E647309, 0x5321D9B1, 0x4A21617B, 0x9764CBC3, 0xF54642FA, 0x2803E842 +}, +{ + 0x00000000, 0x38116FAC, 0x7022DF58, 0x4833B0F4, 0xE045BEB0, 0xD854D11C, 0x906761E8, 0xA8760E44, + 0xC5670B91, 0xFD76643D, 0xB545D4C9, 0x8D54BB65, 0x2522B521, 0x1D33DA8D, 0x55006A79, 0x6D1105D5, + 0x8F2261D3, 0xB7330E7F, 0xFF00BE8B, 0xC711D127, 0x6F67DF63, 0x5776B0CF, 0x1F45003B, 0x27546F97, + 0x4A456A42, 0x725405EE, 0x3A67B51A, 0x0276DAB6, 0xAA00D4F2, 0x9211BB5E, 0xDA220BAA, 0xE2336406, + 0x1BA8B557, 0x23B9DAFB, 0x6B8A6A0F, 0x539B05A3, 0xFBED0BE7, 0xC3FC644B, 0x8BCFD4BF, 0xB3DEBB13, + 0xDECFBEC6, 0xE6DED16A, 0xAEED619E, 0x96FC0E32, 0x3E8A0076, 0x069B6FDA, 0x4EA8DF2E, 0x76B9B082, + 0x948AD484, 0xAC9BBB28, 0xE4A80BDC, 0xDCB96470, 0x74CF6A34, 0x4CDE0598, 0x04EDB56C, 0x3CFCDAC0, + 0x51EDDF15, 0x69FCB0B9, 0x21CF004D, 0x19DE6FE1, 0xB1A861A5, 0x89B90E09, 0xC18ABEFD, 0xF99BD151, + 0x37516AAE, 0x0F400502, 0x4773B5F6, 0x7F62DA5A, 0xD714D41E, 0xEF05BBB2, 0xA7360B46, 0x9F2764EA, + 0xF236613F, 0xCA270E93, 0x8214BE67, 0xBA05D1CB, 0x1273DF8F, 0x2A62B023, 0x625100D7, 0x5A406F7B, + 0xB8730B7D, 0x806264D1, 0xC851D425, 0xF040BB89, 0x5836B5CD, 0x6027DA61, 0x28146A95, 0x10050539, + 
0x7D1400EC, 0x45056F40, 0x0D36DFB4, 0x3527B018, 0x9D51BE5C, 0xA540D1F0, 0xED736104, 0xD5620EA8, + 0x2CF9DFF9, 0x14E8B055, 0x5CDB00A1, 0x64CA6F0D, 0xCCBC6149, 0xF4AD0EE5, 0xBC9EBE11, 0x848FD1BD, + 0xE99ED468, 0xD18FBBC4, 0x99BC0B30, 0xA1AD649C, 0x09DB6AD8, 0x31CA0574, 0x79F9B580, 0x41E8DA2C, + 0xA3DBBE2A, 0x9BCAD186, 0xD3F96172, 0xEBE80EDE, 0x439E009A, 0x7B8F6F36, 0x33BCDFC2, 0x0BADB06E, + 0x66BCB5BB, 0x5EADDA17, 0x169E6AE3, 0x2E8F054F, 0x86F90B0B, 0xBEE864A7, 0xF6DBD453, 0xCECABBFF, + 0x6EA2D55C, 0x56B3BAF0, 0x1E800A04, 0x269165A8, 0x8EE76BEC, 0xB6F60440, 0xFEC5B4B4, 0xC6D4DB18, + 0xABC5DECD, 0x93D4B161, 0xDBE70195, 0xE3F66E39, 0x4B80607D, 0x73910FD1, 0x3BA2BF25, 0x03B3D089, + 0xE180B48F, 0xD991DB23, 0x91A26BD7, 0xA9B3047B, 0x01C50A3F, 0x39D46593, 0x71E7D567, 0x49F6BACB, + 0x24E7BF1E, 0x1CF6D0B2, 0x54C56046, 0x6CD40FEA, 0xC4A201AE, 0xFCB36E02, 0xB480DEF6, 0x8C91B15A, + 0x750A600B, 0x4D1B0FA7, 0x0528BF53, 0x3D39D0FF, 0x954FDEBB, 0xAD5EB117, 0xE56D01E3, 0xDD7C6E4F, + 0xB06D6B9A, 0x887C0436, 0xC04FB4C2, 0xF85EDB6E, 0x5028D52A, 0x6839BA86, 0x200A0A72, 0x181B65DE, + 0xFA2801D8, 0xC2396E74, 0x8A0ADE80, 0xB21BB12C, 0x1A6DBF68, 0x227CD0C4, 0x6A4F6030, 0x525E0F9C, + 0x3F4F0A49, 0x075E65E5, 0x4F6DD511, 0x777CBABD, 0xDF0AB4F9, 0xE71BDB55, 0xAF286BA1, 0x9739040D, + 0x59F3BFF2, 0x61E2D05E, 0x29D160AA, 0x11C00F06, 0xB9B60142, 0x81A76EEE, 0xC994DE1A, 0xF185B1B6, + 0x9C94B463, 0xA485DBCF, 0xECB66B3B, 0xD4A70497, 0x7CD10AD3, 0x44C0657F, 0x0CF3D58B, 0x34E2BA27, + 0xD6D1DE21, 0xEEC0B18D, 0xA6F30179, 0x9EE26ED5, 0x36946091, 0x0E850F3D, 0x46B6BFC9, 0x7EA7D065, + 0x13B6D5B0, 0x2BA7BA1C, 0x63940AE8, 0x5B856544, 0xF3F36B00, 0xCBE204AC, 0x83D1B458, 0xBBC0DBF4, + 0x425B0AA5, 0x7A4A6509, 0x3279D5FD, 0x0A68BA51, 0xA21EB415, 0x9A0FDBB9, 0xD23C6B4D, 0xEA2D04E1, + 0x873C0134, 0xBF2D6E98, 0xF71EDE6C, 0xCF0FB1C0, 0x6779BF84, 0x5F68D028, 0x175B60DC, 0x2F4A0F70, + 0xCD796B76, 0xF56804DA, 0xBD5BB42E, 0x854ADB82, 0x2D3CD5C6, 0x152DBA6A, 0x5D1E0A9E, 0x650F6532, + 0x081E60E7, 0x300F0F4B, 0x783CBFBF, 
0x402DD013, 0xE85BDE57, 0xD04AB1FB, 0x9879010F, 0xA0686EA3 +}, +{ + 0x00000000, 0xEF306B19, 0xDB8CA0C3, 0x34BCCBDA, 0xB2F53777, 0x5DC55C6E, 0x697997B4, 0x8649FCAD, + 0x6006181F, 0x8F367306, 0xBB8AB8DC, 0x54BAD3C5, 0xD2F32F68, 0x3DC34471, 0x097F8FAB, 0xE64FE4B2, + 0xC00C303E, 0x2F3C5B27, 0x1B8090FD, 0xF4B0FBE4, 0x72F90749, 0x9DC96C50, 0xA975A78A, 0x4645CC93, + 0xA00A2821, 0x4F3A4338, 0x7B8688E2, 0x94B6E3FB, 0x12FF1F56, 0xFDCF744F, 0xC973BF95, 0x2643D48C, + 0x85F4168D, 0x6AC47D94, 0x5E78B64E, 0xB148DD57, 0x370121FA, 0xD8314AE3, 0xEC8D8139, 0x03BDEA20, + 0xE5F20E92, 0x0AC2658B, 0x3E7EAE51, 0xD14EC548, 0x570739E5, 0xB83752FC, 0x8C8B9926, 0x63BBF23F, + 0x45F826B3, 0xAAC84DAA, 0x9E748670, 0x7144ED69, 0xF70D11C4, 0x183D7ADD, 0x2C81B107, 0xC3B1DA1E, + 0x25FE3EAC, 0xCACE55B5, 0xFE729E6F, 0x1142F576, 0x970B09DB, 0x783B62C2, 0x4C87A918, 0xA3B7C201, + 0x0E045BEB, 0xE13430F2, 0xD588FB28, 0x3AB89031, 0xBCF16C9C, 0x53C10785, 0x677DCC5F, 0x884DA746, + 0x6E0243F4, 0x813228ED, 0xB58EE337, 0x5ABE882E, 0xDCF77483, 0x33C71F9A, 0x077BD440, 0xE84BBF59, + 0xCE086BD5, 0x213800CC, 0x1584CB16, 0xFAB4A00F, 0x7CFD5CA2, 0x93CD37BB, 0xA771FC61, 0x48419778, + 0xAE0E73CA, 0x413E18D3, 0x7582D309, 0x9AB2B810, 0x1CFB44BD, 0xF3CB2FA4, 0xC777E47E, 0x28478F67, + 0x8BF04D66, 0x64C0267F, 0x507CEDA5, 0xBF4C86BC, 0x39057A11, 0xD6351108, 0xE289DAD2, 0x0DB9B1CB, + 0xEBF65579, 0x04C63E60, 0x307AF5BA, 0xDF4A9EA3, 0x5903620E, 0xB6330917, 0x828FC2CD, 0x6DBFA9D4, + 0x4BFC7D58, 0xA4CC1641, 0x9070DD9B, 0x7F40B682, 0xF9094A2F, 0x16392136, 0x2285EAEC, 0xCDB581F5, + 0x2BFA6547, 0xC4CA0E5E, 0xF076C584, 0x1F46AE9D, 0x990F5230, 0x763F3929, 0x4283F2F3, 0xADB399EA, + 0x1C08B7D6, 0xF338DCCF, 0xC7841715, 0x28B47C0C, 0xAEFD80A1, 0x41CDEBB8, 0x75712062, 0x9A414B7B, + 0x7C0EAFC9, 0x933EC4D0, 0xA7820F0A, 0x48B26413, 0xCEFB98BE, 0x21CBF3A7, 0x1577387D, 0xFA475364, + 0xDC0487E8, 0x3334ECF1, 0x0788272B, 0xE8B84C32, 0x6EF1B09F, 0x81C1DB86, 0xB57D105C, 0x5A4D7B45, + 0xBC029FF7, 0x5332F4EE, 0x678E3F34, 0x88BE542D, 0x0EF7A880, 
0xE1C7C399, 0xD57B0843, 0x3A4B635A, + 0x99FCA15B, 0x76CCCA42, 0x42700198, 0xAD406A81, 0x2B09962C, 0xC439FD35, 0xF08536EF, 0x1FB55DF6, + 0xF9FAB944, 0x16CAD25D, 0x22761987, 0xCD46729E, 0x4B0F8E33, 0xA43FE52A, 0x90832EF0, 0x7FB345E9, + 0x59F09165, 0xB6C0FA7C, 0x827C31A6, 0x6D4C5ABF, 0xEB05A612, 0x0435CD0B, 0x308906D1, 0xDFB96DC8, + 0x39F6897A, 0xD6C6E263, 0xE27A29B9, 0x0D4A42A0, 0x8B03BE0D, 0x6433D514, 0x508F1ECE, 0xBFBF75D7, + 0x120CEC3D, 0xFD3C8724, 0xC9804CFE, 0x26B027E7, 0xA0F9DB4A, 0x4FC9B053, 0x7B757B89, 0x94451090, + 0x720AF422, 0x9D3A9F3B, 0xA98654E1, 0x46B63FF8, 0xC0FFC355, 0x2FCFA84C, 0x1B736396, 0xF443088F, + 0xD200DC03, 0x3D30B71A, 0x098C7CC0, 0xE6BC17D9, 0x60F5EB74, 0x8FC5806D, 0xBB794BB7, 0x544920AE, + 0xB206C41C, 0x5D36AF05, 0x698A64DF, 0x86BA0FC6, 0x00F3F36B, 0xEFC39872, 0xDB7F53A8, 0x344F38B1, + 0x97F8FAB0, 0x78C891A9, 0x4C745A73, 0xA344316A, 0x250DCDC7, 0xCA3DA6DE, 0xFE816D04, 0x11B1061D, + 0xF7FEE2AF, 0x18CE89B6, 0x2C72426C, 0xC3422975, 0x450BD5D8, 0xAA3BBEC1, 0x9E87751B, 0x71B71E02, + 0x57F4CA8E, 0xB8C4A197, 0x8C786A4D, 0x63480154, 0xE501FDF9, 0x0A3196E0, 0x3E8D5D3A, 0xD1BD3623, + 0x37F2D291, 0xD8C2B988, 0xEC7E7252, 0x034E194B, 0x8507E5E6, 0x6A378EFF, 0x5E8B4525, 0xB1BB2E3C +}, +{ + 0x00000000, 0x68032CC8, 0xD0065990, 0xB8057558, 0xA5E0C5D1, 0xCDE3E919, 0x75E69C41, 0x1DE5B089, + 0x4E2DFD53, 0x262ED19B, 0x9E2BA4C3, 0xF628880B, 0xEBCD3882, 0x83CE144A, 0x3BCB6112, 0x53C84DDA, + 0x9C5BFAA6, 0xF458D66E, 0x4C5DA336, 0x245E8FFE, 0x39BB3F77, 0x51B813BF, 0xE9BD66E7, 0x81BE4A2F, + 0xD27607F5, 0xBA752B3D, 0x02705E65, 0x6A7372AD, 0x7796C224, 0x1F95EEEC, 0xA7909BB4, 0xCF93B77C, + 0x3D5B83BD, 0x5558AF75, 0xED5DDA2D, 0x855EF6E5, 0x98BB466C, 0xF0B86AA4, 0x48BD1FFC, 0x20BE3334, + 0x73767EEE, 0x1B755226, 0xA370277E, 0xCB730BB6, 0xD696BB3F, 0xBE9597F7, 0x0690E2AF, 0x6E93CE67, + 0xA100791B, 0xC90355D3, 0x7106208B, 0x19050C43, 0x04E0BCCA, 0x6CE39002, 0xD4E6E55A, 0xBCE5C992, + 0xEF2D8448, 0x872EA880, 0x3F2BDDD8, 0x5728F110, 0x4ACD4199, 0x22CE6D51, 0x9ACB1809, 
0xF2C834C1, + 0x7AB7077A, 0x12B42BB2, 0xAAB15EEA, 0xC2B27222, 0xDF57C2AB, 0xB754EE63, 0x0F519B3B, 0x6752B7F3, + 0x349AFA29, 0x5C99D6E1, 0xE49CA3B9, 0x8C9F8F71, 0x917A3FF8, 0xF9791330, 0x417C6668, 0x297F4AA0, + 0xE6ECFDDC, 0x8EEFD114, 0x36EAA44C, 0x5EE98884, 0x430C380D, 0x2B0F14C5, 0x930A619D, 0xFB094D55, + 0xA8C1008F, 0xC0C22C47, 0x78C7591F, 0x10C475D7, 0x0D21C55E, 0x6522E996, 0xDD279CCE, 0xB524B006, + 0x47EC84C7, 0x2FEFA80F, 0x97EADD57, 0xFFE9F19F, 0xE20C4116, 0x8A0F6DDE, 0x320A1886, 0x5A09344E, + 0x09C17994, 0x61C2555C, 0xD9C72004, 0xB1C40CCC, 0xAC21BC45, 0xC422908D, 0x7C27E5D5, 0x1424C91D, + 0xDBB77E61, 0xB3B452A9, 0x0BB127F1, 0x63B20B39, 0x7E57BBB0, 0x16549778, 0xAE51E220, 0xC652CEE8, + 0x959A8332, 0xFD99AFFA, 0x459CDAA2, 0x2D9FF66A, 0x307A46E3, 0x58796A2B, 0xE07C1F73, 0x887F33BB, + 0xF56E0EF4, 0x9D6D223C, 0x25685764, 0x4D6B7BAC, 0x508ECB25, 0x388DE7ED, 0x808892B5, 0xE88BBE7D, + 0xBB43F3A7, 0xD340DF6F, 0x6B45AA37, 0x034686FF, 0x1EA33676, 0x76A01ABE, 0xCEA56FE6, 0xA6A6432E, + 0x6935F452, 0x0136D89A, 0xB933ADC2, 0xD130810A, 0xCCD53183, 0xA4D61D4B, 0x1CD36813, 0x74D044DB, + 0x27180901, 0x4F1B25C9, 0xF71E5091, 0x9F1D7C59, 0x82F8CCD0, 0xEAFBE018, 0x52FE9540, 0x3AFDB988, + 0xC8358D49, 0xA036A181, 0x1833D4D9, 0x7030F811, 0x6DD54898, 0x05D66450, 0xBDD31108, 0xD5D03DC0, + 0x8618701A, 0xEE1B5CD2, 0x561E298A, 0x3E1D0542, 0x23F8B5CB, 0x4BFB9903, 0xF3FEEC5B, 0x9BFDC093, + 0x546E77EF, 0x3C6D5B27, 0x84682E7F, 0xEC6B02B7, 0xF18EB23E, 0x998D9EF6, 0x2188EBAE, 0x498BC766, + 0x1A438ABC, 0x7240A674, 0xCA45D32C, 0xA246FFE4, 0xBFA34F6D, 0xD7A063A5, 0x6FA516FD, 0x07A63A35, + 0x8FD9098E, 0xE7DA2546, 0x5FDF501E, 0x37DC7CD6, 0x2A39CC5F, 0x423AE097, 0xFA3F95CF, 0x923CB907, + 0xC1F4F4DD, 0xA9F7D815, 0x11F2AD4D, 0x79F18185, 0x6414310C, 0x0C171DC4, 0xB412689C, 0xDC114454, + 0x1382F328, 0x7B81DFE0, 0xC384AAB8, 0xAB878670, 0xB66236F9, 0xDE611A31, 0x66646F69, 0x0E6743A1, + 0x5DAF0E7B, 0x35AC22B3, 0x8DA957EB, 0xE5AA7B23, 0xF84FCBAA, 0x904CE762, 0x2849923A, 0x404ABEF2, + 0xB2828A33, 0xDA81A6FB, 
0x6284D3A3, 0x0A87FF6B, 0x17624FE2, 0x7F61632A, 0xC7641672, 0xAF673ABA, + 0xFCAF7760, 0x94AC5BA8, 0x2CA92EF0, 0x44AA0238, 0x594FB2B1, 0x314C9E79, 0x8949EB21, 0xE14AC7E9, + 0x2ED97095, 0x46DA5C5D, 0xFEDF2905, 0x96DC05CD, 0x8B39B544, 0xE33A998C, 0x5B3FECD4, 0x333CC01C, + 0x60F48DC6, 0x08F7A10E, 0xB0F2D456, 0xD8F1F89E, 0xC5144817, 0xAD1764DF, 0x15121187, 0x7D113D4F +}, +{ + 0x00000000, 0x493C7D27, 0x9278FA4E, 0xDB448769, 0x211D826D, 0x6821FF4A, 0xB3657823, 0xFA590504, + 0x423B04DA, 0x0B0779FD, 0xD043FE94, 0x997F83B3, 0x632686B7, 0x2A1AFB90, 0xF15E7CF9, 0xB86201DE, + 0x847609B4, 0xCD4A7493, 0x160EF3FA, 0x5F328EDD, 0xA56B8BD9, 0xEC57F6FE, 0x37137197, 0x7E2F0CB0, + 0xC64D0D6E, 0x8F717049, 0x5435F720, 0x1D098A07, 0xE7508F03, 0xAE6CF224, 0x7528754D, 0x3C14086A, + 0x0D006599, 0x443C18BE, 0x9F789FD7, 0xD644E2F0, 0x2C1DE7F4, 0x65219AD3, 0xBE651DBA, 0xF759609D, + 0x4F3B6143, 0x06071C64, 0xDD439B0D, 0x947FE62A, 0x6E26E32E, 0x271A9E09, 0xFC5E1960, 0xB5626447, + 0x89766C2D, 0xC04A110A, 0x1B0E9663, 0x5232EB44, 0xA86BEE40, 0xE1579367, 0x3A13140E, 0x732F6929, + 0xCB4D68F7, 0x827115D0, 0x593592B9, 0x1009EF9E, 0xEA50EA9A, 0xA36C97BD, 0x782810D4, 0x31146DF3, + 0x1A00CB32, 0x533CB615, 0x8878317C, 0xC1444C5B, 0x3B1D495F, 0x72213478, 0xA965B311, 0xE059CE36, + 0x583BCFE8, 0x1107B2CF, 0xCA4335A6, 0x837F4881, 0x79264D85, 0x301A30A2, 0xEB5EB7CB, 0xA262CAEC, + 0x9E76C286, 0xD74ABFA1, 0x0C0E38C8, 0x453245EF, 0xBF6B40EB, 0xF6573DCC, 0x2D13BAA5, 0x642FC782, + 0xDC4DC65C, 0x9571BB7B, 0x4E353C12, 0x07094135, 0xFD504431, 0xB46C3916, 0x6F28BE7F, 0x2614C358, + 0x1700AEAB, 0x5E3CD38C, 0x857854E5, 0xCC4429C2, 0x361D2CC6, 0x7F2151E1, 0xA465D688, 0xED59ABAF, + 0x553BAA71, 0x1C07D756, 0xC743503F, 0x8E7F2D18, 0x7426281C, 0x3D1A553B, 0xE65ED252, 0xAF62AF75, + 0x9376A71F, 0xDA4ADA38, 0x010E5D51, 0x48322076, 0xB26B2572, 0xFB575855, 0x2013DF3C, 0x692FA21B, + 0xD14DA3C5, 0x9871DEE2, 0x4335598B, 0x0A0924AC, 0xF05021A8, 0xB96C5C8F, 0x6228DBE6, 0x2B14A6C1, + 0x34019664, 0x7D3DEB43, 0xA6796C2A, 0xEF45110D, 
0x151C1409, 0x5C20692E, 0x8764EE47, 0xCE589360, + 0x763A92BE, 0x3F06EF99, 0xE44268F0, 0xAD7E15D7, 0x572710D3, 0x1E1B6DF4, 0xC55FEA9D, 0x8C6397BA, + 0xB0779FD0, 0xF94BE2F7, 0x220F659E, 0x6B3318B9, 0x916A1DBD, 0xD856609A, 0x0312E7F3, 0x4A2E9AD4, + 0xF24C9B0A, 0xBB70E62D, 0x60346144, 0x29081C63, 0xD3511967, 0x9A6D6440, 0x4129E329, 0x08159E0E, + 0x3901F3FD, 0x703D8EDA, 0xAB7909B3, 0xE2457494, 0x181C7190, 0x51200CB7, 0x8A648BDE, 0xC358F6F9, + 0x7B3AF727, 0x32068A00, 0xE9420D69, 0xA07E704E, 0x5A27754A, 0x131B086D, 0xC85F8F04, 0x8163F223, + 0xBD77FA49, 0xF44B876E, 0x2F0F0007, 0x66337D20, 0x9C6A7824, 0xD5560503, 0x0E12826A, 0x472EFF4D, + 0xFF4CFE93, 0xB67083B4, 0x6D3404DD, 0x240879FA, 0xDE517CFE, 0x976D01D9, 0x4C2986B0, 0x0515FB97, + 0x2E015D56, 0x673D2071, 0xBC79A718, 0xF545DA3F, 0x0F1CDF3B, 0x4620A21C, 0x9D642575, 0xD4585852, + 0x6C3A598C, 0x250624AB, 0xFE42A3C2, 0xB77EDEE5, 0x4D27DBE1, 0x041BA6C6, 0xDF5F21AF, 0x96635C88, + 0xAA7754E2, 0xE34B29C5, 0x380FAEAC, 0x7133D38B, 0x8B6AD68F, 0xC256ABA8, 0x19122CC1, 0x502E51E6, + 0xE84C5038, 0xA1702D1F, 0x7A34AA76, 0x3308D751, 0xC951D255, 0x806DAF72, 0x5B29281B, 0x1215553C, + 0x230138CF, 0x6A3D45E8, 0xB179C281, 0xF845BFA6, 0x021CBAA2, 0x4B20C785, 0x906440EC, 0xD9583DCB, + 0x613A3C15, 0x28064132, 0xF342C65B, 0xBA7EBB7C, 0x4027BE78, 0x091BC35F, 0xD25F4436, 0x9B633911, + 0xA777317B, 0xEE4B4C5C, 0x350FCB35, 0x7C33B612, 0x866AB316, 0xCF56CE31, 0x14124958, 0x5D2E347F, + 0xE54C35A1, 0xAC704886, 0x7734CFEF, 0x3E08B2C8, 0xC451B7CC, 0x8D6DCAEB, 0x56294D82, 0x1F1530A5 +}}; + +#define CRC32_UPD(crc, n) \ + (crc32c_tables[(n)][(crc) & 0xFF] ^ \ + crc32c_tables[(n)-1][((crc) >> 8) & 0xFF]) + +static inline uint32_t +crc32c_1word(uint32_t data, uint32_t init_val) +{ + uint32_t crc, term1, term2; + crc = init_val; + crc ^= data; + + term1 = CRC32_UPD(crc, 3); + term2 = crc >> 16; + crc = term1 ^ CRC32_UPD(term2, 1); + + return crc; +} + +static inline uint32_t +crc32c_2words(uint64_t data, uint32_t init_val) +{ + union { + uint64_t u64; + 
uint32_t u32[2];
+	} d;
+	d.u64 = data;	/* view the 64-bit input as two 32-bit halves */
+
+	uint32_t crc, term1, term2;
+
+	crc = init_val;
+	crc ^= d.u32[0];	/* only u32[0] is folded into the running value here */
+
+	term1 = CRC32_UPD(crc, 7);	/* tables 7 and 6: two low-order bytes of crc */
+	term2 = crc >> 16;
+	crc = term1 ^ CRC32_UPD(term2, 5);	/* tables 5 and 4: two high-order bytes of crc */
+	term1 = CRC32_UPD(d.u32[1], 3);	/* tables 3 and 2: two low-order bytes of u32[1] */
+	term2 = d.u32[1] >> 16;
+	crc ^= term1 ^ CRC32_UPD(term2, 1);	/* tables 1 and 0: two high-order bytes of u32[1] */
+
+	return crc;
+}
+
+#if defined(RTE_ARCH_I686) || defined(RTE_ARCH_X86_64)
+static inline uint32_t
+crc32c_sse42_u32(uint32_t data, uint32_t init_val)
+{
+	__asm__ volatile(	/* one crc32l step; init_val is the in/out operand */
+			"crc32l %[data], %[init_val];"
+			: [init_val] "+r" (init_val)
+			: [data] "rm" (data));
+	return init_val;
+}
+
+static inline uint32_t
+crc32c_sse42_u64_mimic(uint64_t data, uint64_t init_val)
+{
+	union {
+		uint32_t u32[2];
+		uint64_t u64;
+	} d;
+
+	d.u64 = data;	/* split so the 32-bit instruction can be used twice */
+	init_val = crc32c_sse42_u32(d.u32[0], init_val);	/* u32[0] first ... */
+	init_val = crc32c_sse42_u32(d.u32[1], init_val);	/* ... then u32[1] */
+	return init_val;
+}
+#endif
+
+#ifdef RTE_ARCH_X86_64
+static inline uint32_t
+crc32c_sse42_u64(uint64_t data, uint64_t init_val)
+{
+	__asm__ volatile(	/* one crc32q step; init_val is the in/out operand */
+			"crc32q %[data], %[init_val];"
+			: [init_val] "+r" (init_val)
+			: [data] "rm" (data));
+	return init_val;	/* narrowed to uint32_t by the return type */
+}
+#endif
+
+#define CRC32_SW            (1U << 0)	/* table-driven software implementation */
+#define CRC32_SSE42         (1U << 1)	/* 32-bit crc32 instruction */
+#define CRC32_x64           (1U << 2)	/* 64-bit variant requested */
+#define CRC32_SSE42_x64     (CRC32_x64|CRC32_SSE42)
+#define CRC32_ARM64         (1U << 3)
+
+static uint8_t crc32_alg = CRC32_SW;	/* current selection; written by rte_hash_crc_set_alg() */
+
+#if defined(RTE_ARCH_ARM64)
+#include "rte_crc_arm64.h"
+#else
+
+/**
+ * Allow or disallow use of SSE4.2 intrinsics for CRC32 hash
+ * calculation.
+ *
+ * @param alg
+ *   An OR of following flags:
+ *   - (CRC32_SW) Don't use SSE4.2 intrinsics
+ *   - (CRC32_SSE42) Use SSE4.2 intrinsics if available
+ *   - (CRC32_SSE42_x64) Use 64-bit SSE4.2 intrinsic if available (default)
+ *
+ */
+static inline void
+rte_hash_crc_set_alg(uint8_t alg)
+{
+	switch (alg) {	/* cases deliberately fall through, downgrading alg step by step */
+#if defined(RTE_ARCH_I686) || defined(RTE_ARCH_X86_64)
+	case CRC32_SSE42_x64:
+		if (! rte_cpu_get_flag_enabled(RTE_CPUFLAG_EM64T))
+			alg = CRC32_SSE42;
+	case CRC32_SSE42:	/* also reached by fall-through from CRC32_SSE42_x64 */
+		if (! rte_cpu_get_flag_enabled(RTE_CPUFLAG_SSE4_2))
+			alg = CRC32_SW;
+#endif
+	case CRC32_SW:	/* also reached by fall-through; stores the (possibly downgraded) value */
+		crc32_alg = alg;
+	default:	/* any other value leaves crc32_alg unchanged */
+		break;
+	}
+}
+
+/* Setting the best available algorithm */
+static inline void __attribute__((constructor))
+rte_hash_crc_init_alg(void)
+{
+	rte_hash_crc_set_alg(CRC32_SSE42_x64);	/* request the strongest option; set_alg downgrades as needed */
+}
+
+/**
+ * Use single crc32 instruction to perform a hash on a 4 byte value.
+ * Fall back to software crc32 implementation in case SSE4.2 is
+ * not supported
+ *
+ * @param data
+ *   Data to perform hash on.
+ * @param init_val
+ *   Value to initialise hash generator.
+ * @return
+ *   32bit calculated hash value.
+ */
+static inline uint32_t
+rte_hash_crc_4byte(uint32_t data, uint32_t init_val)
+{
+#if defined RTE_ARCH_I686 || defined RTE_ARCH_X86_64
+	if (likely(crc32_alg & CRC32_SSE42))
+		return crc32c_sse42_u32(data, init_val);
+#endif
+
+	return crc32c_1word(data, init_val);	/* software fallback */
+}
+
+/**
+ * Use single crc32 instruction to perform a hash on a 8 byte value.
+ * Fall back to software crc32 implementation in case SSE4.2 is
+ * not supported
+ *
+ * @param data
+ *   Data to perform hash on.
+ * @param init_val
+ *   Value to initialise hash generator.
+ * @return
+ *   32bit calculated hash value.
+ */
+static inline uint32_t
+rte_hash_crc_8byte(uint64_t data, uint32_t init_val)
+{
+#ifdef RTE_ARCH_X86_64
+	if (likely(crc32_alg == CRC32_SSE42_x64))
+		return crc32c_sse42_u64(data, init_val);
+#endif
+
+#if defined RTE_ARCH_I686 || defined RTE_ARCH_X86_64
+	if (likely(crc32_alg & CRC32_SSE42))	/* SSE4.2 without the 64-bit form: two 32-bit steps */
+		return crc32c_sse42_u64_mimic(data, init_val);
+#endif
+
+	return crc32c_2words(data, init_val);	/* software fallback */
+}
+
+#endif
+
+/**
+ * Calculate CRC32 hash on user-supplied byte array.
+ *
+ * @param data
+ *   Data to perform hash on.
+ * @param data_len
+ *   How many bytes to use to calculate hash value.
+ * @param init_val
+ *   Value to initialise hash generator.
+ * @return
+ *   32bit calculated hash value.
+ */
+static inline uint32_t
+rte_hash_crc(const void *data, uint32_t data_len, uint32_t init_val)
+{
+	unsigned i;
+	uint64_t temp = 0;	/* zero-padded staging word for the tail bytes */
+	uintptr_t pd = (uintptr_t) data;	/* NOTE(review): dereferenced below as uint64_t/uint32_t with no alignment check - confirm targets tolerate it */
+
+	for (i = 0; i < data_len / 8; i++) {	/* whole 8-byte chunks first */
+		init_val = rte_hash_crc_8byte(*(const uint64_t *)pd, init_val);
+		pd += 8;
+	}
+
+	switch (7 - (data_len & 0x07)) {	/* case N <=> (7 - N) tail bytes remain */
+	case 0:	/* 7 tail bytes */
+		temp |= (uint64_t) *((const uint8_t *)pd + 6) << 48;
+		/* Fallthrough */
+	case 1:	/* 6 tail bytes */
+		temp |= (uint64_t) *((const uint8_t *)pd + 5) << 40;
+		/* Fallthrough */
+	case 2:	/* 5 tail bytes: hashed as one zero-padded 8-byte word */
+		temp |= (uint64_t) *((const uint8_t *)pd + 4) << 32;
+		temp |= *(const uint32_t *)pd;
+		init_val = rte_hash_crc_8byte(temp, init_val);
+		break;
+	case 3:	/* exactly 4 tail bytes */
+		init_val = rte_hash_crc_4byte(*(const uint32_t *)pd, init_val);
+		break;
+	case 4:	/* 3 tail bytes */
+		temp |= *((const uint8_t *)pd + 2) << 16;
+		/* Fallthrough */
+	case 5:	/* 2 tail bytes */
+		temp |= *((const uint8_t *)pd + 1) << 8;
+		/* Fallthrough */
+	case 6:	/* 1 tail byte: hashed as one zero-padded 4-byte word */
+		temp |= *(const uint8_t *)pd;
+		init_val = rte_hash_crc_4byte(temp, init_val);
+		/* Fallthrough */
+	default:	/* data_len is a multiple of 8: nothing left */
+		break;
+	}
+
+	return init_val;
+}
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _RTE_HASH_CRC_H_ */
diff --git a/src/dpdk22/lib/librte_hash/rte_jhash.h b/src/dpdk22/lib/librte_hash/rte_jhash.h
new file mode 100644
index 00000000..457f225c
--- /dev/null
+++ b/src/dpdk22/lib/librte_hash/rte_jhash.h
@@ -0,0 +1,410 @@
+/*-
+ *   BSD LICENSE
+ *
+ *   Copyright(c) 2010-2015 Intel Corporation. All rights reserved.
+ *   All rights reserved.
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in
+ *       the documentation and/or other materials provided with the
+ *       distribution.
+ * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef _RTE_JHASH_H +#define _RTE_JHASH_H + +/** + * @file + * + * jhash functions. + */ + +#ifdef __cplusplus +extern "C" { +#endif + +#include +#include +#include + +#include +#include + +/* jhash.h: Jenkins hash support. + * + * Copyright (C) 2006 Bob Jenkins (bob_jenkins@burtleburtle.net) + * + * http://burtleburtle.net/bob/hash/ + * + * These are the credits from Bob's sources: + * + * lookup3.c, by Bob Jenkins, May 2006, Public Domain. + * + * These are functions for producing 32-bit hashes for hash table lookup. + * hashword(), hashlittle(), hashlittle2(), hashbig(), mix(), and final() + * are externally useful functions. Routines to test the hash are included + * if SELF_TEST is defined. You can use this free for any purpose. It's in + * the public domain. It has no warranty. + * + * $FreeBSD$ + */ + +#define rot(x, k) (((x) << (k)) | ((x) >> (32-(k)))) + +/** @internal Internal function. NOTE: Arguments are modified. 
*/
+#define __rte_jhash_mix(a, b, c) do { \
+	a -= c; a ^= rot(c, 4); c += b; \
+	b -= a; b ^= rot(a, 6); a += c; \
+	c -= b; c ^= rot(b, 8); b += a; \
+	a -= c; a ^= rot(c, 16); c += b; \
+	b -= a; b ^= rot(a, 19); a += c; \
+	c -= b; c ^= rot(b, 4); b += a; \
+} while (0)
+
+#define __rte_jhash_final(a, b, c) do { \
+	c ^= b; c -= rot(b, 14); \
+	a ^= c; a -= rot(c, 11); \
+	b ^= a; b -= rot(a, 25); \
+	c ^= b; c -= rot(b, 16); \
+	a ^= c; a -= rot(c, 4);  \
+	b ^= a; b -= rot(a, 14); \
+	c ^= b; c -= rot(b, 24); \
+} while (0)
+
+/** The golden ratio: an arbitrary value. */
+#define RTE_JHASH_GOLDEN_RATIO      0xdeadbeef
+
+#if RTE_BYTE_ORDER == RTE_LITTLE_ENDIAN	/* BIT_SHIFT(x, y, k): combine adjacent words x, y at a k-bit offset */
+#define BIT_SHIFT(x, y, k) (((x) >> (k)) | ((uint64_t)(y) << (32-(k))))
+#else
+#define BIT_SHIFT(x, y, k) (((uint64_t)(x) << (k)) | ((y) >> (32-(k))))
+#endif
+
+#define LOWER8b_MASK rte_le_to_cpu_32(0xff)
+#define LOWER16b_MASK rte_le_to_cpu_32(0xffff)
+#define LOWER24b_MASK rte_le_to_cpu_32(0xffffff)
+
+static inline void
+__rte_jhash_2hashes(const void *key, uint32_t length, uint32_t *pc,
+		uint32_t *pb, unsigned check_align)
+{
+	uint32_t a, b, c;
+
+	/* Set up the internal state */
+	a = b = c = RTE_JHASH_GOLDEN_RATIO + ((uint32_t)length) + *pc;
+	c += *pb;	/* second seed only perturbs c */
+
+	/*
+	 * Check key alignment. For x86 architecture, first case is always optimal
+	 * If check_align is not set, first case will be used
+	 */
+#if defined(RTE_ARCH_X86_64) || defined(RTE_ARCH_I686) || defined(RTE_ARCH_X86_X32)
+	const uint32_t *k = key;
+	const uint32_t s = 0;
+#else
+	const uint32_t *k = (uint32_t *)((uintptr_t)key & (uintptr_t)~3);	/* key rounded down to a 4-byte boundary */
+	const uint32_t s = ((uintptr_t)key & 3) * CHAR_BIT;	/* misalignment of key, in bits */
+#endif
+	if (!check_align || s == 0) {	/* read 32-bit words directly from k */
+		while (length > 12) {	/* full 12-byte blocks; leaves 1..12 bytes for the tail */
+			a += k[0];
+			b += k[1];
+			c += k[2];
+
+			__rte_jhash_mix(a, b, c);
+
+			k += 3;
+			length -= 12;
+		}
+
+		switch (length) {	/* whole words are loaded then masked, so up to 3 bytes past the key are read */
+		case 12:
+			c += k[2]; b += k[1]; a += k[0]; break;
+		case 11:
+			c += k[2] & LOWER24b_MASK; b += k[1]; a += k[0]; break;
+		case 10:
+			c += k[2] & LOWER16b_MASK; b += k[1]; a += k[0]; break;
+		case 9:
+			c += k[2] & LOWER8b_MASK; b += k[1]; a += k[0]; break;
+		case 8:
+			b += k[1]; a += k[0]; break;
+		case 7:
+			b += k[1] & LOWER24b_MASK; a += k[0]; break;
+		case 6:
+			b += k[1] & LOWER16b_MASK; a += k[0]; break;
+		case 5:
+			b += k[1] & LOWER8b_MASK; a += k[0]; break;
+		case 4:
+			a += k[0]; break;
+		case 3:
+			a += k[0] & LOWER24b_MASK; break;
+		case 2:
+			a += k[0] & LOWER16b_MASK; break;
+		case 1:
+			a += k[0] & LOWER8b_MASK; break;
+		/* zero length strings require no mixing */
+		case 0:
+			*pc = c;
+			*pb = b;
+			return;	/* skips __rte_jhash_final() */
+		};
+	} else {
+		/* all but the last block: affect some 32 bits of (a, b, c) */
+		while (length > 12) {
+			a += BIT_SHIFT(k[0], k[1], s);	/* each value is stitched from two aligned words */
+			b += BIT_SHIFT(k[1], k[2], s);
+			c += BIT_SHIFT(k[2], k[3], s);
+			__rte_jhash_mix(a, b, c);
+
+			k += 3;
+			length -= 12;
+		}
+
+		/* last block: affect all 32 bits of (c) */
+		switch (length) {	/* same tail layout as the aligned branch, via BIT_SHIFT */
+		case 12:
+			a += BIT_SHIFT(k[0], k[1], s);
+			b += BIT_SHIFT(k[1], k[2], s);
+			c += BIT_SHIFT(k[2], k[3], s);
+			break;
+		case 11:
+			a += BIT_SHIFT(k[0], k[1], s);
+			b += BIT_SHIFT(k[1], k[2], s);
+			c += BIT_SHIFT(k[2], k[3], s) & LOWER24b_MASK;
+			break;
+		case 10:
+			a += BIT_SHIFT(k[0], k[1], s);
+			b += BIT_SHIFT(k[1], k[2], s);
+			c += BIT_SHIFT(k[2], k[3], s) & LOWER16b_MASK;
+			break;
+		case 9:
+			a += BIT_SHIFT(k[0], k[1], s);
+			b += BIT_SHIFT(k[1], k[2], s);
+			c += BIT_SHIFT(k[2], k[3], s) & LOWER8b_MASK;
+			break;
+		case 8:
+			a += BIT_SHIFT(k[0], k[1], s);
+			b += BIT_SHIFT(k[1], k[2], s);
+			break;
+		case 7:
+			a += BIT_SHIFT(k[0], k[1], s);
+			b += BIT_SHIFT(k[1], k[2], s) & LOWER24b_MASK;
+			break;
+		case 6:
+			a += BIT_SHIFT(k[0], k[1], s);
+			b += BIT_SHIFT(k[1], k[2], s) & LOWER16b_MASK;
+			break;
+		case 5:
+			a += BIT_SHIFT(k[0], k[1], s);
+			b += BIT_SHIFT(k[1], k[2], s) & LOWER8b_MASK;
+			break;
+		case 4:
+			a += BIT_SHIFT(k[0], k[1], s);
+			break;
+		case 3:
+			a += BIT_SHIFT(k[0], k[1], s) & LOWER24b_MASK;
+			break;
+		case 2:
+			a += BIT_SHIFT(k[0], k[1], s) & LOWER16b_MASK;
+			break;
+		case 1:
+			a += BIT_SHIFT(k[0], k[1], s) & LOWER8b_MASK;
+			break;
+		/* zero length strings require no mixing */
+		case 0:
+			*pc = c;
+			*pb = b;
+			return;	/* skips __rte_jhash_final() */
+		}
+	}
+
+	__rte_jhash_final(a, b, c);
+
+	*pc = c;	/* primary hash */
+	*pb = b;	/* secondary hash */
+}
+
+/**
+ * Same as rte_jhash, but takes two seeds and returns two uint32_ts.
+ * pc and pb must be non-null, and *pc and *pb must both be initialized
+ * with seeds. If you pass in (*pb)=0, the output (*pc) will be
+ * the same as the return value from rte_jhash.
+ *
+ * @param key
+ *   Key to calculate hash of.
+ * @param length
+ *   Length of key in bytes.
+ * @param pc
+ *   IN: seed OUT: primary hash value.
+ * @param pb
+ *   IN: second seed OUT: secondary hash value.
+ */
+static inline void
+rte_jhash_2hashes(const void *key, uint32_t length, uint32_t *pc, uint32_t *pb)
+{
+	__rte_jhash_2hashes(key, length, pc, pb, 1);	/* 1: honour key alignment */
+}
+
+/**
+ * Same as rte_jhash_32b, but takes two seeds and returns two uint32_ts.
+ * pc and pb must be non-null, and *pc and *pb must both be initialized
+ * with seeds. If you pass in (*pb)=0, the output (*pc) will be
+ * the same as the return value from rte_jhash_32b.
+ *
+ * @param k
+ *   Key to calculate hash of.
+ * @param length
+ *   Length of key in units of 4 bytes.
+ * @param pc
+ *   IN: seed OUT: primary hash value.
+ * @param pb
+ *   IN: second seed OUT: secondary hash value.
+ */
+static inline void
+rte_jhash_32b_2hashes(const uint32_t *k, uint32_t length, uint32_t *pc, uint32_t *pb)
+{
+	__rte_jhash_2hashes((const void *) k, (length << 2), pc, pb, 0);	/* words -> bytes; 0: skip the alignment check */
+}
+
+/**
+ * The most generic version, hashes an arbitrary sequence
+ * of bytes.  No alignment or length assumptions are made about
+ * the input key.
+ *
+ * @param key
+ *   Key to calculate hash of.
+ * @param length
+ *   Length of key in bytes.
+ * @param initval
+ *   Initialising value of hash.
+ * @return
+ *   Calculated hash value.
+ */
+static inline uint32_t
+rte_jhash(const void *key, uint32_t length, uint32_t initval)
+{
+	uint32_t initval2 = 0;	/* second seed fixed to 0; its output is discarded */
+
+	rte_jhash_2hashes(key, length, &initval, &initval2);
+
+	return initval;	/* overwritten in place with the primary hash */
+}
+
+/**
+ * A special optimized version that handles 1 or more of uint32_ts.
+ * The length parameter here is the number of uint32_ts in the key.
+ *
+ * @param k
+ *   Key to calculate hash of.
+ * @param length
+ *   Length of key in units of 4 bytes.
+ * @param initval
+ *   Initialising value of hash.
+ * @return
+ *   Calculated hash value.
+ */
+static inline uint32_t
+rte_jhash_32b(const uint32_t *k, uint32_t length, uint32_t initval)
+{
+	uint32_t initval2 = 0;	/* second seed fixed to 0; its output is discarded */
+
+	rte_jhash_32b_2hashes(k, length, &initval, &initval2);
+
+	return initval;	/* overwritten in place with the primary hash */
+}
+
+static inline uint32_t
+__rte_jhash_3words(uint32_t a, uint32_t b, uint32_t c, uint32_t initval)
+{
+	a += RTE_JHASH_GOLDEN_RATIO + initval;	/* seed each word; callers pre-add the key length */
+	b += RTE_JHASH_GOLDEN_RATIO + initval;
+	c += RTE_JHASH_GOLDEN_RATIO + initval;
+
+	__rte_jhash_final(a, b, c);	/* final mix only, no __rte_jhash_mix() rounds */
+
+	return c;
+}
+
+/**
+ * A special ultra-optimized version that knows it is hashing exactly
+ * 3 words.
+ *
+ * @param a
+ *   First word to calculate hash of.
+ * @param b
+ *   Second word to calculate hash of.
+ * @param c
+ *   Third word to calculate hash of.
+ * @param initval
+ *   Initialising value of hash.
+ * @return
+ *   Calculated hash value.
+ */
+static inline uint32_t
+rte_jhash_3words(uint32_t a, uint32_t b, uint32_t c, uint32_t initval)
+{
+	return __rte_jhash_3words(a + 12, b + 12, c + 12, initval);	/* 12 = key length in bytes, the length term of __rte_jhash_2hashes() */
+}
+
+/**
+ * A special ultra-optimized version that knows it is hashing exactly
+ * 2 words.
+ *
+ * @param a
+ *   First word to calculate hash of.
+ * @param b
+ *   Second word to calculate hash of.
+ * @param initval
+ *   Initialising value of hash.
+ * @return
+ *   Calculated hash value.
+ */
+static inline uint32_t
+rte_jhash_2words(uint32_t a, uint32_t b, uint32_t initval)
+{
+	return __rte_jhash_3words(a + 8, b + 8, 8, initval);	/* 8 = key length in bytes; third word absent */
+}
+
+/**
+ * A special ultra-optimized version that knows it is hashing exactly
+ * 1 word.
+ *
+ * @param a
+ *   Word to calculate hash of.
+ * @param initval
+ *   Initialising value of hash.
+ * @return
+ *   Calculated hash value.
+ */
+static inline uint32_t
+rte_jhash_1word(uint32_t a, uint32_t initval)
+{
+	return __rte_jhash_3words(a + 4, 4, 4, initval);	/* 4 = key length in bytes; second and third words absent */
+}
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _RTE_JHASH_H */
diff --git a/src/dpdk22/lib/librte_hash/rte_thash.h b/src/dpdk22/lib/librte_hash/rte_thash.h
new file mode 100644
index 00000000..d98e98e7
--- /dev/null
+++ b/src/dpdk22/lib/librte_hash/rte_thash.h
@@ -0,0 +1,250 @@
+/*-
+ *   BSD LICENSE
+ *
+ *   Copyright(c) 2015 Vladimir Medvedkin
+ *   All rights reserved.
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in
+ *       the documentation and/or other materials provided with the
+ *       distribution.
+ * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef _RTE_THASH_H +#define _RTE_THASH_H + +/** + * @file + * + * toeplitz hash functions. + */ + +#ifdef __cplusplus +extern "C" { +#endif + +/** + * Software implementation of the Toeplitz hash function used by RSS. 
+ * Can be used either for packet distribution on single queue NIC
+ * or for simulating the RSS computation of a specific NIC (for example
+ * after GRE header decapsulation)
+ */
+
+#include
+#include
+#include
+
+#ifdef __SSE3__
+#include
+#endif
+
+#ifdef __SSE3__
+/* Byte swap mask used for converting IPv6 address
+ * 4-byte chunks to CPU byte order
+ */
+static const __m128i rte_thash_ipv6_bswap_mask = {
+		0x0405060700010203ULL, 0x0C0D0E0F08090A0BULL};
+#endif
+
+/**
+ * length in dwords of input tuple to
+ * calculate hash of ipv4 header only
+ */
+#define RTE_THASH_V4_L3_LEN	((sizeof(struct rte_ipv4_tuple) -	\
+			sizeof(((struct rte_ipv4_tuple *)0)->sctp_tag)) / 4)
+
+/**
+ * length in dwords of input tuple to
+ * calculate hash of ipv4 header +
+ * transport header
+ */
+#define RTE_THASH_V4_L4_LEN	 ((sizeof(struct rte_ipv4_tuple)) / 4)
+
+/**
+ * length in dwords of input tuple to
+ * calculate hash of ipv6 header only
+ */
+#define RTE_THASH_V6_L3_LEN	((sizeof(struct rte_ipv6_tuple) -       \
+			sizeof(((struct rte_ipv6_tuple *)0)->sctp_tag)) / 4)
+
+/**
+ * length in dwords of input tuple to
+ * calculate hash of ipv6 header +
+ * transport header
+ */
+#define RTE_THASH_V6_L4_LEN	((sizeof(struct rte_ipv6_tuple)) / 4)
+
+/**
+ * IPv4 tuple
+ * addresses and ports/sctp_tag have to be CPU byte order
+ */
+struct rte_ipv4_tuple {
+	uint32_t	src_addr;
+	uint32_t	dst_addr;
+	union {	/* ports and sctp_tag share the same 4 bytes */
+		struct {
+			uint16_t dport;
+			uint16_t sport;
+		};
+		uint32_t        sctp_tag;
+	};
+};
+
+/**
+ * IPv6 tuple
+ * Addresses have to be filled by rte_thash_load_v6_addrs()
+ * ports/sctp_tag have to be CPU byte order
+ */
+struct rte_ipv6_tuple {
+	uint8_t		src_addr[16];
+	uint8_t		dst_addr[16];
+	union {	/* ports and sctp_tag share the same 4 bytes */
+		struct {
+			uint16_t dport;
+			uint16_t sport;
+		};
+		uint32_t        sctp_tag;
+	};
+};
+
+union rte_thash_tuple {
+	struct rte_ipv4_tuple	v4;
+	struct rte_ipv6_tuple	v6;
+#ifdef __SSE3__
+} __attribute__((aligned(XMM_SIZE)));	/* presumably so the __m128i stores in rte_thash_load_v6_addrs() are aligned - confirm */
+#else
+};
+#endif
+
+/**
+ * Prepare special converted key to use with
rte_softrss_be()
+ * @param orig
+ *   pointer to original RSS key
+ * @param targ
+ *   pointer to target RSS key
+ * @param len
+ *   RSS key length
+ */
+static inline void
+rte_convert_rss_key(const uint32_t *orig, uint32_t *targ, int len)
+{
+	int i;
+
+	for (i = 0; i < (len >> 2); i++)	/* len is in bytes; convert whole 32-bit words only */
+		targ[i] = rte_be_to_cpu_32(orig[i]);
+}
+
+/**
+ * Prepare and load IPv6 addresses (src and dst)
+ * into target tuple
+ * @param orig
+ *   Pointer to ipv6 header of the original packet
+ * @param targ
+ *   Pointer to rte_thash_tuple union; its v6 src/dst addresses are written
+ */
+static inline void
+rte_thash_load_v6_addrs(const struct ipv6_hdr *orig, union rte_thash_tuple *targ)
+{
+#ifdef __SSE3__
+	__m128i ipv6 = _mm_loadu_si128((const __m128i *)orig->src_addr);
+	*(__m128i *)targ->v6.src_addr =
+			_mm_shuffle_epi8(ipv6, rte_thash_ipv6_bswap_mask);
+	ipv6 = _mm_loadu_si128((const __m128i *)orig->dst_addr);
+	*(__m128i *)targ->v6.dst_addr =
+			_mm_shuffle_epi8(ipv6, rte_thash_ipv6_bswap_mask);
+#else
+	int i;
+	for (i = 0; i < 4; i++) {	/* four 32-bit words per 16-byte address */
+		*((uint32_t *)targ->v6.src_addr + i) =
+			rte_be_to_cpu_32(*((const uint32_t *)orig->src_addr + i));
+		*((uint32_t *)targ->v6.dst_addr + i) =
+			rte_be_to_cpu_32(*((const uint32_t *)orig->dst_addr + i));
+	}
+#endif
+}
+
+/**
+ * Generic implementation. Can be used with original rss_key
+ * @param input_tuple
+ *   Pointer to input tuple
+ * @param input_len
+ *   Length of input_tuple in 4-bytes chunks
+ * @param rss_key
+ *   Pointer to RSS hash key; (input_len + 1) 32-bit words of it are read.
+ * @return
+ *   Calculated hash value.
+ */
+static inline uint32_t
+rte_softrss(uint32_t *input_tuple, uint32_t input_len,
+		const uint8_t *rss_key)
+{
+	uint32_t i, j, ret = 0;
+
+	for (j = 0; j < input_len; j++) {
+		for (i = 0; i < 32; i++) {	/* i = 0 tests the most significant bit */
+			if (input_tuple[j] & (1U << (31 - i))) {	/* 1U: shifting a signed 1 into bit 31 is undefined */
+				ret ^= rte_cpu_to_be_32(((const uint32_t *)rss_key)[j]) << i |
+					(uint32_t)((uint64_t)(rte_cpu_to_be_32(((const uint32_t *)rss_key)[j + 1])) >>
+					(32 - i));	/* 64-bit shift keeps i == 0 (shift by 32) well defined */
+			}
+		}
+	}
+	return ret;
+}
+
+/**
+ * Optimized implementation.
+ * If you want the calculated hash value matches NIC RSS value + * you have to use special converted key with rte_convert_rss_key() fn. + * @param input_tuple + * Pointer to input tuple + * @param input_len + * Length of input_tuple in 4-bytes chunks + * @param *rss_key + * Pointer to RSS hash key. + * @return + * Calculated hash value. + */ +static inline uint32_t +rte_softrss_be(uint32_t *input_tuple, uint32_t input_len, + const uint8_t *rss_key) +{ + uint32_t i, j, ret = 0; + + for (j = 0; j < input_len; j++) { + for (i = 0; i < 32; i++) { + if (input_tuple[j] & (1 << (31 - i))) { + ret ^= ((const uint32_t *)rss_key)[j] << i | + (uint32_t)((uint64_t)(((const uint32_t *)rss_key)[j + 1]) >> (32 - i)); + } + } + } + return ret; +} + +#ifdef __cplusplus +} +#endif + +#endif /* _RTE_THASH_H */ diff --git a/src/dpdk22/lib/librte_kvargs/rte_kvargs.c b/src/dpdk22/lib/librte_kvargs/rte_kvargs.c new file mode 100644 index 00000000..c2dd0513 --- /dev/null +++ b/src/dpdk22/lib/librte_kvargs/rte_kvargs.c @@ -0,0 +1,212 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2013 Intel Corporation. All rights reserved. + * Copyright(c) 2014 6WIND S.A. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ +#include +#include + +#include +#include + +#include "rte_kvargs.h" + +/* + * Receive a string with a list of arguments following the pattern + * key=value,key=value,... and insert them into the list. + * strtok_r() is used so the params string will be copied to be modified.
+ */ +static int +rte_kvargs_tokenize(struct rte_kvargs *kvlist, const char *params) +{ + unsigned i; + char *str; + char *ctx1 = NULL; + char *ctx2 = NULL; + + /* Copy the const char *params to a modifiable string + * to pass to strtok_r(), which overwrites delimiters in place. + * The copy is stored in kvlist->str; the error paths below return + * without freeing it, leaving that to the caller. + */ + kvlist->str = strdup(params); + if (kvlist->str == NULL) { + RTE_LOG(ERR, PMD, "Cannot parse arguments: not enough memory\n"); + return -1; + } + + /* browse each key/value pair and add it in kvlist; the stored key + * and value pointers refer into kvlist->str */ + str = kvlist->str; + while ((str = strtok_r(str, RTE_KVARGS_PAIRS_DELIM, &ctx1)) != NULL) { + + i = kvlist->count; + if (i >= RTE_KVARGS_MAX) { + RTE_LOG(ERR, PMD, "Cannot parse arguments: list full\n"); + return -1; + } + + kvlist->pairs[i].key = strtok_r(str, RTE_KVARGS_KV_DELIM, &ctx2); + kvlist->pairs[i].value = strtok_r(NULL, RTE_KVARGS_KV_DELIM, &ctx2); + if (kvlist->pairs[i].key == NULL || kvlist->pairs[i].value == NULL) { + RTE_LOG(ERR, PMD, + "Cannot parse arguments: wrong key or value\n" + "params=<%s>\n", params); + return -1; + } + + kvlist->count++; + str = NULL; + } + + return 0; +} + +/* + * Determine whether a key is valid or not by looking + * into a list of valid keys. + * valid[] is scanned up to its NULL terminator. Returns 1 when key_match + * equals one of the entries, 0 otherwise. + */ +static int +is_valid_key(const char *valid[], const char *key_match) +{ + const char **valid_ptr; + + for (valid_ptr = valid; *valid_ptr != NULL; valid_ptr++) { + if (strcmp(key_match, *valid_ptr) == 0) + return 1; + } + return 0; +} + +/* + * Determine whether all keys are valid or not by looking + * into a list of valid keys. + * Returns 0 when every key of kvlist is found in valid[]; otherwise logs + * the first key that is not and returns -1. + */ +static int +check_for_valid_keys(struct rte_kvargs *kvlist, + const char *valid[]) +{ + unsigned i, ret; + struct rte_kvargs_pair *pair; + + for (i = 0; i < kvlist->count; i++) { + pair = &kvlist->pairs[i]; + ret = is_valid_key(valid, pair->key); + if (!ret) { + RTE_LOG(ERR, PMD, + "Error parsing device, invalid key <%s>\n", + pair->key); + return -1; + } + } + return 0; +} + +/* + * Return the number of times a given arg_name exists in the key/value list. + * E.g.
given a list = { rx = 0, rx = 1, tx = 2 } the number of args for + * arg "rx" will be 2. A NULL key_match counts every entry. + */ +unsigned +rte_kvargs_count(const struct rte_kvargs *kvlist, const char *key_match) +{ + const struct rte_kvargs_pair *pair; + unsigned i, ret; + + ret = 0; + for (i = 0; i < kvlist->count; i++) { + pair = &kvlist->pairs[i]; + if (key_match == NULL || strcmp(pair->key, key_match) == 0) + ret++; + } + + return ret; +} + +/* + * For each matching key, call the given handler function. + * A NULL key_match matches every entry. Processing stops at the first + * handler call that returns a negative value, in which case -1 is + * returned; otherwise 0 is returned. + */ +int +rte_kvargs_process(const struct rte_kvargs *kvlist, + const char *key_match, + arg_handler_t handler, + void *opaque_arg) +{ + const struct rte_kvargs_pair *pair; + unsigned i; + + /* rte_kvargs.h documents a NULL kvlist as "function does nothing" */ + if (kvlist == NULL) + return 0; + + for (i = 0; i < kvlist->count; i++) { + pair = &kvlist->pairs[i]; + if (key_match == NULL || strcmp(pair->key, key_match) == 0) { + if ((*handler)(pair->key, pair->value, opaque_arg) < 0) + return -1; + } + } + return 0; +} + +/* free the rte_kvargs structure and its copy of the argument string; + * a NULL kvlist is ignored */ +void +rte_kvargs_free(struct rte_kvargs *kvlist) +{ + if (!kvlist) + return; + + /* free(NULL) is a no-op, so str needs no separate check */ + free(kvlist->str); + + free(kvlist); +} + +/* + * Parse the arguments "key=value,key=value,..." string and return + * an allocated structure that contains a key/value list. Also + * check if only valid keys were used.
+ */ +struct rte_kvargs * +rte_kvargs_parse(const char *args, const char *valid_keys[]) +{ + struct rte_kvargs *kvlist; + + /* a NULL string would otherwise reach strdup() in the tokenizer */ + if (args == NULL) + return NULL; + + /* zero-filled: count starts at 0 and str at NULL */ + kvlist = calloc(1, sizeof(*kvlist)); + if (kvlist == NULL) + return NULL; + + if (rte_kvargs_tokenize(kvlist, args) < 0) { + rte_kvargs_free(kvlist); + return NULL; + } + + /* key validation is skipped when no list of valid keys is given */ + if (valid_keys != NULL && check_for_valid_keys(kvlist, valid_keys) < 0) { + rte_kvargs_free(kvlist); + return NULL; + } + + return kvlist; +} diff --git a/src/dpdk22/lib/librte_kvargs/rte_kvargs.h b/src/dpdk22/lib/librte_kvargs/rte_kvargs.h new file mode 100644 index 00000000..ae9ae79f --- /dev/null +++ b/src/dpdk22/lib/librte_kvargs/rte_kvargs.h @@ -0,0 +1,156 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2013 Intel Corporation. All rights reserved. + * Copyright(c) 2014 6WIND S.A. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED.
IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef _RTE_KVARGS_H_ +#define _RTE_KVARGS_H_ + +/** + * @file + * RTE Argument parsing + * + * This module can be used to parse arguments whose format is + * key1=value1,key2=value2,key3=value3,... + * + * The same key can appear several times with the same or a different + * value. Indeed, the arguments are stored as a list of key/values + * associations and not as a dictionary. + * + * This file provides some helpers that are especially used by virtual + * ethernet devices at initialization for arguments parsing. 
+ */ + +#ifdef __cplusplus +extern "C" { +#endif + +/** Maximum number of key/value associations */ +#define RTE_KVARGS_MAX 32 + +/** separator character used between each pair */ +#define RTE_KVARGS_PAIRS_DELIM "," + +/** separator character used between key and value */ +#define RTE_KVARGS_KV_DELIM "=" + +/** Type of callback function used by rte_kvargs_process(); a negative + * return value stops the processing */ +typedef int (*arg_handler_t)(const char *key, const char *value, void *opaque); + +/** A key/value association; both pointers refer into rte_kvargs::str */ +struct rte_kvargs_pair { + char *key; /**< the name (key) of the association */ + char *value; /**< the value associated to that key */ +}; + +/** Store a list of key/value associations */ +struct rte_kvargs { + char *str; /**< copy of the argument string, released by rte_kvargs_free() */ + unsigned count; /**< number of entries in the list */ + struct rte_kvargs_pair pairs[RTE_KVARGS_MAX]; /**< list of key/values */ +}; + +/** + * Allocate a rte_kvargs and store key/value associations from a string + * + * The function allocates and fills a rte_kvargs structure from a given + * string whose format is key1=value1,key2=value2,... + * A string holding more than RTE_KVARGS_MAX pairs, or a pair that lacks + * a key or a value, is rejected. + * + * The structure can be freed with rte_kvargs_free(). + * + * @param args + * The input string containing the key/value associations + * @param valid_keys + * A list of valid keys (table of const char *, the last must be NULL). + * This argument is ignored if NULL + * + * @return + * - A pointer to an allocated rte_kvargs structure on success + * - NULL on error + */ +struct rte_kvargs *rte_kvargs_parse(const char *args, const char *valid_keys[]); + +/** + * Free a rte_kvargs structure + * + * Free a rte_kvargs structure previously allocated with + * rte_kvargs_parse().
+ * + * @param kvlist + * The rte_kvargs structure + */ +void rte_kvargs_free(struct rte_kvargs *kvlist); + +/** + * Call a handler function for each key/value matching the key + * + * For each key/value association that matches the given key, calls the + * handler function, passing it the key, the value stored for that key + * and a given extra argument. If *kvlist* is NULL + * function does nothing. + * + * @param kvlist + * The rte_kvargs structure + * @param key_match + * The key on which the handler should be called, or NULL to process handler + * on all associations + * @param handler + * The function to call for each matching key + * @param opaque_arg + * A pointer passed unchanged to the handler + * + * @return + * - 0 on success + * - Negative on error, i.e. as soon as the handler returns a negative value + */ +int rte_kvargs_process(const struct rte_kvargs *kvlist, + const char *key_match, arg_handler_t handler, void *opaque_arg); + +/** + * Count the number of associations matching the given key + * + * @param kvlist + * The rte_kvargs structure + * @param key_match + * The key that should match, or NULL to count all associations + * + * @return + * The number of entries + */ +unsigned rte_kvargs_count(const struct rte_kvargs *kvlist, + const char *key_match); + +#ifdef __cplusplus +} +#endif + +#endif diff --git a/src/dpdk22/lib/librte_mbuf/rte_mbuf.c b/src/dpdk22/lib/librte_mbuf/rte_mbuf.c new file mode 100644 index 00000000..c18b4381 --- /dev/null +++ b/src/dpdk22/lib/librte_mbuf/rte_mbuf.c @@ -0,0 +1,288 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. + * Copyright 2014 6WIND S.A. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +/* + * ctrlmbuf constructor, given as a callback function to + * rte_mempool_create() + * Initializes the element with rte_pktmbuf_init() and then sets + * CTRL_MBUF_FLAG in its ol_flags. + */ +void +rte_ctrlmbuf_init(struct rte_mempool *mp, + __attribute__((unused)) void *opaque_arg, + void *_m, + __attribute__((unused)) unsigned i) +{ + struct rte_mbuf *m = _m; + rte_pktmbuf_init(mp, opaque_arg, _m, i); + m->ol_flags |= CTRL_MBUF_FLAG; +} + +/* + * pktmbuf pool constructor, given as a callback function to + * rte_mempool_create() + * opaque_arg, when not NULL, is read as a struct rte_pktmbuf_pool_private + * and copied into the private area of the pool. When it is NULL, a private + * size of 0 and a data room of elt_size - sizeof(struct rte_mbuf) are + * stored instead. + */ +void +rte_pktmbuf_pool_init(struct rte_mempool *mp, void *opaque_arg) +{ + struct rte_pktmbuf_pool_private *user_mbp_priv, *mbp_priv; + struct rte_pktmbuf_pool_private default_mbp_priv; + uint16_t roomsz; + + RTE_MBUF_ASSERT(mp->elt_size >= sizeof(struct rte_mbuf)); + + /* if no structure is provided, assume no mbuf private area */ + user_mbp_priv = opaque_arg; + if (user_mbp_priv == NULL) { + default_mbp_priv.mbuf_priv_size = 0; + if (mp->elt_size > sizeof(struct rte_mbuf)) + roomsz = mp->elt_size - sizeof(struct rte_mbuf); + else + roomsz = 0; + default_mbp_priv.mbuf_data_room_size = roomsz; + user_mbp_priv = &default_mbp_priv; + } + + RTE_MBUF_ASSERT(mp->elt_size >= sizeof(struct rte_mbuf) + + user_mbp_priv->mbuf_data_room_size + + user_mbp_priv->mbuf_priv_size); + + mbp_priv = rte_mempool_get_priv(mp); + memcpy(mbp_priv, user_mbp_priv, sizeof(*mbp_priv)); +} + +/* + * pktmbuf constructor, given as a callback function to + * rte_mempool_create(). + * Set the fields of a packet mbuf to their default values.
+ */ +void +rte_pktmbuf_init(struct rte_mempool *mp, + __attribute__((unused)) void *opaque_arg, + void *_m, + __attribute__((unused)) unsigned i) +{ + struct rte_mbuf *m = _m; + uint32_t mbuf_size, buf_len, priv_size; + + priv_size = rte_pktmbuf_priv_size(mp); + mbuf_size = sizeof(struct rte_mbuf) + priv_size; + buf_len = rte_pktmbuf_data_room_size(mp); + + RTE_MBUF_ASSERT(RTE_ALIGN(priv_size, RTE_MBUF_PRIV_ALIGN) == priv_size); + RTE_MBUF_ASSERT(mp->elt_size >= mbuf_size); + RTE_MBUF_ASSERT(buf_len <= UINT16_MAX); + + /* the whole pool element is cleared, not only the mbuf header */ + memset(m, 0, mp->elt_size); + + /* start of buffer is after mbuf structure and priv data */ + m->priv_size = priv_size; + m->buf_addr = (char *)m + mbuf_size; + m->buf_physaddr = rte_mempool_virt2phy(mp, m) + mbuf_size; + m->buf_len = (uint16_t)buf_len; + + /* keep some headroom between start of buffer and data */ + m->data_off = RTE_MIN(RTE_PKTMBUF_HEADROOM, (uint16_t)m->buf_len); + + /* init some constant fields */ + m->pool = mp; + m->nb_segs = 1; + m->port = 0xff; +} + +/* helper to create a mbuf pool: each element holds the mbuf header, + * priv_size bytes of private data and data_room_size bytes of buffer. + * Returns NULL with rte_errno set to EINVAL when priv_size is not a + * multiple of RTE_MBUF_PRIV_ALIGN, otherwise the result of + * rte_mempool_create(). */ +struct rte_mempool * +rte_pktmbuf_pool_create(const char *name, unsigned n, + unsigned cache_size, uint16_t priv_size, uint16_t data_room_size, + int socket_id) +{ + struct rte_pktmbuf_pool_private mbp_priv; + unsigned elt_size; + + if (RTE_ALIGN(priv_size, RTE_MBUF_PRIV_ALIGN) != priv_size) { + RTE_LOG(ERR, MBUF, "mbuf priv_size=%u is not aligned\n", + priv_size); + rte_errno = EINVAL; + return NULL; + } + elt_size = sizeof(struct rte_mbuf) + (unsigned)priv_size + + (unsigned)data_room_size; + mbp_priv.mbuf_data_room_size = data_room_size; + mbp_priv.mbuf_priv_size = priv_size; + + return rte_mempool_create(name, n, elt_size, + cache_size, sizeof(struct rte_pktmbuf_pool_private), + rte_pktmbuf_pool_init, &mbp_priv, rte_pktmbuf_init, NULL, + socket_id, 0); +} + +/* do some sanity checks on a mbuf: panic if it fails. + * The segment chain is walked only when is_header is non-zero. */ +void +rte_mbuf_sanity_check(const struct rte_mbuf *m, int is_header) +{ + const struct rte_mbuf *m_seg; + unsigned nb_segs; + +
if (m == NULL) + rte_panic("mbuf is NULL\n"); + + /* generic checks */ + if (m->pool == NULL) + rte_panic("bad mbuf pool\n"); + if (m->buf_physaddr == 0) + rte_panic("bad phys addr\n"); + if (m->buf_addr == NULL) + rte_panic("bad virt addr\n"); + + uint16_t cnt = rte_mbuf_refcnt_read(m); + if ((cnt == 0) || (cnt == UINT16_MAX)) + rte_panic("bad ref cnt\n"); + + /* nothing to check for sub-segments */ + if (is_header == 0) + return; + + /* the chain must provide at least nb_segs segments */ + nb_segs = m->nb_segs; + m_seg = m; + while (m_seg && nb_segs != 0) { + m_seg = m_seg->next; + nb_segs--; + } + if (nb_segs != 0) + rte_panic("bad nb_segs\n"); +} + +/* dump a mbuf on console: one header line, then for every segment its + * addresses and a hexdump of its data, limited to dump_len bytes in + * total over all segments */ +void +rte_pktmbuf_dump(FILE *f, const struct rte_mbuf *m, unsigned dump_len) +{ + unsigned int len; + unsigned nb_segs; + + __rte_mbuf_sanity_check(m, 1); + + /* %p requires a pointer to void, hence the casts of m */ + fprintf(f, "dump mbuf at 0x%p, phys=%"PRIx64", buf_len=%u\n", + (const void *)m, (uint64_t)m->buf_physaddr, (unsigned)m->buf_len); + fprintf(f, " pkt_len=%"PRIu32", ol_flags=%"PRIx64", nb_segs=%u, " + "in_port=%u\n", m->pkt_len, m->ol_flags, + (unsigned)m->nb_segs, (unsigned)m->port); + nb_segs = m->nb_segs; + + while (m && nb_segs != 0) { + __rte_mbuf_sanity_check(m, 0); + + fprintf(f, " segment at 0x%p, data=0x%p, data_len=%u\n", + (const void *)m, rte_pktmbuf_mtod(m, void *), (unsigned)m->data_len); + len = dump_len; + if (len > m->data_len) + len = m->data_len; + if (len != 0) + rte_hexdump(f, NULL, rte_pktmbuf_mtod(m, void *), len); + dump_len -= len; + m = m->next; + nb_segs --; + } +} + +/* + * Get the name of a RX offload flag. Must be kept synchronized with flag + * definitions in rte_mbuf.h.
+ */ +const char *rte_get_rx_ol_flag_name(uint64_t mask) +{ + switch (mask) { + case PKT_RX_VLAN_PKT: return "PKT_RX_VLAN_PKT"; + case PKT_RX_RSS_HASH: return "PKT_RX_RSS_HASH"; + case PKT_RX_FDIR: return "PKT_RX_FDIR"; + case PKT_RX_L4_CKSUM_BAD: return "PKT_RX_L4_CKSUM_BAD"; + case PKT_RX_IP_CKSUM_BAD: return "PKT_RX_IP_CKSUM_BAD"; + /* The five flags below are defined as (0ULL << 0) in rte_mbuf.h, + * so they cannot be distinct case labels. + */ + /* case PKT_RX_EIP_CKSUM_BAD: return "PKT_RX_EIP_CKSUM_BAD"; */ + /* case PKT_RX_OVERSIZE: return "PKT_RX_OVERSIZE"; */ + /* case PKT_RX_HBUF_OVERFLOW: return "PKT_RX_HBUF_OVERFLOW"; */ + /* case PKT_RX_RECIP_ERR: return "PKT_RX_RECIP_ERR"; */ + /* case PKT_RX_MAC_ERR: return "PKT_RX_MAC_ERR"; */ + case PKT_RX_IEEE1588_PTP: return "PKT_RX_IEEE1588_PTP"; + case PKT_RX_IEEE1588_TMST: return "PKT_RX_IEEE1588_TMST"; + case PKT_RX_FDIR_ID: return "PKT_RX_FDIR_ID"; + case PKT_RX_FDIR_FLX: return "PKT_RX_FDIR_FLX"; + case PKT_RX_QINQ_PKT: return "PKT_RX_QINQ_PKT"; + default: return NULL; + } +} + +/* + * Get the name of a TX offload flag. Must be kept synchronized with flag + * definitions in rte_mbuf.h. + * Returns NULL when mask is not exactly one of the values listed below. + */ +const char *rte_get_tx_ol_flag_name(uint64_t mask) +{ + switch (mask) { + case PKT_TX_VLAN_PKT: return "PKT_TX_VLAN_PKT"; + case PKT_TX_IP_CKSUM: return "PKT_TX_IP_CKSUM"; + case PKT_TX_TCP_CKSUM: return "PKT_TX_TCP_CKSUM"; + case PKT_TX_SCTP_CKSUM: return "PKT_TX_SCTP_CKSUM"; + case PKT_TX_UDP_CKSUM: return "PKT_TX_UDP_CKSUM"; + case PKT_TX_IEEE1588_TMST: return "PKT_TX_IEEE1588_TMST"; + case PKT_TX_TCP_SEG: return "PKT_TX_TCP_SEG"; + case PKT_TX_QINQ_PKT: return "PKT_TX_QINQ_PKT"; + case PKT_TX_IPV4: return "PKT_TX_IPV4"; + case PKT_TX_IPV6: return "PKT_TX_IPV6"; + case PKT_TX_OUTER_IP_CKSUM: return "PKT_TX_OUTER_IP_CKSUM"; + case PKT_TX_OUTER_IPV4: return "PKT_TX_OUTER_IPV4"; + case PKT_TX_OUTER_IPV6: return "PKT_TX_OUTER_IPV6"; + default: return NULL; + } +} diff --git a/src/dpdk22/lib/librte_mbuf/rte_mbuf.h b/src/dpdk22/lib/librte_mbuf/rte_mbuf.h new file mode 100644 index 00000000..f234ac9a --- /dev/null +++ b/src/dpdk22/lib/librte_mbuf/rte_mbuf.h @@ -0,0 +1,1865 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. + * Copyright 2014 6WIND S.A. + * All rights reserved.
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef _RTE_MBUF_H_ +#define _RTE_MBUF_H_ + +/** + * @file + * RTE Mbuf + * + * The mbuf library provides the ability to create and destroy buffers + * that may be used by the RTE application to store message + * buffers. The message buffers are stored in a mempool, using the + * RTE mempool library. + * + * This library provide an API to allocate/free packet mbufs, which are + * used to carry network packets. 
+ * + * To understand the concepts of packet buffers or mbufs, you + * should read "TCP/IP Illustrated, Volume 2: The Implementation, + * Addison-Wesley, 1995, ISBN 0-201-63354-X from Richard Stevens" + * http://www.kohala.com/start/tcpipiv2.html + */ + +#include +#include +#include +#include +#include +#include +#include + +#ifdef __cplusplus +extern "C" { +#endif + +/* deprecated options */ +#pragma GCC poison RTE_MBUF_SCATTER_GATHER +#pragma GCC poison RTE_MBUF_REFCNT + +/* + * Packet Offload Features Flags. They also carry packet type information. + * Critical resources. RX and TX share these bits. Be cautious on any change + * + * - RX flags start at bit position zero, and get added to the left of previous + * flags. + * - The most-significant 3 bits are reserved for generic mbuf flags + * - TX flags therefore start at bit position 60 (i.e. 63-3), and new flags get + * added to the right of the previously defined flags i.e. they should count + * downwards, not upwards. + * + * Keep these flags synchronized with rte_get_rx_ol_flag_name() and + * rte_get_tx_ol_flag_name(). + */ +#define PKT_RX_VLAN_PKT (1ULL << 0) /**< RX packet is a 802.1q VLAN packet. */ +#define PKT_RX_RSS_HASH (1ULL << 1) /**< RX packet with RSS hash result. */ +#define PKT_RX_FDIR (1ULL << 2) /**< RX packet with FDIR match indication. */ +#define PKT_RX_L4_CKSUM_BAD (1ULL << 3) /**< L4 cksum of RX pkt. is not OK. */ +#define PKT_RX_IP_CKSUM_BAD (1ULL << 4) /**< IP cksum of RX pkt. is not OK. */ +#define PKT_RX_EIP_CKSUM_BAD (0ULL << 0) /**< External IP header checksum error (value is 0). */ +#define PKT_RX_OVERSIZE (0ULL << 0) /**< Num of desc of an RX pkt oversize (value is 0). */ +#define PKT_RX_HBUF_OVERFLOW (0ULL << 0) /**< Header buffer overflow (value is 0). */ +#define PKT_RX_RECIP_ERR (0ULL << 0) /**< Hardware processing error (value is 0). */ +#define PKT_RX_MAC_ERR (0ULL << 0) /**< MAC error (value is 0). */ +#define PKT_RX_IEEE1588_PTP (1ULL << 9) /**< RX IEEE1588 L2 Ethernet PT Packet.
*/ +#define PKT_RX_IEEE1588_TMST (1ULL << 10) /**< RX IEEE1588 L2/L4 timestamped packet.*/ +#define PKT_RX_FDIR_ID (1ULL << 13) /**< FD id reported if FDIR match. */ +#define PKT_RX_FDIR_FLX (1ULL << 14) /**< Flexible bytes reported if FDIR match. */ +#define PKT_RX_QINQ_PKT (1ULL << 15) /**< RX packet with double VLAN stripped. */ +/* add new RX flags here */ + +/* add new TX flags here */ + +/** + * Second VLAN insertion (QinQ) flag. + */ +#define PKT_TX_QINQ_PKT (1ULL << 49) /**< TX packet with double VLAN inserted. */ + +/** + * TCP segmentation offload. To enable this offload feature for a + * packet to be transmitted on hardware supporting TSO: + * - set the PKT_TX_TCP_SEG flag in mbuf->ol_flags (this flag implies + * PKT_TX_TCP_CKSUM) + * - set the flag PKT_TX_IPV4 or PKT_TX_IPV6 + * - if it's IPv4, set the PKT_TX_IP_CKSUM flag and write the IP checksum + * to 0 in the packet + * - fill the mbuf offload information: l2_len, l3_len, l4_len, tso_segsz + * - calculate the pseudo header checksum without taking ip_len in account, + * and set it in the TCP header. Refer to rte_ipv4_phdr_cksum() and + * rte_ipv6_phdr_cksum() that can be used as helpers. + */ +#define PKT_TX_TCP_SEG (1ULL << 50) + +#define PKT_TX_IEEE1588_TMST (1ULL << 51) /**< TX IEEE1588 packet to timestamp. */ + +/** + * Bits 52+53 used for L4 packet type with checksum enabled: 00: Reserved, + * 01: TCP checksum, 10: SCTP checksum, 11: UDP checksum. To use hardware + * L4 checksum offload, the user needs to: + * - fill l2_len and l3_len in mbuf + * - set the flags PKT_TX_TCP_CKSUM, PKT_TX_SCTP_CKSUM or PKT_TX_UDP_CKSUM + * - set the flag PKT_TX_IPV4 or PKT_TX_IPV6 + * - calculate the pseudo header checksum and set it in the L4 header (only + * for TCP or UDP). See rte_ipv4_phdr_cksum() and rte_ipv6_phdr_cksum(). + * For SCTP, set the crc field to 0. + */ +#define PKT_TX_L4_NO_CKSUM (0ULL << 52) /**< Disable L4 cksum of TX pkt. */ +#define PKT_TX_TCP_CKSUM (1ULL << 52) /**< TCP cksum of TX pkt. 
computed by NIC. */ +#define PKT_TX_SCTP_CKSUM (2ULL << 52) /**< SCTP cksum of TX pkt. computed by NIC. */ +#define PKT_TX_UDP_CKSUM (3ULL << 52) /**< UDP cksum of TX pkt. computed by NIC. */ +#define PKT_TX_L4_MASK (3ULL << 52) /**< Mask for L4 cksum offload request. */ + +/** + * Offload the IP checksum in the hardware. The flag PKT_TX_IPV4 should + * also be set by the application, although a PMD will only check + * PKT_TX_IP_CKSUM. + * - set the IP checksum field in the packet to 0 + * - fill the mbuf offload information: l2_len, l3_len + */ +#define PKT_TX_IP_CKSUM (1ULL << 54) + +/** + * Packet is IPv4. This flag must be set when using any offload feature + * (TSO, L3 or L4 checksum) to tell the NIC that the packet is an IPv4 + * packet. If the packet is a tunneled packet, this flag is related to + * the inner headers. + */ +#define PKT_TX_IPV4 (1ULL << 55) + +/** + * Packet is IPv6. This flag must be set when using an offload feature + * (TSO or L4 checksum) to tell the NIC that the packet is an IPv6 + * packet. If the packet is a tunneled packet, this flag is related to + * the inner headers. + */ +#define PKT_TX_IPV6 (1ULL << 56) + +#define PKT_TX_VLAN_PKT (1ULL << 57) /**< TX packet is a 802.1q VLAN packet. */ + +/** + * Offload the IP checksum of an external header in the hardware. The + * flag PKT_TX_OUTER_IPV4 should also be set by the application, although + * a PMD will only check PKT_TX_IP_CKSUM. The IP checksum field in the + * packet must be set to 0. + * NOTE(review): the flag checked by a PMD is presumably + * PKT_TX_OUTER_IP_CKSUM rather than PKT_TX_IP_CKSUM - confirm. + * - set the outer IP checksum field in the packet to 0 + * - fill the mbuf offload information: outer_l2_len, outer_l3_len + */ +#define PKT_TX_OUTER_IP_CKSUM (1ULL << 58) + +/** + * Packet outer header is IPv4. This flag must be set when using any + * outer offload feature (L3 or L4 checksum) to tell the NIC that the + * outer header of the tunneled packet is an IPv4 packet. + */ +#define PKT_TX_OUTER_IPV4 (1ULL << 59) + +/** + * Packet outer header is IPv6.
This flag must be set when using any + * outer offload feature (L4 checksum) to tell the NIC that the outer + * header of the tunneled packet is an IPv6 packet. + */ +#define PKT_TX_OUTER_IPV6 (1ULL << 60) + +#define __RESERVED (1ULL << 61) /**< reserved for future mbuf use */ + +#define IND_ATTACHED_MBUF (1ULL << 62) /**< Indirect attached mbuf */ + +/* Use final bit of flags to indicate a control mbuf */ +#define CTRL_MBUF_FLAG (1ULL << 63) /**< Mbuf contains control data */ + +/* + * 32 bits are divided into several fields to mark packet types. Note that + * each field is indexical. + * - Bit 3:0 is for L2 types. + * - Bit 7:4 is for L3 or outer L3 (for tunneling case) types. + * - Bit 11:8 is for L4 or outer L4 (for tunneling case) types. + * - Bit 15:12 is for tunnel types. + * - Bit 19:16 is for inner L2 types. + * - Bit 23:20 is for inner L3 types. + * - Bit 27:24 is for inner L4 types. + * - Bit 31:28 is reserved. + * + * To be compatible with Vector PMD, RTE_PTYPE_L3_IPV4, RTE_PTYPE_L3_IPV4_EXT, + * RTE_PTYPE_L3_IPV6, RTE_PTYPE_L3_IPV6_EXT, RTE_PTYPE_L4_TCP, RTE_PTYPE_L4_UDP + * and RTE_PTYPE_L4_SCTP should be kept as below in a contiguous 7 bits. + * + * Note that L3 types values are selected for checking IPV4/IPV6 header from + * performance point of view. Reading annotations of RTE_ETH_IS_IPV4_HDR and + * RTE_ETH_IS_IPV6_HDR is needed for any future changes of L3 type values. + * + * Note that the packet types of the same packet recognized by different + * hardware may be different, as different hardware may have different + * capability of packet type recognition. + * + * examples: + * <'ether type'=0x0800 + * | 'version'=4, 'protocol'=0x29 + * | 'version'=6, 'next header'=0x3A + * | 'ICMPv6 header'> + * will be recognized on i40e hardware as packet type combination of, + * RTE_PTYPE_L2_ETHER | + * RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | + * RTE_PTYPE_TUNNEL_IP | + * RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN | + * RTE_PTYPE_INNER_L4_ICMP. 
+ * + * <'ether type'=0x86DD + * | 'version'=6, 'next header'=0x2F + * | 'GRE header' + * | 'version'=6, 'next header'=0x11 + * | 'UDP header'> + * will be recognized on i40e hardware as packet type combination of, + * RTE_PTYPE_L2_ETHER | + * RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | + * RTE_PTYPE_TUNNEL_GRENAT | + * RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN | + * RTE_PTYPE_INNER_L4_UDP. + */ +#define RTE_PTYPE_UNKNOWN 0x00000000 +/** + * Ethernet packet type. + * It is used for outer packet for tunneling cases. + * + * Packet format: + * <'ether type'=[0x0800|0x86DD]> + */ +#define RTE_PTYPE_L2_ETHER 0x00000001 +/** + * Ethernet packet type for time sync. + * + * Packet format: + * <'ether type'=0x88F7> + */ +#define RTE_PTYPE_L2_ETHER_TIMESYNC 0x00000002 +/** + * ARP (Address Resolution Protocol) packet type. + * + * Packet format: + * <'ether type'=0x0806> + */ +#define RTE_PTYPE_L2_ETHER_ARP 0x00000003 +/** + * LLDP (Link Layer Discovery Protocol) packet type. + * + * Packet format: + * <'ether type'=0x88CC> + */ +#define RTE_PTYPE_L2_ETHER_LLDP 0x00000004 +/** + * Mask of layer 2 packet types. + * It is used for outer packet for tunneling cases. + */ +#define RTE_PTYPE_L2_MASK 0x0000000f +/** + * IP (Internet Protocol) version 4 packet type. + * It is used for outer packet for tunneling cases, and does not contain any + * header option. + * + * Packet format: + * <'ether type'=0x0800 + * | 'version'=4, 'ihl'=5> + */ +#define RTE_PTYPE_L3_IPV4 0x00000010 +/** + * IP (Internet Protocol) version 4 packet type. + * It is used for outer packet for tunneling cases, and contains header + * options. + * + * Packet format: + * <'ether type'=0x0800 + * | 'version'=4, 'ihl'=[6-15], 'options'> + */ +#define RTE_PTYPE_L3_IPV4_EXT 0x00000030 +/** + * IP (Internet Protocol) version 6 packet type. + * It is used for outer packet for tunneling cases, and does not contain any + * extension header. 
+ * + * Packet format: + * <'ether type'=0x86DD + * | 'version'=6, 'next header'=0x3B> + */ +#define RTE_PTYPE_L3_IPV6 0x00000040 +/** + * IP (Internet Protocol) version 4 packet type. + * It is used for outer packet for tunneling cases, and may or may not contain + * header options. + * + * Packet format: + * <'ether type'=0x0800 + * | 'version'=4, 'ihl'=[5-15], <'options'>> + */ +#define RTE_PTYPE_L3_IPV4_EXT_UNKNOWN 0x00000090 +/** + * IP (Internet Protocol) version 6 packet type. + * It is used for outer packet for tunneling cases, and contains extension + * headers. + * + * Packet format: + * <'ether type'=0x86DD + * | 'version'=6, 'next header'=[0x0|0x2B|0x2C|0x32|0x33|0x3C|0x87], + * 'extension headers'> + */ +#define RTE_PTYPE_L3_IPV6_EXT 0x000000c0 +/** + * IP (Internet Protocol) version 6 packet type. + * It is used for outer packet for tunneling cases, and may or may not contain + * extension headers. + * + * Packet format: + * <'ether type'=0x86DD + * | 'version'=6, 'next header'=[0x3B|0x0|0x2B|0x2C|0x32|0x33|0x3C|0x87], + * <'extension headers'>> + */ +#define RTE_PTYPE_L3_IPV6_EXT_UNKNOWN 0x000000e0 +/** + * Mask of layer 3 packet types (bits 7:4). + * It is used for outer packet for tunneling cases. + */ +#define RTE_PTYPE_L3_MASK 0x000000f0 +/** + * TCP (Transmission Control Protocol) packet type. + * It is used for outer packet for tunneling cases. + * + * Packet format: + * <'ether type'=0x0800 + * | 'version'=4, 'protocol'=6, 'MF'=0> + * or, + * <'ether type'=0x86DD + * | 'version'=6, 'next header'=6> + */ +#define RTE_PTYPE_L4_TCP 0x00000100 +/** + * UDP (User Datagram Protocol) packet type. + * It is used for outer packet for tunneling cases. + * + * Packet format: + * <'ether type'=0x0800 + * | 'version'=4, 'protocol'=17, 'MF'=0> + * or, + * <'ether type'=0x86DD + * | 'version'=6, 'next header'=17> + */ +#define RTE_PTYPE_L4_UDP 0x00000200 +/** + * Fragmented IP (Internet Protocol) packet type. + * It is used for outer packet for tunneling cases.
+ * + * It refers to those packets of any IP types, which can be recognized as + * fragmented. A fragmented packet cannot be recognized as any other L4 types + * (RTE_PTYPE_L4_TCP, RTE_PTYPE_L4_UDP, RTE_PTYPE_L4_SCTP, RTE_PTYPE_L4_ICMP, + * RTE_PTYPE_L4_NONFRAG). + * + * Packet format: + * <'ether type'=0x0800 + * | 'version'=4, 'MF'=1> + * or, + * <'ether type'=0x86DD + * | 'version'=6, 'next header'=44> + */ +#define RTE_PTYPE_L4_FRAG 0x00000300 +/** + * SCTP (Stream Control Transmission Protocol) packet type. + * It is used for outer packet for tunneling cases. + * + * Packet format: + * <'ether type'=0x0800 + * | 'version'=4, 'protocol'=132, 'MF'=0> + * or, + * <'ether type'=0x86DD + * | 'version'=6, 'next header'=132> + */ +#define RTE_PTYPE_L4_SCTP 0x00000400 +/** + * ICMP (Internet Control Message Protocol) packet type. + * It is used for outer packet for tunneling cases. + * + * Packet format: + * <'ether type'=0x0800 + * | 'version'=4, 'protocol'=1, 'MF'=0> + * or, + * <'ether type'=0x86DD + * | 'version'=6, 'next header'=1> + */ +#define RTE_PTYPE_L4_ICMP 0x00000500 +/** + * Non-fragmented IP (Internet Protocol) packet type. + * It is used for outer packet for tunneling cases. + * + * It refers to those packets of any IP types, which cannot be recognized as + * any of the above L4 types (RTE_PTYPE_L4_TCP, RTE_PTYPE_L4_UDP, + * RTE_PTYPE_L4_FRAG, RTE_PTYPE_L4_SCTP, RTE_PTYPE_L4_ICMP). + * + * Packet format: + * <'ether type'=0x0800 + * | 'version'=4, 'protocol'!=[6|17|132|1], 'MF'=0> + * or, + * <'ether type'=0x86DD + * | 'version'=6, 'next header'!=[6|17|44|132|1]> + */ +#define RTE_PTYPE_L4_NONFRAG 0x00000600 +/** + * Mask of layer 4 packet types (bits 11:8). + * It is used for outer packet for tunneling cases. + */ +#define RTE_PTYPE_L4_MASK 0x00000f00 +/** + * IP (Internet Protocol) in IP (Internet Protocol) tunneling packet type.
+ * + * Packet format: + * <'ether type'=0x0800 + * | 'version'=4, 'protocol'=[4|41]> + * or, + * <'ether type'=0x86DD + * | 'version'=6, 'next header'=[4|41]> + */ +#define RTE_PTYPE_TUNNEL_IP 0x00001000 +/** + * GRE (Generic Routing Encapsulation) tunneling packet type. + * + * Packet format: + * <'ether type'=0x0800 + * | 'version'=4, 'protocol'=47> + * or, + * <'ether type'=0x86DD + * | 'version'=6, 'next header'=47> + */ +#define RTE_PTYPE_TUNNEL_GRE 0x00002000 +/** + * VXLAN (Virtual eXtensible Local Area Network) tunneling packet type. + * + * Packet format: + * <'ether type'=0x0800 + * | 'version'=4, 'protocol'=17 + * | 'destination port'=4789> + * or, + * <'ether type'=0x86DD + * | 'version'=6, 'next header'=17 + * | 'destination port'=4789> + */ +#define RTE_PTYPE_TUNNEL_VXLAN 0x00003000 +/** + * NVGRE (Network Virtualization using Generic Routing Encapsulation) tunneling + * packet type. + * + * Packet format: + * <'ether type'=0x0800 + * | 'version'=4, 'protocol'=47 + * | 'protocol type'=0x6558> + * or, + * <'ether type'=0x86DD + * | 'version'=6, 'next header'=47 + * | 'protocol type'=0x6558> + */ +#define RTE_PTYPE_TUNNEL_NVGRE 0x00004000 +/** + * GENEVE (Generic Network Virtualization Encapsulation) tunneling packet type. + * + * Packet format: + * <'ether type'=0x0800 + * | 'version'=4, 'protocol'=17 + * | 'destination port'=6081> + * or, + * <'ether type'=0x86DD + * | 'version'=6, 'next header'=17 + * | 'destination port'=6081> + */ +#define RTE_PTYPE_TUNNEL_GENEVE 0x00005000 +/** + * Tunneling packet type of Teredo, VXLAN (Virtual eXtensible Local Area + * Network) or GRE (Generic Routing Encapsulation) could be recognized as this + * packet type, if they cannot be recognized independently because of hardware + * capability. + */ +#define RTE_PTYPE_TUNNEL_GRENAT 0x00006000 +/** + * Mask of tunneling packet types (bits 15:12). + */ +#define RTE_PTYPE_TUNNEL_MASK 0x0000f000 +/** + * Ethernet packet type. + * It is used for inner packet type only.
+ * + * Packet format (inner only): + * <'ether type'=[0x800|0x86DD]> + */ +#define RTE_PTYPE_INNER_L2_ETHER 0x00010000 +/** + * Ethernet packet type with VLAN (Virtual Local Area Network) tag. + * + * Packet format (inner only): + * <'ether type'=[0x800|0x86DD], vlan=[1-4095]> + */ +#define RTE_PTYPE_INNER_L2_ETHER_VLAN 0x00020000 +/** + * Mask of inner layer 2 packet types (bits 19:16). + */ +#define RTE_PTYPE_INNER_L2_MASK 0x000f0000 +/** + * IP (Internet Protocol) version 4 packet type. + * It is used for inner packet only, and does not contain any header option. + * + * Packet format (inner only): + * <'ether type'=0x0800 + * | 'version'=4, 'ihl'=5> + */ +#define RTE_PTYPE_INNER_L3_IPV4 0x00100000 +/** + * IP (Internet Protocol) version 4 packet type. + * It is used for inner packet only, and contains header options. + * + * Packet format (inner only): + * <'ether type'=0x0800 + * | 'version'=4, 'ihl'=[6-15], 'options'> + */ +#define RTE_PTYPE_INNER_L3_IPV4_EXT 0x00200000 +/** + * IP (Internet Protocol) version 6 packet type. + * It is used for inner packet only, and does not contain any extension header. + * + * Packet format (inner only): + * <'ether type'=0x86DD + * | 'version'=6, 'next header'=0x3B> + */ +#define RTE_PTYPE_INNER_L3_IPV6 0x00300000 +/** + * IP (Internet Protocol) version 4 packet type. + * It is used for inner packet only, and may or may not contain header options. + * + * Packet format (inner only): + * <'ether type'=0x0800 + * | 'version'=4, 'ihl'=[5-15], <'options'>> + */ +#define RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN 0x00400000 +/** + * IP (Internet Protocol) version 6 packet type. + * It is used for inner packet only, and contains extension headers. + * + * Packet format (inner only): + * <'ether type'=0x86DD + * | 'version'=6, 'next header'=[0x0|0x2B|0x2C|0x32|0x33|0x3C|0x87], + * 'extension headers'> + */ +#define RTE_PTYPE_INNER_L3_IPV6_EXT 0x00500000 +/** + * IP (Internet Protocol) version 6 packet type.
+ * It is used for inner packet only, and may or maynot contain extension + * headers. + * + * Packet format (inner only): + * <'ether type'=0x86DD + * | 'version'=6, 'next header'=[0x3B|0x0|0x2B|0x2C|0x32|0x33|0x3C|0x87], + * <'extension headers'>> + */ +#define RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN 0x00600000 +/** + * Mask of inner layer 3 packet types (bits 23:20). + */ +#define RTE_PTYPE_INNER_L3_MASK 0x00f00000 +/** + * TCP (Transmission Control Protocol) packet type. + * It is used for inner packet only. + * + * Packet format (inner only): + * <'ether type'=0x0800 + * | 'version'=4, 'protocol'=6, 'MF'=0> + * or, + * <'ether type'=0x86DD + * | 'version'=6, 'next header'=6> + */ +#define RTE_PTYPE_INNER_L4_TCP 0x01000000 +/** + * UDP (User Datagram Protocol) packet type. + * It is used for inner packet only. + * + * Packet format (inner only): + * <'ether type'=0x0800 + * | 'version'=4, 'protocol'=17, 'MF'=0> + * or, + * <'ether type'=0x86DD + * | 'version'=6, 'next header'=17> + */ +#define RTE_PTYPE_INNER_L4_UDP 0x02000000 +/** + * Fragmented IP (Internet Protocol) packet type. + * It is used for inner packet only, and may or may not have layer 4 packet. + * + * Packet format (inner only): + * <'ether type'=0x0800 + * | 'version'=4, 'MF'=1> + * or, + * <'ether type'=0x86DD + * | 'version'=6, 'next header'=44> + */ +#define RTE_PTYPE_INNER_L4_FRAG 0x03000000 +/** + * SCTP (Stream Control Transmission Protocol) packet type. + * It is used for inner packet only. + * + * Packet format (inner only): + * <'ether type'=0x0800 + * | 'version'=4, 'protocol'=132, 'MF'=0> + * or, + * <'ether type'=0x86DD + * | 'version'=6, 'next header'=132> + */ +#define RTE_PTYPE_INNER_L4_SCTP 0x04000000 +/** + * ICMP (Internet Control Message Protocol) packet type. + * It is used for inner packet only.
+ * + * Packet format (inner only): + * <'ether type'=0x0800 + * | 'version'=4, 'protocol'=1, 'MF'=0> + * or, + * <'ether type'=0x86DD + * | 'version'=6, 'next header'=1> + */ +#define RTE_PTYPE_INNER_L4_ICMP 0x05000000 +/** + * Non-fragmented IP (Internet Protocol) packet type. + * It is used for inner packet only, and may or may not have other unknown layer + * 4 packet types. + * + * Packet format (inner only): + * <'ether type'=0x0800 + * | 'version'=4, 'protocol'!=[6|17|132|1], 'MF'=0> + * or, + * <'ether type'=0x86DD + * | 'version'=6, 'next header'!=[6|17|44|132|1]> + */ +#define RTE_PTYPE_INNER_L4_NONFRAG 0x06000000 +/** + * Mask of inner layer 4 packet types (bits 27:24). + */ +#define RTE_PTYPE_INNER_L4_MASK 0x0f000000 + +/** + * Check if the (outer) L3 header is IPv4. To avoid comparing IPv4 types one by + * one, bit 4 is selected to be used for IPv4 only. Then checking bit 4 can + * determine if it is an IPV4 packet. + */ +#define RTE_ETH_IS_IPV4_HDR(ptype) ((ptype) & RTE_PTYPE_L3_IPV4) + +/** + * Check if the (outer) L3 header is IPv6. To avoid comparing IPv6 types one by + * one, bit 6 is selected to be used for IPv6 only. Then checking bit 6 can + * determine if it is an IPV6 packet. + */ +#define RTE_ETH_IS_IPV6_HDR(ptype) ((ptype) & RTE_PTYPE_L3_IPV6) + +/* Check if it is a tunneling packet: any tunnel or inner L2/L3/L4 field is non-zero */ +#define RTE_ETH_IS_TUNNEL_PKT(ptype) ((ptype) & (RTE_PTYPE_TUNNEL_MASK | \ + RTE_PTYPE_INNER_L2_MASK | \ + RTE_PTYPE_INNER_L3_MASK | \ + RTE_PTYPE_INNER_L4_MASK)) + +/** Alignment constraint of mbuf private area. */ +#define RTE_MBUF_PRIV_ALIGN 8 + +/** + * Get the name of a RX offload flag + * + * @param mask + * The mask describing the flag. + * @return + * The name of this flag, or NULL if it's not a valid RX flag. + */ +const char *rte_get_rx_ol_flag_name(uint64_t mask); + +/** + * Get the name of a TX offload flag + * + * @param mask + * The mask describing the flag. Usually only one bit must be set. + * Several bits can be given if they belong to the same mask.
+ * Ex: PKT_TX_L4_MASK. + * @return + * The name of this flag, or NULL if it's not a valid TX flag. + */ +const char *rte_get_tx_ol_flag_name(uint64_t mask); + +/** + * Some NICs need at least 2KB buffer to RX standard Ethernet frame without + * splitting it into multiple segments. + * So, for mbufs that are planned to be used for RX/TX, the recommended + * minimal buffer length is 2KB + RTE_PKTMBUF_HEADROOM. + */ +#define RTE_MBUF_DEFAULT_DATAROOM 2048 +#define RTE_MBUF_DEFAULT_BUF_SIZE \ + (RTE_MBUF_DEFAULT_DATAROOM + RTE_PKTMBUF_HEADROOM) + +/* define a set of zero-length marker types that can be used to refer to set + * points in the mbuf */ +typedef void *MARKER[0]; /**< generic marker for a point in a structure */ +typedef uint8_t MARKER8[0]; /**< generic marker with 1B alignment */ +typedef uint64_t MARKER64[0]; /**< marker that allows us to overwrite 8 bytes + * with a single assignment */ + +/** Opaque rte_mbuf_offload structure declaration */ +struct rte_mbuf_offload; + +/** + * The generic rte_mbuf, containing a packet mbuf. + */ +struct rte_mbuf { + MARKER cacheline0; + + void *buf_addr; /**< Virtual address of segment buffer. */ + phys_addr_t buf_physaddr; /**< Physical address of segment buffer. */ + + uint16_t buf_len; /**< Length of segment buffer. */ + + /* next 6 bytes are initialised on RX descriptor rearm */ + MARKER8 rearm_data; + uint16_t data_off; /**< Offset of the data start from buf_addr. */ + + /** + * 16-bit Reference counter. + * It should only be accessed using the following functions: + * rte_mbuf_refcnt_update(), rte_mbuf_refcnt_read(), and + * rte_mbuf_refcnt_set(). The functionality of these functions (atomic, + * or non-atomic) is controlled by the CONFIG_RTE_MBUF_REFCNT_ATOMIC + * config option. + */ + union { + rte_atomic16_t refcnt_atomic; /**< Atomically accessed refcnt */ + uint16_t refcnt; /**< Non-atomically accessed refcnt */ + }; + uint8_t nb_segs; /**< Number of segments. */ + uint8_t port; /**< Input port. */ + + uint64_t ol_flags; /**< Offload features. 
*/ + + /* remaining bytes are set on RX when pulling packet from descriptor */ + MARKER rx_descriptor_fields1; + + /* + * The packet type, which is the combination of outer/inner L2, L3, L4 + * and tunnel types. + */ + union { + uint32_t packet_type; /**< L2/L3/L4 and tunnel information. */ + struct { + uint32_t l2_type:4; /**< (Outer) L2 type. */ + uint32_t l3_type:4; /**< (Outer) L3 type. */ + uint32_t l4_type:4; /**< (Outer) L4 type. */ + uint32_t tun_type:4; /**< Tunnel type. */ + uint32_t inner_l2_type:4; /**< Inner L2 type. */ + uint32_t inner_l3_type:4; /**< Inner L3 type. */ + uint32_t inner_l4_type:4; /**< Inner L4 type. */ + }; + }; + + uint32_t pkt_len; /**< Total pkt len: sum of all segments. */ + uint16_t data_len; /**< Amount of data in segment buffer. */ + uint16_t vlan_tci; /**< VLAN Tag Control Identifier (CPU order) */ + + union { + uint32_t rss; /**< RSS hash result if RSS enabled */ + struct { + union { + struct { + uint16_t hash; + uint16_t id; + }; + uint32_t lo; + /**< Second 4 flexible bytes */ + }; + uint32_t hi; + /**< First 4 flexible bytes or FD ID, dependent on + PKT_RX_FDIR_* flag in ol_flags. */ + } fdir; /**< Filter identifier if FDIR enabled */ + struct { + uint32_t lo; + uint32_t hi; + } sched; /**< Hierarchical scheduler */ + uint32_t usr; /**< User defined tags. See rte_distributor_process() */ + } hash; /**< hash information */ + + uint32_t seqn; /**< Sequence number. See also rte_reorder_insert() */ + + uint16_t vlan_tci_outer; /**< Outer VLAN Tag Control Identifier (CPU order) */ + + /* second cache line - fields only used in slow path or on TX */ + MARKER cacheline1 __rte_cache_aligned; + + union { + void *userdata; /**< Can be used for external metadata */ + uint64_t udata64; /**< Allow 8-byte userdata on 32-bit */ + }; + + struct rte_mempool *pool; /**< Pool from which mbuf was allocated. */ + struct rte_mbuf *next; /**< Next segment of scattered packet. 
*/ + + /* fields to support TX offloads */ + union { + uint64_t tx_offload; /**< combined for easy fetch */ + struct { + uint64_t l2_len:7; /**< L2 (MAC) Header Length. */ + uint64_t l3_len:9; /**< L3 (IP) Header Length. */ + uint64_t l4_len:8; /**< L4 (TCP/UDP) Header Length. */ + uint64_t tso_segsz:16; /**< TCP TSO segment size */ + + /* fields for TX offloading of tunnels */ + uint64_t outer_l3_len:9; /**< Outer L3 (IP) Hdr Length. */ + uint64_t outer_l2_len:7; /**< Outer L2 (MAC) Hdr Length. */ + + /* uint64_t unused:8; */ + }; + }; + + /** Size of the application private data. In case of an indirect + * mbuf, it stores the direct mbuf private data size. */ + uint16_t priv_size; + + /** Timesync flags for use with IEEE1588. */ + uint16_t timesync; + + /* Chain of off-load operations to perform on mbuf */ + struct rte_mbuf_offload *offload_ops; +} __rte_cache_aligned; + +/* Forward declaration: used by rte_mbuf_to_baddr() before its definition. */ +static inline uint16_t rte_pktmbuf_priv_size(struct rte_mempool *mp); + +/** + * Return the mbuf owning the data buffer address of an indirect mbuf. + * + * @param mi + * The pointer to the indirect mbuf. + * @return + * The address of the direct mbuf corresponding to buffer_addr. + */ +static inline struct rte_mbuf * +rte_mbuf_from_indirect(struct rte_mbuf *mi) +{ + /* step back from the buffer over the private area and the mbuf header */ + return (struct rte_mbuf *)RTE_PTR_SUB(mi->buf_addr, sizeof(*mi) + mi->priv_size); +} + +/** + * Return the buffer address embedded in the given mbuf. + * + * @param md + * The pointer to the mbuf. + * @return + * The address of the data buffer owned by the mbuf. + */ +static inline char * +rte_mbuf_to_baddr(struct rte_mbuf *md) +{ + char *buffer_addr; + /* the buffer follows the mbuf header and the pool's private area */ + buffer_addr = (char *)md + sizeof(*md) + rte_pktmbuf_priv_size(md->pool); + return buffer_addr; +} + +/** + * Returns TRUE if given mbuf is indirect, or FALSE otherwise. 
+ */ +#define RTE_MBUF_INDIRECT(mb) ((mb)->ol_flags & IND_ATTACHED_MBUF) + +/** + * Returns TRUE if given mbuf is direct, or FALSE otherwise.
+ */ +#define RTE_MBUF_DIRECT(mb) (!RTE_MBUF_INDIRECT(mb)) + +/** + * Private data in case of pktmbuf pool. + * + * A structure that contains some pktmbuf_pool-specific data that are + * appended after the mempool structure (in private data). + */ +struct rte_pktmbuf_pool_private { + uint16_t mbuf_data_room_size; /**< Size of data space in each mbuf. */ + uint16_t mbuf_priv_size; /**< Size of private area in each mbuf. */ +}; + +#ifdef RTE_LIBRTE_MBUF_DEBUG + +/** check mbuf type in debug mode */ +#define __rte_mbuf_sanity_check(m, is_h) rte_mbuf_sanity_check(m, is_h) + +/** check mbuf type in debug mode if mbuf pointer is not null */ +#define __rte_mbuf_sanity_check_raw(m, is_h) do { \ + if ((m) != NULL) \ + rte_mbuf_sanity_check(m, is_h); \ +} while (0) + +/** + * MBUF asserts in debug mode: panic when the expression is false. + * + * Wrapped in do { } while (0) so the macro expands to a single statement, + * like the non-debug variant; a bare "if" would capture a following "else". + */ +#define RTE_MBUF_ASSERT(exp) do { \ + if (!(exp)) \ + rte_panic("line%d\tassert \"" #exp "\" failed\n", __LINE__); \ +} while (0) + +#else /* RTE_LIBRTE_MBUF_DEBUG */ + +/** check mbuf type in debug mode */ +#define __rte_mbuf_sanity_check(m, is_h) do { } while (0) + +/** check mbuf type in debug mode if mbuf pointer is not null */ +#define __rte_mbuf_sanity_check_raw(m, is_h) do { } while (0) + +/** MBUF asserts in debug mode */ +#define RTE_MBUF_ASSERT(exp) do { } while (0) + +#endif /* RTE_LIBRTE_MBUF_DEBUG */ + +#ifdef RTE_MBUF_REFCNT_ATOMIC + +/** + * Reads the value of an mbuf's refcnt. + * @param m + * Mbuf to read + * @return + * Reference count number. + */ +static inline uint16_t +rte_mbuf_refcnt_read(const struct rte_mbuf *m) +{ + return (uint16_t)(rte_atomic16_read(&m->refcnt_atomic)); +} + +/** + * Sets an mbuf's refcnt to a defined value. + * @param m + * Mbuf to update + * @param new_value + * Value set + */ +static inline void +rte_mbuf_refcnt_set(struct rte_mbuf *m, uint16_t new_value) +{ + rte_atomic16_set(&m->refcnt_atomic, new_value); +} + +/** + * Adds given value to an mbuf's refcnt and returns its new value.
+ * @param m + * Mbuf to update + * @param value + * Value to add/subtract + * @return + * Updated value + */ +static inline uint16_t +rte_mbuf_refcnt_update(struct rte_mbuf *m, int16_t value) +{ + /* + * The atomic_add is an expensive operation, so we don't want to + * call it in the case where we know we are the unique holder of + * this mbuf (i.e. ref_cnt == 1). Otherwise, an atomic + * operation has to be used because concurrent accesses on the + * reference counter can occur. + */ + if (likely(rte_mbuf_refcnt_read(m) == 1)) { + rte_mbuf_refcnt_set(m, 1 + value); + return 1 + value; + } + + return (uint16_t)(rte_atomic16_add_return(&m->refcnt_atomic, value)); +} + +#else /* ! RTE_MBUF_REFCNT_ATOMIC */ + +/** + * Adds given value to an mbuf's refcnt and returns its new value + * (non-atomic variant, operating on the plain refcnt field). + * @param m + * Mbuf to update + * @param value + * Value to add/subtract + * @return + * Updated value + */ +static inline uint16_t +rte_mbuf_refcnt_update(struct rte_mbuf *m, int16_t value) +{ + m->refcnt = (uint16_t)(m->refcnt + value); + return m->refcnt; +} + +/** + * Reads the value of an mbuf's refcnt (non-atomic variant). + * @param m + * Mbuf to read + * @return + * Reference count number. + */ +static inline uint16_t +rte_mbuf_refcnt_read(const struct rte_mbuf *m) +{ + return m->refcnt; +} + +/** + * Sets an mbuf's refcnt to the defined value (non-atomic variant). + * @param m + * Mbuf to update + * @param new_value + * Value set + */ +static inline void +rte_mbuf_refcnt_set(struct rte_mbuf *m, uint16_t new_value) +{ + m->refcnt = new_value; +} + +#endif /* RTE_MBUF_REFCNT_ATOMIC */ + +/** Mbuf prefetch: calls rte_prefetch0() on the mbuf unless the pointer is NULL */ +#define RTE_MBUF_PREFETCH_TO_FREE(m) do { \ + if ((m) != NULL) \ + rte_prefetch0(m); \ +} while (0) + + +/** + * Sanity checks on an mbuf. + * + * Check the consistency of the given mbuf. The function will cause a + * panic if corruption is detected. + * + * @param m + * The mbuf to be checked. + * @param is_header + * True if the mbuf is a packet header, false if it is a sub-segment + * of a packet (in this case, some fields like nb_segs are not checked) + */ +void +rte_mbuf_sanity_check(const struct rte_mbuf *m, int is_header); + +/** + * @internal Allocate a new mbuf from mempool *mp*. + * The use of that function is reserved for RTE internal needs.
+ * Please use rte_pktmbuf_alloc(). + * + * @param mp + * The mempool from which mbuf is allocated. + * @return + * - The pointer to the new mbuf on success. + * - NULL if allocation failed. + */ +static inline struct rte_mbuf *__rte_mbuf_raw_alloc(struct rte_mempool *mp) +{ + void *obj = NULL; + struct rte_mbuf *m; + + /* a negative return from the pool means no object was obtained */ + if (rte_mempool_get(mp, &obj) < 0) + return NULL; + + m = (struct rte_mbuf *)obj; + /* an mbuf coming out of the pool is expected to be unreferenced */ + RTE_MBUF_ASSERT(rte_mbuf_refcnt_read(m) == 0); + rte_mbuf_refcnt_set(m, 1); + + return m; +} + +/** + * @internal Return an mbuf to the mempool recorded in its pool field. + * This function is reserved for RTE internal needs; applications + * should call rte_pktmbuf_free() instead. + * + * @param m + * The mbuf to be freed; its reference count is asserted to be zero + * in debug mode. + */ +static inline void __attribute__((always_inline)) +__rte_mbuf_raw_free(struct rte_mbuf *m) +{ + RTE_MBUF_ASSERT(rte_mbuf_refcnt_read(m) == 0); + rte_mempool_put(m->pool, m); +} + +/* Operations on ctrl mbuf */ + +/** + * The control mbuf constructor. + * + * This function initializes some fields in an mbuf structure that are + * not modified by the user once created (mbuf type, origin pool, buffer + * start address, and so on). This function is given as a callback function + * to rte_mempool_create() at pool creation time. + * + * @param mp + * The mempool from which the mbuf is allocated. + * @param opaque_arg + * A pointer that can be used by the user to retrieve useful information + * for mbuf initialization. This pointer comes from the ``init_arg`` + * parameter of rte_mempool_create(). + * @param m + * The mbuf to initialize. + * @param i + * The index of the mbuf in the pool table. + */ +void rte_ctrlmbuf_init(struct rte_mempool *mp, void *opaque_arg, + void *m, unsigned i); + +/** + * Allocate a new mbuf (type is ctrl) from mempool *mp*. + * + * This new mbuf is initialized with data pointing to the beginning of + * buffer, and with a length of zero. + * + * @param mp + * The mempool from which the mbuf is allocated.
+ * @return + * - The pointer to the new mbuf on success. + * - NULL if allocation failed. + */ +#define rte_ctrlmbuf_alloc(mp) rte_pktmbuf_alloc(mp) + +/** + * Free a control mbuf back into its original mempool. + * + * @param m + * The control mbuf to be freed. + */ +#define rte_ctrlmbuf_free(m) rte_pktmbuf_free(m) + +/** + * A macro that returns the pointer to the carried data + * (buf_addr advanced by data_off). + * + * @param m + * The control mbuf. + */ +#define rte_ctrlmbuf_data(m) ((char *)((m)->buf_addr) + (m)->data_off) + +/** + * A macro that returns the length of the carried data. + * + * The value that can be read or assigned. + * + * @param m + * The control mbuf. + */ +#define rte_ctrlmbuf_len(m) rte_pktmbuf_data_len(m) + +/** + * Tests if an mbuf is a control mbuf, i.e. CTRL_MBUF_FLAG is set in its + * ol_flags. + * + * @param m + * The mbuf to be tested + * @return + * - True (1) if the mbuf is a control mbuf + * - False(0) otherwise + */ +static inline int +rte_is_ctrlmbuf(struct rte_mbuf *m) +{ + return (m->ol_flags & CTRL_MBUF_FLAG) != 0; +} + +/* Operations on pkt mbuf */ + +/** + * The packet mbuf constructor. + * + * This function initializes some fields in the mbuf structure that are + * not modified by the user once created (origin pool, buffer start + * address, and so on). This function is given as a callback function to + * rte_mempool_create() at pool creation time. + * + * @param mp + * The mempool from which mbufs originate. + * @param opaque_arg + * A pointer that can be used by the user to retrieve useful information + * for mbuf initialization. This pointer comes from the ``init_arg`` + * parameter of rte_mempool_create(). + * @param m + * The mbuf to initialize. + * @param i + * The index of the mbuf in the pool table. + */ +void rte_pktmbuf_init(struct rte_mempool *mp, void *opaque_arg, + void *m, unsigned i); + + +/** + * A packet mbuf pool constructor. + * + * This function initializes the mempool private data in the case of a + * pktmbuf pool.
This private data is needed by the driver. The + * function is given as a callback function to rte_mempool_create() at + * pool creation. It can be extended by the user, for example, to + * provide another packet size. + * + * @param mp + * The mempool from which mbufs originate. + * @param opaque_arg + * A pointer that can be used by the user to retrieve useful information + * for mbuf initialization. This pointer comes from the ``init_arg`` + * parameter of rte_mempool_create(). + */ +void rte_pktmbuf_pool_init(struct rte_mempool *mp, void *opaque_arg); + +/** + * Create a mbuf pool. + * + * This function creates and initializes a packet mbuf pool. It is + * a wrapper to rte_mempool_create() with the proper packet constructor + * and mempool constructor. + * + * @param name + * The name of the mbuf pool. + * @param n + * The number of elements in the mbuf pool. The optimum size (in terms + * of memory usage) for a mempool is when n is a power of two minus one: + * n = (2^q - 1). + * @param cache_size + * Size of the per-core object cache. See rte_mempool_create() for + * details. + * @param priv_size + * Size of application private are between the rte_mbuf structure + * and the data buffer. This value must be aligned to RTE_MBUF_PRIV_ALIGN. + * @param data_room_size + * Size of data buffer in each mbuf, including RTE_PKTMBUF_HEADROOM. + * @param socket_id + * The socket identifier where the memory should be allocated. The + * value can be *SOCKET_ID_ANY* if there is no NUMA constraint for the + * reserved zone. + * @return + * The pointer to the new allocated mempool, on success. NULL on error + * with rte_errno set appropriately. Possible rte_errno values include: + * - E_RTE_NO_CONFIG - function could not get pointer to rte_config structure + * - E_RTE_SECONDARY - function was called from a secondary process instance + * - EINVAL - cache size provided is too large, or priv_size is not aligned. 
+ * - ENOSPC - the maximum number of memzones has already been allocated + * - EEXIST - a memzone with the same name already exists + * - ENOMEM - no appropriate memory area found in which to create memzone + */ +struct rte_mempool * +rte_pktmbuf_pool_create(const char *name, unsigned n, + unsigned cache_size, uint16_t priv_size, uint16_t data_room_size, + int socket_id); + +/** + * Get the data room size of mbufs stored in a pktmbuf_pool + * + * The data room size is the amount of data that can be stored in a + * mbuf including the headroom (RTE_PKTMBUF_HEADROOM). + * + * @param mp + * The packet mbuf pool. + * @return + * The data room size of mbufs stored in this mempool. + */ +static inline uint16_t +rte_pktmbuf_data_room_size(struct rte_mempool *mp) +{ + struct rte_pktmbuf_pool_private *mbp_priv; + + mbp_priv = (struct rte_pktmbuf_pool_private *)rte_mempool_get_priv(mp); + return mbp_priv->mbuf_data_room_size; +} + +/** + * Get the application private size of mbufs stored in a pktmbuf_pool + * + * The private size of mbuf is a zone located between the rte_mbuf + * structure and the data buffer where an application can store data + * associated to a packet. + * + * @param mp + * The packet mbuf pool. + * @return + * The private size of mbufs stored in this mempool. + */ +static inline uint16_t +rte_pktmbuf_priv_size(struct rte_mempool *mp) +{ + struct rte_pktmbuf_pool_private *mbp_priv; + + mbp_priv = (struct rte_pktmbuf_pool_private *)rte_mempool_get_priv(mp); + return mbp_priv->mbuf_priv_size; +} + +/** + * Reset the fields of a packet mbuf to their default values. + * + * The given mbuf must have only one segment. + * + * @param m + * The packet mbuf to be resetted. 
+ */ +static inline void rte_pktmbuf_reset(struct rte_mbuf *m) +{ + /* single, unchained segment with no input port (0xff) */ + m->next = NULL; + m->nb_segs = 1; + m->port = 0xff; + + /* no payload yet */ + m->pkt_len = 0; + m->data_len = 0; + + /* clear offload and classification metadata */ + m->ol_flags = 0; + m->packet_type = 0; + m->tx_offload = 0; + m->vlan_tci = 0; + m->vlan_tci_outer = 0; + + /* keep the default headroom unless the buffer is smaller than it */ + if (RTE_PKTMBUF_HEADROOM <= m->buf_len) + m->data_off = RTE_PKTMBUF_HEADROOM; + else + m->data_off = m->buf_len; + + __rte_mbuf_sanity_check(m, 1); +} + +/** + * Allocate a new mbuf from a mempool. + * + * The returned mbuf holds a single segment of length 0 whose data pointer + * leaves some bytes of headroom in the buffer (if buffer size allows). + * + * @param mp + * The mempool from which the mbuf is allocated. + * @return + * - The pointer to the new mbuf on success. + * - NULL if allocation failed. + */ +static inline struct rte_mbuf *rte_pktmbuf_alloc(struct rte_mempool *mp) +{ + struct rte_mbuf *m = __rte_mbuf_raw_alloc(mp); + + if (m != NULL) + rte_pktmbuf_reset(m); + return m; +} + +/** + * Attach packet mbuf to another packet mbuf. + * + * After attachment we refer the mbuf we attached as 'indirect', + * while mbuf we attached to as 'direct'. + * Right now, not supported: + * - attachment for already indirect mbuf (e.g. - mi has to be direct). + * - mbuf we trying to attach (mi) is used by someone else + * e.g. its reference counter is greater than 1. + * + * @param mi + * The indirect packet mbuf. + * @param m + * The packet mbuf we're attaching to.
+ */ +static inline void rte_pktmbuf_attach(struct rte_mbuf *mi, struct rte_mbuf *m) +{ + struct rte_mbuf *md; + + RTE_MBUF_ASSERT(RTE_MBUF_DIRECT(mi) && + rte_mbuf_refcnt_read(mi) == 1); + + /* if m is not direct, get the mbuf that embeds the data */ + if (RTE_MBUF_DIRECT(m)) + md = m; + else + md = rte_mbuf_from_indirect(m); + + /* the new indirect mbuf holds one reference on the buffer owner */ + rte_mbuf_refcnt_update(md, 1); + + /* share the buffer of m */ + mi->priv_size = m->priv_size; + mi->buf_physaddr = m->buf_physaddr; + mi->buf_addr = m->buf_addr; + mi->buf_len = m->buf_len; + + /* copy the per-segment metadata of m */ + mi->data_off = m->data_off; + mi->data_len = m->data_len; + mi->port = m->port; + mi->vlan_tci = m->vlan_tci; + mi->vlan_tci_outer = m->vlan_tci_outer; + mi->tx_offload = m->tx_offload; + mi->hash = m->hash; + + /* mi is a standalone single-segment packet; the chain of m is not copied */ + mi->next = NULL; + mi->pkt_len = mi->data_len; + mi->nb_segs = 1; + mi->ol_flags = m->ol_flags | IND_ATTACHED_MBUF; + mi->packet_type = m->packet_type; + + __rte_mbuf_sanity_check(mi, 1); + __rte_mbuf_sanity_check(m, 0); +} + +/** + * Detach an indirect packet mbuf. + * + * - restore original mbuf address and length values. + * - reset pktmbuf data and data_len to their default values. + * All other fields of the given packet mbuf will be left intact. + * + * @param m + * The indirect attached packet mbuf.
+ */ +static inline void rte_pktmbuf_detach(struct rte_mbuf *m) +{ + struct rte_mempool *mp = m->pool; + uint32_t mbuf_size, buf_len, priv_size; + + priv_size = rte_pktmbuf_priv_size(mp); + mbuf_size = sizeof(struct rte_mbuf) + priv_size; + buf_len = rte_pktmbuf_data_room_size(mp); + + /* point the mbuf back at the buffer that follows its own header */ + m->priv_size = priv_size; + m->buf_addr = (char *)m + mbuf_size; + m->buf_physaddr = rte_mempool_virt2phy(mp, m) + mbuf_size; + m->buf_len = (uint16_t)buf_len; + m->data_off = RTE_MIN(RTE_PKTMBUF_HEADROOM, (uint16_t)m->buf_len); + m->data_len = 0; + m->ol_flags = 0; +} + +/** + * @internal Drop one reference from a segment and prepare it for freeing. + * + * When the reference count reaches zero and the segment is indirect, it is + * detached and the reference it held on the direct mbuf is dropped as well; + * the direct mbuf is put back in its pool if its own count reaches zero. + * + * @param m + * The packet mbuf segment to release. + * @return + * - m if its reference count dropped to zero. + * - NULL otherwise. + */ +static inline struct rte_mbuf* __attribute__((always_inline)) +__rte_pktmbuf_prefree_seg(struct rte_mbuf *m) +{ + __rte_mbuf_sanity_check(m, 0); + + if (likely(rte_mbuf_refcnt_update(m, -1) == 0)) { + + /* if this is an indirect mbuf, then + * - detach mbuf + * - free attached mbuf segment + */ + if (RTE_MBUF_INDIRECT(m)) { + /* look up the direct mbuf first: detach overwrites the + * buf_addr and priv_size fields used for the lookup */ + struct rte_mbuf *md = rte_mbuf_from_indirect(m); + rte_pktmbuf_detach(m); + if (rte_mbuf_refcnt_update(md, -1) == 0) + __rte_mbuf_raw_free(md); + } + return m; + } + return NULL; +} + +/** + * Free a segment of a packet mbuf into its original mempool. + * + * Free an mbuf, without parsing other segments in case of chained + * buffers. The segment is only put back in its pool when its reference + * count drops to zero. + * + * @param m + * The packet mbuf segment to be freed. + */ +static inline void __attribute__((always_inline)) +rte_pktmbuf_free_seg(struct rte_mbuf *m) +{ + if (likely(NULL != (m = __rte_pktmbuf_prefree_seg(m)))) { + m->next = NULL; + __rte_mbuf_raw_free(m); + } +} + +/** + * Free a packet mbuf back into its original mempool. + * + * Free an mbuf, and all its segments in case of chained buffers. Each + * segment is added back into its original mempool. + * + * @param m + * The packet mbuf to be freed.
+ */ +static inline void rte_pktmbuf_free(struct rte_mbuf *m) +{ + struct rte_mbuf *m_next; + + __rte_mbuf_sanity_check(m, 1); + + while (m != NULL) { + m_next = m->next; + rte_pktmbuf_free_seg(m); + m = m_next; + } +} + +/** + * Creates a "clone" of the given packet mbuf. + * + * Walks through all segments of the given packet mbuf, and for each of them: + * - Creates a new packet mbuf from the given pool. + * - Attaches newly created mbuf to the segment. + * Then updates pkt_len and nb_segs of the "clone" packet mbuf to match values + * from the original packet mbuf. + * + * @param md + * The packet mbuf to be cloned. + * @param mp + * The mempool from which the "clone" mbufs are allocated. + * @return + * - The pointer to the new "clone" mbuf on success. + * - NULL if allocation fails. + */ +static inline struct rte_mbuf *rte_pktmbuf_clone(struct rte_mbuf *md, + struct rte_mempool *mp) +{ + struct rte_mbuf *mc, *mi, **prev; + uint32_t pktlen; + uint8_t nseg; + + if (unlikely ((mc = rte_pktmbuf_alloc(mp)) == NULL)) + return NULL; + + mi = mc; + prev = &mi->next; + pktlen = md->pkt_len; + nseg = 0; + + do { + nseg++; + rte_pktmbuf_attach(mi, md); + *prev = mi; + prev = &mi->next; + } while ((md = md->next) != NULL && + (mi = rte_pktmbuf_alloc(mp)) != NULL); + + *prev = NULL; + mc->nb_segs = nseg; + mc->pkt_len = pktlen; + + /* Allocation of new indirect segment failed */ + if (unlikely (mi == NULL)) { + rte_pktmbuf_free(mc); + return NULL; + } + + __rte_mbuf_sanity_check(mc, 1); + return mc; +} + +/** + * Adds given value to the refcnt of all packet mbuf segments. + * + * Walks through all segments of given packet mbuf and for each of them + * invokes rte_mbuf_refcnt_update(). + * + * @param m + * The packet mbuf whose refcnt to be updated. + * @param v + * The value to add to the mbuf's segments refcnt. 
+ */ +static inline void rte_pktmbuf_refcnt_update(struct rte_mbuf *m, int16_t v) +{ + __rte_mbuf_sanity_check(m, 1); + + do { + rte_mbuf_refcnt_update(m, v); + } while ((m = m->next) != NULL); +} + +/** + * Get the headroom in a packet mbuf. + * + * @param m + * The packet mbuf. + * @return + * The length of the headroom. + */ +static inline uint16_t rte_pktmbuf_headroom(const struct rte_mbuf *m) +{ + __rte_mbuf_sanity_check(m, 1); + return m->data_off; +} + +/** + * Get the tailroom of a packet mbuf. + * + * @param m + * The packet mbuf. + * @return + * The length of the tailroom. + */ +static inline uint16_t rte_pktmbuf_tailroom(const struct rte_mbuf *m) +{ + __rte_mbuf_sanity_check(m, 1); + return (uint16_t)(m->buf_len - rte_pktmbuf_headroom(m) - + m->data_len); +} + +/** + * Get the last segment of the packet. + * + * @param m + * The packet mbuf. + * @return + * The last segment of the given mbuf. + */ +static inline struct rte_mbuf *rte_pktmbuf_lastseg(struct rte_mbuf *m) +{ + struct rte_mbuf *m2 = (struct rte_mbuf *)m; + + __rte_mbuf_sanity_check(m, 1); + while (m2->next != NULL) + m2 = m2->next; + return m2; +} + +/** + * A macro that points to an offset into the data in the mbuf. + * + * The returned pointer is cast to type t. Before using this + * function, the user must ensure that the first segment is large + * enough to accommodate its data. + * + * @param m + * The packet mbuf. + * @param o + * The offset into the mbuf data. + * @param t + * The type to cast the result into. + */ +#define rte_pktmbuf_mtod_offset(m, t, o) \ + ((t)((char *)(m)->buf_addr + (m)->data_off + (o))) + +/** + * A macro that points to the start of the data in the mbuf. + * + * The returned pointer is cast to type t. Before using this + * function, the user must ensure that the first segment is large + * enough to accommodate its data. + * + * @param m + * The packet mbuf. + * @param t + * The type to cast the result into. 
+ */ +#define rte_pktmbuf_mtod(m, t) rte_pktmbuf_mtod_offset(m, t, 0) + +/** + * A macro that returns the physical address that points to an offset of the + * start of the data in the mbuf + * + * @param m + * The packet mbuf. + * @param o + * The offset into the data to calculate address from. + */ +#define rte_pktmbuf_mtophys_offset(m, o) \ + (phys_addr_t)((m)->buf_physaddr + (m)->data_off + (o)) + +/** + * A macro that returns the physical address that points to the start of the + * data in the mbuf + * + * @param m + * The packet mbuf. + */ +#define rte_pktmbuf_mtophys(m) rte_pktmbuf_mtophys_offset(m, 0) + +/** + * A macro that returns the length of the packet. + * + * The value can be read or assigned. + * + * @param m + * The packet mbuf. + */ +#define rte_pktmbuf_pkt_len(m) ((m)->pkt_len) + +/** + * A macro that returns the length of the segment. + * + * The value can be read or assigned. + * + * @param m + * The packet mbuf. + */ +#define rte_pktmbuf_data_len(m) ((m)->data_len) + +/** + * Prepend len bytes to an mbuf data area. + * + * Returns a pointer to the new + * data start address. If there is not enough headroom in the first + * segment, the function will return NULL, without modifying the mbuf. + * + * @param m + * The pkt mbuf. + * @param len + * The amount of data to prepend (in bytes). + * @return + * A pointer to the start of the newly prepended data, or + * NULL if there is not enough headroom space in the first segment + */ +static inline char *rte_pktmbuf_prepend(struct rte_mbuf *m, + uint16_t len) +{ + __rte_mbuf_sanity_check(m, 1); + + if (unlikely(len > rte_pktmbuf_headroom(m))) + return NULL; + + m->data_off -= len; + m->data_len = (uint16_t)(m->data_len + len); + m->pkt_len = (m->pkt_len + len); + + return (char *)m->buf_addr + m->data_off; +} + +/** + * Append len bytes to an mbuf. + * + * Append len bytes to an mbuf and return a pointer to the start address + * of the added data. 
If there is not enough tailroom in the last + * segment, the function will return NULL, without modifying the mbuf. + * + * @param m + * The packet mbuf. + * @param len + * The amount of data to append (in bytes). + * @return + * A pointer to the start of the newly appended data, or + * NULL if there is not enough tailroom space in the last segment + */ +static inline char *rte_pktmbuf_append(struct rte_mbuf *m, uint16_t len) +{ + void *tail; + struct rte_mbuf *m_last; + + __rte_mbuf_sanity_check(m, 1); + + m_last = rte_pktmbuf_lastseg(m); + if (unlikely(len > rte_pktmbuf_tailroom(m_last))) + return NULL; + + tail = (char *)m_last->buf_addr + m_last->data_off + m_last->data_len; + m_last->data_len = (uint16_t)(m_last->data_len + len); + m->pkt_len = (m->pkt_len + len); + return (char*) tail; +} + +/** + * Remove len bytes at the beginning of an mbuf. + * + * Returns a pointer to the start address of the new data area. If the + * length is greater than the length of the first segment, then the + * function will fail and return NULL, without modifying the mbuf. + * + * @param m + * The packet mbuf. + * @param len + * The amount of data to remove (in bytes). + * @return + * A pointer to the new start of the data. + */ +static inline char *rte_pktmbuf_adj(struct rte_mbuf *m, uint16_t len) +{ + __rte_mbuf_sanity_check(m, 1); + + if (unlikely(len > m->data_len)) + return NULL; + + m->data_len = (uint16_t)(m->data_len - len); + m->data_off += len; + m->pkt_len = (m->pkt_len - len); + return (char *)m->buf_addr + m->data_off; +} + +/** + * Remove len bytes of data at the end of the mbuf. + * + * If the length is greater than the length of the last segment, the + * function will fail and return -1 without modifying the mbuf. + * + * @param m + * The packet mbuf. + * @param len + * The amount of data to remove (in bytes). + * @return + * - 0: On success. + * - -1: On error. 
+ */ +static inline int rte_pktmbuf_trim(struct rte_mbuf *m, uint16_t len) +{ + struct rte_mbuf *m_last; + + __rte_mbuf_sanity_check(m, 1); + + m_last = rte_pktmbuf_lastseg(m); + if (unlikely(len > m_last->data_len)) + return -1; + + m_last->data_len = (uint16_t)(m_last->data_len - len); + m->pkt_len = (m->pkt_len - len); + return 0; +} + +/** + * Test if mbuf data is contiguous. + * + * @param m + * The packet mbuf. + * @return + * - 1, if all data is contiguous (one segment). + * - 0, if there is several segments. + */ +static inline int rte_pktmbuf_is_contiguous(const struct rte_mbuf *m) +{ + __rte_mbuf_sanity_check(m, 1); + return !!(m->nb_segs == 1); +} + +/** + * Chain an mbuf to another, thereby creating a segmented packet. + * + * Note: The implementation will do a linear walk over the segments to find + * the tail entry. For cases when there are many segments, it's better to + * chain the entries manually. + * + * @param head + * The head of the mbuf chain (the first packet) + * @param tail + * The mbuf to put last in the chain + * + * @return + * - 0, on success. + * - -EOVERFLOW, if the chain is full (256 entries) + */ +static inline int rte_pktmbuf_chain(struct rte_mbuf *head, struct rte_mbuf *tail) +{ + struct rte_mbuf *cur_tail; + + /* Check for number-of-segments-overflow */ + if (head->nb_segs + tail->nb_segs >= 1 << (sizeof(head->nb_segs) * 8)) + return -EOVERFLOW; + + /* Chain 'tail' onto the old tail */ + cur_tail = rte_pktmbuf_lastseg(head); + cur_tail->next = tail; + + /* accumulate number of segments and total length. */ + head->nb_segs = (uint8_t)(head->nb_segs + tail->nb_segs); + head->pkt_len += tail->pkt_len; + + /* pkt_len is only set in the head */ + tail->pkt_len = tail->data_len; + + return 0; +} + +/** + * Dump an mbuf structure to the console. + * + * Dump all fields for the given packet mbuf and all its associated + * segments (in the case of a chained buffer). 
+ * + * @param f + * A pointer to a file for output + * @param m + * The packet mbuf. + * @param dump_len + * If dump_len != 0, also dump the "dump_len" first data bytes of + * the packet. + */ +void rte_pktmbuf_dump(FILE *f, const struct rte_mbuf *m, unsigned dump_len); + +#ifdef __cplusplus +} +#endif + +#endif /* _RTE_MBUF_H_ */ diff --git a/src/dpdk22/lib/librte_mempool/rte_mempool.c b/src/dpdk22/lib/librte_mempool/rte_mempool.c new file mode 100644 index 00000000..aff5f6da --- /dev/null +++ b/src/dpdk22/lib/librte_mempool/rte_mempool.c @@ -0,0 +1,921 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "rte_mempool.h" + +TAILQ_HEAD(rte_mempool_list, rte_tailq_entry); + +static struct rte_tailq_elem rte_mempool_tailq = { + .name = "RTE_MEMPOOL", +}; +EAL_REGISTER_TAILQ(rte_mempool_tailq) + +#define CACHE_FLUSHTHRESH_MULTIPLIER 1.5 +#define CALC_CACHE_FLUSHTHRESH(c) \ + ((typeof(c))((c) * CACHE_FLUSHTHRESH_MULTIPLIER)) + +/* + * return the greatest common divisor between a and b (fast algorithm) + * + */ +static unsigned get_gcd(unsigned a, unsigned b) +{ + unsigned c; + + if (0 == a) + return b; + if (0 == b) + return a; + + if (a < b) { + c = a; + a = b; + b = c; + } + + while (b != 0) { + c = a % b; + a = b; + b = c; + } + + return a; +} + +/* + * Depending on memory configuration, objects addresses are spread + * between channels and ranks in RAM: the pool allocator will add + * padding between objects. This function return the new size of the + * object. 
+ */ +static unsigned optimize_object_size(unsigned obj_size) +{ + unsigned nrank, nchan; + unsigned new_obj_size; + + /* get number of channels */ + nchan = rte_memory_get_nchannel(); + if (nchan == 0) + nchan = 4; + + nrank = rte_memory_get_nrank(); + if (nrank == 0) + nrank = 1; + + /* process new object size */ + new_obj_size = (obj_size + RTE_MEMPOOL_ALIGN_MASK) / RTE_MEMPOOL_ALIGN; + while (get_gcd(new_obj_size, nrank * nchan) != 1) + new_obj_size++; + return new_obj_size * RTE_MEMPOOL_ALIGN; +} + +static void +mempool_add_elem(struct rte_mempool *mp, void *obj, uint32_t obj_idx, + rte_mempool_obj_ctor_t *obj_init, void *obj_init_arg) +{ + struct rte_mempool_objhdr *hdr; + struct rte_mempool_objtlr *tlr __rte_unused; + + obj = (char *)obj + mp->header_size; + + /* set mempool ptr in header */ + hdr = RTE_PTR_SUB(obj, sizeof(*hdr)); + hdr->mp = mp; + +#ifdef RTE_LIBRTE_MEMPOOL_DEBUG + hdr->cookie = RTE_MEMPOOL_HEADER_COOKIE2; + tlr = __mempool_get_trailer(obj); + tlr->cookie = RTE_MEMPOOL_TRAILER_COOKIE; +#endif + /* call the initializer */ + if (obj_init) + obj_init(mp, obj_init_arg, obj, obj_idx); + + /* enqueue in ring */ + rte_ring_sp_enqueue(mp->ring, obj); +} + +uint32_t +rte_mempool_obj_iter(void *vaddr, uint32_t elt_num, size_t elt_sz, size_t align, + const phys_addr_t paddr[], uint32_t pg_num, uint32_t pg_shift, + rte_mempool_obj_iter_t obj_iter, void *obj_iter_arg) +{ + uint32_t i, j, k; + uint32_t pgn, pgf; + uintptr_t end, start, va; + uintptr_t pg_sz; + + pg_sz = (uintptr_t)1 << pg_shift; + va = (uintptr_t)vaddr; + + i = 0; + j = 0; + + while (i != elt_num && j != pg_num) { + + start = RTE_ALIGN_CEIL(va, align); + end = start + elt_sz; + + /* index of the first page for the next element. */ + pgf = (end >> pg_shift) - (start >> pg_shift); + + /* index of the last page for the current element. */ + pgn = ((end - 1) >> pg_shift) - (start >> pg_shift); + pgn += j; + + /* do we have enough space left for the element. 
*/ + if (pgn >= pg_num) + break; + + for (k = j; + k != pgn && + paddr[k] + pg_sz == paddr[k + 1]; + k++) + ; + + /* + * if next pgn chunks of memory physically continuous, + * use it to create next element. + * otherwise, just skip that chunk unused. + */ + if (k == pgn) { + if (obj_iter != NULL) + obj_iter(obj_iter_arg, (void *)start, + (void *)end, i); + va = end; + j += pgf; + i++; + } else { + va = RTE_ALIGN_CEIL((va + 1), pg_sz); + j++; + } + } + + return i; +} + +/* + * Populate mempool with the objects. + */ + +struct mempool_populate_arg { + struct rte_mempool *mp; + rte_mempool_obj_ctor_t *obj_init; + void *obj_init_arg; +}; + +static void +mempool_obj_populate(void *arg, void *start, void *end, uint32_t idx) +{ + struct mempool_populate_arg *pa = arg; + + mempool_add_elem(pa->mp, start, idx, pa->obj_init, pa->obj_init_arg); + pa->mp->elt_va_end = (uintptr_t)end; +} + +static void +mempool_populate(struct rte_mempool *mp, size_t num, size_t align, + rte_mempool_obj_ctor_t *obj_init, void *obj_init_arg) +{ + uint32_t elt_sz; + struct mempool_populate_arg arg; + + elt_sz = mp->elt_size + mp->header_size + mp->trailer_size; + arg.mp = mp; + arg.obj_init = obj_init; + arg.obj_init_arg = obj_init_arg; + + mp->size = rte_mempool_obj_iter((void *)mp->elt_va_start, + num, elt_sz, align, + mp->elt_pa, mp->pg_num, mp->pg_shift, + mempool_obj_populate, &arg); +} + +uint32_t +rte_mempool_calc_obj_size(uint32_t elt_size, uint32_t flags, + struct rte_mempool_objsz *sz) +{ + struct rte_mempool_objsz lsz; + + sz = (sz != NULL) ? sz : &lsz; + + /* + * In header, we have at least the pointer to the pool, and + * optionaly a 64 bits cookie. 
+ */ + sz->header_size = 0; + sz->header_size += sizeof(struct rte_mempool *); /* ptr to pool */ +#ifdef RTE_LIBRTE_MEMPOOL_DEBUG + sz->header_size += sizeof(uint64_t); /* cookie */ +#endif + if ((flags & MEMPOOL_F_NO_CACHE_ALIGN) == 0) + sz->header_size = RTE_ALIGN_CEIL(sz->header_size, + RTE_MEMPOOL_ALIGN); + + /* trailer contains the cookie in debug mode */ + sz->trailer_size = 0; +#ifdef RTE_LIBRTE_MEMPOOL_DEBUG + sz->trailer_size += sizeof(uint64_t); /* cookie */ +#endif + /* element size is 8 bytes-aligned at least */ + sz->elt_size = RTE_ALIGN_CEIL(elt_size, sizeof(uint64_t)); + + /* expand trailer to next cache line */ + if ((flags & MEMPOOL_F_NO_CACHE_ALIGN) == 0) { + sz->total_size = sz->header_size + sz->elt_size + + sz->trailer_size; + sz->trailer_size += ((RTE_MEMPOOL_ALIGN - + (sz->total_size & RTE_MEMPOOL_ALIGN_MASK)) & + RTE_MEMPOOL_ALIGN_MASK); + } + + /* + * increase trailer to add padding between objects in order to + * spread them across memory channels/ranks + */ + if ((flags & MEMPOOL_F_NO_SPREAD) == 0) { + unsigned new_size; + new_size = optimize_object_size(sz->header_size + sz->elt_size + + sz->trailer_size); + sz->trailer_size = new_size - sz->header_size - sz->elt_size; + } + + if (! rte_eal_has_hugepages()) { + /* + * compute trailer size so that pool elements fit exactly in + * a standard page + */ + int page_size = getpagesize(); + int new_size = page_size - sz->header_size - sz->elt_size; + if (new_size < 0 || (unsigned int)new_size < sz->trailer_size) { + printf("When hugepages are disabled, pool objects " + "can't exceed PAGE_SIZE: %d + %d + %d > %d\n", + sz->header_size, sz->elt_size, sz->trailer_size, + page_size); + return 0; + } + sz->trailer_size = new_size; + } + + /* this is the size of an object, including header and trailer */ + sz->total_size = sz->header_size + sz->elt_size + sz->trailer_size; + + return sz->total_size; +} + + +/* + * Calculate maximum amount of memory required to store given number of objects. 
+ */ +size_t +rte_mempool_xmem_size(uint32_t elt_num, size_t elt_sz, uint32_t pg_shift) +{ + size_t n, pg_num, pg_sz, sz; + + pg_sz = (size_t)1 << pg_shift; + + if ((n = pg_sz / elt_sz) > 0) { + pg_num = (elt_num + n - 1) / n; + sz = pg_num << pg_shift; + } else { + sz = RTE_ALIGN_CEIL(elt_sz, pg_sz) * elt_num; + } + + return sz; +} + +/* + * Calculate how much memory would be actually required with the + * given memory footprint to store required number of elements. + */ +static void +mempool_lelem_iter(void *arg, __rte_unused void *start, void *end, + __rte_unused uint32_t idx) +{ + *(uintptr_t *)arg = (uintptr_t)end; +} + +ssize_t +rte_mempool_xmem_usage(void *vaddr, uint32_t elt_num, size_t elt_sz, + const phys_addr_t paddr[], uint32_t pg_num, uint32_t pg_shift) +{ + uint32_t n; + uintptr_t va, uv; + size_t pg_sz, usz; + + pg_sz = (size_t)1 << pg_shift; + va = (uintptr_t)vaddr; + uv = va; + + if ((n = rte_mempool_obj_iter(vaddr, elt_num, elt_sz, 1, + paddr, pg_num, pg_shift, mempool_lelem_iter, + &uv)) != elt_num) { + return -(ssize_t)n; + } + + uv = RTE_ALIGN_CEIL(uv, pg_sz); + usz = uv - va; + return usz; +} + +#ifndef RTE_LIBRTE_XEN_DOM0 +/* stub if DOM0 support not configured */ +struct rte_mempool * +rte_dom0_mempool_create(const char *name __rte_unused, + unsigned n __rte_unused, + unsigned elt_size __rte_unused, + unsigned cache_size __rte_unused, + unsigned private_data_size __rte_unused, + rte_mempool_ctor_t *mp_init __rte_unused, + void *mp_init_arg __rte_unused, + rte_mempool_obj_ctor_t *obj_init __rte_unused, + void *obj_init_arg __rte_unused, + int socket_id __rte_unused, + unsigned flags __rte_unused) +{ + rte_errno = EINVAL; + return NULL; +} +#endif + +/* create the mempool */ +struct rte_mempool * +rte_mempool_create(const char *name, unsigned n, unsigned elt_size, + unsigned cache_size, unsigned private_data_size, + rte_mempool_ctor_t *mp_init, void *mp_init_arg, + rte_mempool_obj_ctor_t *obj_init, void *obj_init_arg, + int socket_id, unsigned 
flags) +{ + if (rte_xen_dom0_supported()) + return rte_dom0_mempool_create(name, n, elt_size, + cache_size, private_data_size, + mp_init, mp_init_arg, + obj_init, obj_init_arg, + socket_id, flags); + else + return rte_mempool_xmem_create(name, n, elt_size, + cache_size, private_data_size, + mp_init, mp_init_arg, + obj_init, obj_init_arg, + socket_id, flags, + NULL, NULL, MEMPOOL_PG_NUM_DEFAULT, + MEMPOOL_PG_SHIFT_MAX); +} + +/* + * Create the mempool over already allocated chunk of memory. + * That external memory buffer can consists of physically disjoint pages. + * Setting vaddr to NULL, makes mempool to fallback to original behaviour + * and allocate space for mempool and it's elements as one big chunk of + * physically continuos memory. + * */ +struct rte_mempool * +rte_mempool_xmem_create(const char *name, unsigned n, unsigned elt_size, + unsigned cache_size, unsigned private_data_size, + rte_mempool_ctor_t *mp_init, void *mp_init_arg, + rte_mempool_obj_ctor_t *obj_init, void *obj_init_arg, + int socket_id, unsigned flags, void *vaddr, + const phys_addr_t paddr[], uint32_t pg_num, uint32_t pg_shift) +{ + char mz_name[RTE_MEMZONE_NAMESIZE]; + char rg_name[RTE_RING_NAMESIZE]; + struct rte_mempool_list *mempool_list; + struct rte_mempool *mp = NULL; + struct rte_tailq_entry *te; + struct rte_ring *r; + const struct rte_memzone *mz; + size_t mempool_size; + int mz_flags = RTE_MEMZONE_1GB|RTE_MEMZONE_SIZE_HINT_ONLY; + int rg_flags = 0; + void *obj; + struct rte_mempool_objsz objsz; + void *startaddr; + int page_size = getpagesize(); + + /* compilation-time checks */ + RTE_BUILD_BUG_ON((sizeof(struct rte_mempool) & + RTE_CACHE_LINE_MASK) != 0); +#if RTE_MEMPOOL_CACHE_MAX_SIZE > 0 + RTE_BUILD_BUG_ON((sizeof(struct rte_mempool_cache) & + RTE_CACHE_LINE_MASK) != 0); + RTE_BUILD_BUG_ON((offsetof(struct rte_mempool, local_cache) & + RTE_CACHE_LINE_MASK) != 0); +#endif +#ifdef RTE_LIBRTE_MEMPOOL_DEBUG + RTE_BUILD_BUG_ON((sizeof(struct rte_mempool_debug_stats) & + 
RTE_CACHE_LINE_MASK) != 0); + RTE_BUILD_BUG_ON((offsetof(struct rte_mempool, stats) & + RTE_CACHE_LINE_MASK) != 0); +#endif + + mempool_list = RTE_TAILQ_CAST(rte_mempool_tailq.head, rte_mempool_list); + + /* asked cache too big */ + if (cache_size > RTE_MEMPOOL_CACHE_MAX_SIZE || + CALC_CACHE_FLUSHTHRESH(cache_size) > n) { + rte_errno = EINVAL; + return NULL; + } + + /* check that we have both VA and PA */ + if (vaddr != NULL && paddr == NULL) { + rte_errno = EINVAL; + return NULL; + } + + /* Check that pg_num and pg_shift parameters are valid. */ + if (pg_num < RTE_DIM(mp->elt_pa) || pg_shift > MEMPOOL_PG_SHIFT_MAX) { + rte_errno = EINVAL; + return NULL; + } + + /* "no cache align" imply "no spread" */ + if (flags & MEMPOOL_F_NO_CACHE_ALIGN) + flags |= MEMPOOL_F_NO_SPREAD; + + /* ring flags */ + if (flags & MEMPOOL_F_SP_PUT) + rg_flags |= RING_F_SP_ENQ; + if (flags & MEMPOOL_F_SC_GET) + rg_flags |= RING_F_SC_DEQ; + + /* calculate mempool object sizes. */ + if (!rte_mempool_calc_obj_size(elt_size, flags, &objsz)) { + rte_errno = EINVAL; + return NULL; + } + + rte_rwlock_write_lock(RTE_EAL_MEMPOOL_RWLOCK); + + /* allocate the ring that will be used to store objects */ + /* Ring functions will return appropriate errors if we are + * running as a secondary process etc., so no checks made + * in this function for that condition */ + snprintf(rg_name, sizeof(rg_name), RTE_MEMPOOL_MZ_FORMAT, name); + r = rte_ring_create(rg_name, rte_align32pow2(n+1), socket_id, rg_flags); + if (r == NULL) + goto exit; + + /* + * reserve a memory zone for this mempool: private data is + * cache-aligned + */ + private_data_size = (private_data_size + + RTE_MEMPOOL_ALIGN_MASK) & (~RTE_MEMPOOL_ALIGN_MASK); + + if (! 
rte_eal_has_hugepages()) { + /* + * expand private data size to a whole page, so that the + * first pool element will start on a new standard page + */ + int head = sizeof(struct rte_mempool); + int new_size = (private_data_size + head) % page_size; + if (new_size) { + private_data_size += page_size - new_size; + } + } + + /* try to allocate tailq entry */ + te = rte_zmalloc("MEMPOOL_TAILQ_ENTRY", sizeof(*te), 0); + if (te == NULL) { + RTE_LOG(ERR, MEMPOOL, "Cannot allocate tailq entry!\n"); + goto exit; + } + + /* + * If user provided an external memory buffer, then use it to + * store mempool objects. Otherwise reserve a memzone that is large + * enough to hold mempool header and metadata plus mempool objects. + */ + mempool_size = MEMPOOL_HEADER_SIZE(mp, pg_num) + private_data_size; + mempool_size = RTE_ALIGN_CEIL(mempool_size, RTE_MEMPOOL_ALIGN); + if (vaddr == NULL) + mempool_size += (size_t)objsz.total_size * n; + + if (! rte_eal_has_hugepages()) { + /* + * we want the memory pool to start on a page boundary, + * because pool elements crossing page boundaries would + * result in discontiguous physical addresses + */ + mempool_size += page_size; + } + + snprintf(mz_name, sizeof(mz_name), RTE_MEMPOOL_MZ_FORMAT, name); + + mz = rte_memzone_reserve(mz_name, mempool_size, socket_id, mz_flags); + + /* + * no more memory: in this case we loose previously reserved + * space for the ring as we cannot free it + */ + if (mz == NULL) { + rte_free(te); + goto exit; + } + + if (rte_eal_has_hugepages()) { + startaddr = (void*)mz->addr; + } else { + /* align memory pool start address on a page boundary */ + unsigned long addr = (unsigned long)mz->addr; + if (addr & (page_size - 1)) { + addr += page_size; + addr &= ~(page_size - 1); + } + startaddr = (void*)addr; + } + + /* init the mempool structure */ + mp = startaddr; + memset(mp, 0, sizeof(*mp)); + snprintf(mp->name, sizeof(mp->name), "%s", name); + mp->phys_addr = mz->phys_addr; + mp->ring = r; + mp->size = n; + 
mp->flags = flags; + mp->elt_size = objsz.elt_size; + mp->header_size = objsz.header_size; + mp->trailer_size = objsz.trailer_size; + mp->cache_size = cache_size; + mp->cache_flushthresh = CALC_CACHE_FLUSHTHRESH(cache_size); + mp->private_data_size = private_data_size; + + /* calculate address of the first element for continuous mempool. */ + obj = (char *)mp + MEMPOOL_HEADER_SIZE(mp, pg_num) + + private_data_size; + obj = RTE_PTR_ALIGN_CEIL(obj, RTE_MEMPOOL_ALIGN); + + /* populate address translation fields. */ + mp->pg_num = pg_num; + mp->pg_shift = pg_shift; + mp->pg_mask = RTE_LEN2MASK(mp->pg_shift, typeof(mp->pg_mask)); + + /* mempool elements allocated together with mempool */ + if (vaddr == NULL) { + mp->elt_va_start = (uintptr_t)obj; + mp->elt_pa[0] = mp->phys_addr + + (mp->elt_va_start - (uintptr_t)mp); + + /* mempool elements in a separate chunk of memory. */ + } else { + mp->elt_va_start = (uintptr_t)vaddr; + memcpy(mp->elt_pa, paddr, sizeof (mp->elt_pa[0]) * pg_num); + } + + mp->elt_va_end = mp->elt_va_start; + + /* call the initializer */ + if (mp_init) + mp_init(mp, mp_init_arg); + + mempool_populate(mp, n, 1, obj_init, obj_init_arg); + + te->data = (void *) mp; + + rte_rwlock_write_lock(RTE_EAL_TAILQ_RWLOCK); + TAILQ_INSERT_TAIL(mempool_list, te, next); + rte_rwlock_write_unlock(RTE_EAL_TAILQ_RWLOCK); + +exit: + rte_rwlock_write_unlock(RTE_EAL_MEMPOOL_RWLOCK); + + return mp; +} + +/* Return the number of entries in the mempool */ +unsigned +rte_mempool_count(const struct rte_mempool *mp) +{ + unsigned count; + + count = rte_ring_count(mp->ring); + +#if RTE_MEMPOOL_CACHE_MAX_SIZE > 0 + { + unsigned lcore_id; + if (mp->cache_size == 0) + return count; + + for (lcore_id = 0; lcore_id < RTE_MAX_LCORE; lcore_id++) + count += mp->local_cache[lcore_id].len; + } +#endif + + /* + * due to race condition (access to len is not locked), the + * total can be greater than size... 
so fix the result + */ + if (count > mp->size) + return mp->size; + return count; +} + +/* dump the cache status */ +static unsigned +rte_mempool_dump_cache(FILE *f, const struct rte_mempool *mp) +{ +#if RTE_MEMPOOL_CACHE_MAX_SIZE > 0 + unsigned lcore_id; + unsigned count = 0; + unsigned cache_count; + + fprintf(f, " cache infos:\n"); + fprintf(f, " cache_size=%"PRIu32"\n", mp->cache_size); + for (lcore_id = 0; lcore_id < RTE_MAX_LCORE; lcore_id++) { + cache_count = mp->local_cache[lcore_id].len; + fprintf(f, " cache_count[%u]=%u\n", lcore_id, cache_count); + count += cache_count; + } + fprintf(f, " total_cache_count=%u\n", count); + return count; +#else + RTE_SET_USED(mp); + fprintf(f, " cache disabled\n"); + return 0; +#endif +} + +#ifdef RTE_LIBRTE_MEMPOOL_DEBUG +/* check cookies before and after objects */ +#ifndef __INTEL_COMPILER +#pragma GCC diagnostic ignored "-Wcast-qual" +#endif + +struct mempool_audit_arg { + const struct rte_mempool *mp; + uintptr_t obj_end; + uint32_t obj_num; +}; + +static void +mempool_obj_audit(void *arg, void *start, void *end, uint32_t idx) +{ + struct mempool_audit_arg *pa = arg; + void *obj; + + obj = (char *)start + pa->mp->header_size; + pa->obj_end = (uintptr_t)end; + pa->obj_num = idx + 1; + __mempool_check_cookies(pa->mp, &obj, 1, 2); +} + +static void +mempool_audit_cookies(const struct rte_mempool *mp) +{ + uint32_t elt_sz, num; + struct mempool_audit_arg arg; + + elt_sz = mp->elt_size + mp->header_size + mp->trailer_size; + + arg.mp = mp; + arg.obj_end = mp->elt_va_start; + arg.obj_num = 0; + + num = rte_mempool_obj_iter((void *)mp->elt_va_start, + mp->size, elt_sz, 1, + mp->elt_pa, mp->pg_num, mp->pg_shift, + mempool_obj_audit, &arg); + + if (num != mp->size) { + rte_panic("rte_mempool_obj_iter(mempool=%p, size=%u) " + "iterated only over %u elements\n", + mp, mp->size, num); + } else if (arg.obj_end != mp->elt_va_end || arg.obj_num != mp->size) { + rte_panic("rte_mempool_obj_iter(mempool=%p, size=%u) " + "last callback 
va_end: %#tx (%#tx expeceted), " + "num of objects: %u (%u expected)\n", + mp, mp->size, + arg.obj_end, mp->elt_va_end, + arg.obj_num, mp->size); + } +} + +#ifndef __INTEL_COMPILER +#pragma GCC diagnostic error "-Wcast-qual" +#endif +#else +#define mempool_audit_cookies(mp) do {} while(0) +#endif + +#if RTE_MEMPOOL_CACHE_MAX_SIZE > 0 +/* check cookies before and after objects */ +static void +mempool_audit_cache(const struct rte_mempool *mp) +{ + /* check cache size consistency */ + unsigned lcore_id; + for (lcore_id = 0; lcore_id < RTE_MAX_LCORE; lcore_id++) { + if (mp->local_cache[lcore_id].len > mp->cache_flushthresh) { + RTE_LOG(CRIT, MEMPOOL, "badness on cache[%u]\n", + lcore_id); + rte_panic("MEMPOOL: invalid cache len\n"); + } + } +} +#else +#define mempool_audit_cache(mp) do {} while(0) +#endif + + +/* check the consistency of mempool (size, cookies, ...) */ +void +rte_mempool_audit(const struct rte_mempool *mp) +{ + mempool_audit_cache(mp); + mempool_audit_cookies(mp); + + /* For case where mempool DEBUG is not set, and cache size is 0 */ + RTE_SET_USED(mp); +} + +/* dump the status of the mempool on the console */ +void +rte_mempool_dump(FILE *f, const struct rte_mempool *mp) +{ +#ifdef RTE_LIBRTE_MEMPOOL_DEBUG + struct rte_mempool_debug_stats sum; + unsigned lcore_id; +#endif + unsigned common_count; + unsigned cache_count; + + RTE_VERIFY(f != NULL); + RTE_VERIFY(mp != NULL); + + fprintf(f, "mempool <%s>@%p\n", mp->name, mp); + fprintf(f, " flags=%x\n", mp->flags); + fprintf(f, " ring=<%s>@%p\n", mp->ring->name, mp->ring); + fprintf(f, " phys_addr=0x%" PRIx64 "\n", mp->phys_addr); + fprintf(f, " size=%"PRIu32"\n", mp->size); + fprintf(f, " header_size=%"PRIu32"\n", mp->header_size); + fprintf(f, " elt_size=%"PRIu32"\n", mp->elt_size); + fprintf(f, " trailer_size=%"PRIu32"\n", mp->trailer_size); + fprintf(f, " total_obj_size=%"PRIu32"\n", + mp->header_size + mp->elt_size + mp->trailer_size); + + fprintf(f, " private_data_size=%"PRIu32"\n", 
mp->private_data_size); + fprintf(f, " pg_num=%"PRIu32"\n", mp->pg_num); + fprintf(f, " pg_shift=%"PRIu32"\n", mp->pg_shift); + fprintf(f, " pg_mask=%#tx\n", mp->pg_mask); + fprintf(f, " elt_va_start=%#tx\n", mp->elt_va_start); + fprintf(f, " elt_va_end=%#tx\n", mp->elt_va_end); + fprintf(f, " elt_pa[0]=0x%" PRIx64 "\n", mp->elt_pa[0]); + + if (mp->size != 0) + fprintf(f, " avg bytes/object=%#Lf\n", + (long double)(mp->elt_va_end - mp->elt_va_start) / + mp->size); + + cache_count = rte_mempool_dump_cache(f, mp); + common_count = rte_ring_count(mp->ring); + if ((cache_count + common_count) > mp->size) + common_count = mp->size - cache_count; + fprintf(f, " common_pool_count=%u\n", common_count); + + /* sum and dump statistics */ +#ifdef RTE_LIBRTE_MEMPOOL_DEBUG + memset(&sum, 0, sizeof(sum)); + for (lcore_id = 0; lcore_id < RTE_MAX_LCORE; lcore_id++) { + sum.put_bulk += mp->stats[lcore_id].put_bulk; + sum.put_objs += mp->stats[lcore_id].put_objs; + sum.get_success_bulk += mp->stats[lcore_id].get_success_bulk; + sum.get_success_objs += mp->stats[lcore_id].get_success_objs; + sum.get_fail_bulk += mp->stats[lcore_id].get_fail_bulk; + sum.get_fail_objs += mp->stats[lcore_id].get_fail_objs; + } + fprintf(f, " stats:\n"); + fprintf(f, " put_bulk=%"PRIu64"\n", sum.put_bulk); + fprintf(f, " put_objs=%"PRIu64"\n", sum.put_objs); + fprintf(f, " get_success_bulk=%"PRIu64"\n", sum.get_success_bulk); + fprintf(f, " get_success_objs=%"PRIu64"\n", sum.get_success_objs); + fprintf(f, " get_fail_bulk=%"PRIu64"\n", sum.get_fail_bulk); + fprintf(f, " get_fail_objs=%"PRIu64"\n", sum.get_fail_objs); +#else + fprintf(f, " no statistics available\n"); +#endif + + rte_mempool_audit(mp); +} + +/* dump the status of all mempools on the console */ +void +rte_mempool_list_dump(FILE *f) +{ + const struct rte_mempool *mp = NULL; + struct rte_tailq_entry *te; + struct rte_mempool_list *mempool_list; + + mempool_list = RTE_TAILQ_CAST(rte_mempool_tailq.head, rte_mempool_list); + + 
rte_rwlock_read_lock(RTE_EAL_MEMPOOL_RWLOCK); + + TAILQ_FOREACH(te, mempool_list, next) { + mp = (struct rte_mempool *) te->data; + rte_mempool_dump(f, mp); + } + + rte_rwlock_read_unlock(RTE_EAL_MEMPOOL_RWLOCK); +} + +/* search a mempool from its name */ +struct rte_mempool * +rte_mempool_lookup(const char *name) +{ + struct rte_mempool *mp = NULL; + struct rte_tailq_entry *te; + struct rte_mempool_list *mempool_list; + + mempool_list = RTE_TAILQ_CAST(rte_mempool_tailq.head, rte_mempool_list); + + rte_rwlock_read_lock(RTE_EAL_MEMPOOL_RWLOCK); + + TAILQ_FOREACH(te, mempool_list, next) { + mp = (struct rte_mempool *) te->data; + if (strncmp(name, mp->name, RTE_MEMPOOL_NAMESIZE) == 0) + break; + } + + rte_rwlock_read_unlock(RTE_EAL_MEMPOOL_RWLOCK); + + if (te == NULL) { + rte_errno = ENOENT; + return NULL; + } + + return mp; +} + +void rte_mempool_walk(void (*func)(const struct rte_mempool *, void *), + void *arg) +{ + struct rte_tailq_entry *te = NULL; + struct rte_mempool_list *mempool_list; + + mempool_list = RTE_TAILQ_CAST(rte_mempool_tailq.head, rte_mempool_list); + + rte_rwlock_read_lock(RTE_EAL_MEMPOOL_RWLOCK); + + TAILQ_FOREACH(te, mempool_list, next) { + (*func)((struct rte_mempool *) te->data, arg); + } + + rte_rwlock_read_unlock(RTE_EAL_MEMPOOL_RWLOCK); +} diff --git a/src/dpdk22/lib/librte_mempool/rte_mempool.h b/src/dpdk22/lib/librte_mempool/rte_mempool.h new file mode 100644 index 00000000..6e2390a1 --- /dev/null +++ b/src/dpdk22/lib/librte_mempool/rte_mempool.h @@ -0,0 +1,1408 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. 
+ * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef _RTE_MEMPOOL_H_ +#define _RTE_MEMPOOL_H_ + +/** + * @file + * RTE Mempool. + * + * A memory pool is an allocator of fixed-size object. It is + * identified by its name, and uses a ring to store free objects. It + * provides some other optional services, like a per-core object + * cache, and an alignment helper to ensure that objects are padded + * to spread them equally on all RAM channels, ranks, and so on. + * + * Objects owned by a mempool should never be added in another + * mempool. When an object is freed using rte_mempool_put() or + * equivalent, the object data is not modified; the user can save some + * meta-data in the object data and retrieve them when allocating a + * new object. 
+ * + * Note: the mempool implementation is not preemptable. A lcore must + * not be interrupted by another task that uses the same mempool + * (because it uses a ring which is not preemptable). Also, mempool + * functions must not be used outside the DPDK environment: for + * example, in linuxapp environment, a thread that is not created by + * the EAL must not use mempools. This is due to the per-lcore cache + * that won't work as rte_lcore_id() will not return a correct value. + */ + +#include <stdio.h> +#include <stdlib.h> +#include <stdint.h> +#include <errno.h> +#include <inttypes.h> +#include <sys/queue.h> + +#include <rte_log.h> +#include <rte_debug.h> +#include <rte_lcore.h> +#include <rte_memory.h> +#include <rte_branch_prediction.h> +#include <rte_ring.h> + +#ifdef __cplusplus +extern "C" { +#endif + +#define RTE_MEMPOOL_HEADER_COOKIE1 0xbadbadbadadd2e55ULL /**< Header cookie. */ +#define RTE_MEMPOOL_HEADER_COOKIE2 0xf2eef2eedadd2e55ULL /**< Header cookie. */ +#define RTE_MEMPOOL_TRAILER_COOKIE 0xadd2e55badbadbadULL /**< Trailer cookie.*/ + +#ifdef RTE_LIBRTE_MEMPOOL_DEBUG +/** + * A structure that stores the mempool statistics (per-lcore). + */ +struct rte_mempool_debug_stats { + uint64_t put_bulk; /**< Number of puts. */ + uint64_t put_objs; /**< Number of objects successfully put. */ + uint64_t get_success_bulk; /**< Successful allocation number. */ + uint64_t get_success_objs; /**< Objects successfully allocated. */ + uint64_t get_fail_bulk; /**< Failed allocation number. */ + uint64_t get_fail_objs; /**< Objects that failed to be allocated. */ +} __rte_cache_aligned; +#endif + +#if RTE_MEMPOOL_CACHE_MAX_SIZE > 0 +/** + * A structure that stores a per-core object cache. + */ +struct rte_mempool_cache { + unsigned len; /**< Cache len */ + /* + * Cache is allocated to this size to allow it to overflow in certain + * cases to avoid needless emptying of cache. + */ + void *objs[RTE_MEMPOOL_CACHE_MAX_SIZE * 3]; /**< Cache objects */ +} __rte_cache_aligned; +#endif /* RTE_MEMPOOL_CACHE_MAX_SIZE > 0 */ + +/** + * A structure that stores the size of mempool elements.
+ */ +struct rte_mempool_objsz { + uint32_t elt_size; /**< Size of an element. */ + uint32_t header_size; /**< Size of header (before elt). */ + uint32_t trailer_size; /**< Size of trailer (after elt). */ + uint32_t total_size; + /**< Total size of an object (header + elt + trailer). */ +}; + +#define RTE_MEMPOOL_NAMESIZE 32 /**< Maximum length of a memory pool name. */ +#define RTE_MEMPOOL_MZ_PREFIX "MP_" + +/* "MP_<name>" */ +#define RTE_MEMPOOL_MZ_FORMAT RTE_MEMPOOL_MZ_PREFIX "%s" + +#ifdef RTE_LIBRTE_XEN_DOM0 + +/* "<name>_MP_elt" */ +#define RTE_MEMPOOL_OBJ_NAME "%s_" RTE_MEMPOOL_MZ_PREFIX "elt" + +#else + +#define RTE_MEMPOOL_OBJ_NAME RTE_MEMPOOL_MZ_FORMAT + +#endif /* RTE_LIBRTE_XEN_DOM0 */ + +#define MEMPOOL_PG_SHIFT_MAX (sizeof(uintptr_t) * CHAR_BIT - 1) + +/** Mempool over one chunk of physically continuous memory */ +#define MEMPOOL_PG_NUM_DEFAULT 1 + +#ifndef RTE_MEMPOOL_ALIGN +#define RTE_MEMPOOL_ALIGN RTE_CACHE_LINE_SIZE +#endif + +#define RTE_MEMPOOL_ALIGN_MASK (RTE_MEMPOOL_ALIGN - 1) + +/** + * Mempool object header structure + * + * Each object stored in mempools are prefixed by this header structure, + * it allows to retrieve the mempool pointer from the object. When debug + * is enabled, a cookie is also added in this structure preventing + * corruptions and double-frees. + */ +struct rte_mempool_objhdr { + struct rte_mempool *mp; /**< The mempool owning the object. */ +#ifdef RTE_LIBRTE_MEMPOOL_DEBUG + uint64_t cookie; /**< Debug cookie. */ +#endif +}; + +/** + * Mempool object trailer structure + * + * In debug mode, each object stored in mempools are suffixed by this + * trailer structure containing a cookie preventing memory corruptions. + */ +struct rte_mempool_objtlr { +#ifdef RTE_LIBRTE_MEMPOOL_DEBUG + uint64_t cookie; /**< Debug cookie. */ +#endif +}; + +/** + * The RTE mempool structure. + */ +struct rte_mempool { + char name[RTE_MEMPOOL_NAMESIZE]; /**< Name of mempool. */ + struct rte_ring *ring; /**< Ring to store objects.
*/ + phys_addr_t phys_addr; /**< Phys. addr. of mempool struct. */ + int flags; /**< Flags of the mempool. */ + uint32_t size; /**< Size of the mempool. */ + uint32_t cache_size; /**< Size of per-lcore local cache. */ + uint32_t cache_flushthresh; + /**< Threshold before we flush excess elements. */ + + uint32_t elt_size; /**< Size of an element. */ + uint32_t header_size; /**< Size of header (before elt). */ + uint32_t trailer_size; /**< Size of trailer (after elt). */ + + unsigned private_data_size; /**< Size of private data. */ + +#if RTE_MEMPOOL_CACHE_MAX_SIZE > 0 + /** Per-lcore local cache. */ + struct rte_mempool_cache local_cache[RTE_MAX_LCORE]; +#endif + +#ifdef RTE_LIBRTE_MEMPOOL_DEBUG + /** Per-lcore statistics. */ + struct rte_mempool_debug_stats stats[RTE_MAX_LCORE]; +#endif + + /* Address translation support, starts from next cache line. */ + + /** Number of elements in the elt_pa array. */ + uint32_t pg_num __rte_cache_aligned; + uint32_t pg_shift; /**< LOG2 of the physical pages. */ + uintptr_t pg_mask; /**< physical page mask value. */ + uintptr_t elt_va_start; + /**< Virtual address of the first mempool object. */ + uintptr_t elt_va_end; + /**< Virtual address of the <size + 1> mempool object. */ + phys_addr_t elt_pa[MEMPOOL_PG_NUM_DEFAULT]; + /**< Array of physical page addresses for the mempool objects buffer. */ + +} __rte_cache_aligned; + +#define MEMPOOL_F_NO_SPREAD 0x0001 /**< Do not spread in memory. */ +#define MEMPOOL_F_NO_CACHE_ALIGN 0x0002 /**< Do not align objs on cache lines.*/ +#define MEMPOOL_F_SP_PUT 0x0004 /**< Default put is "single-producer".*/ +#define MEMPOOL_F_SC_GET 0x0008 /**< Default get is "single-consumer".*/ + +/** + * @internal When debug is enabled, store some statistics. + * + * @param mp + * Pointer to the memory pool. + * @param name + * Name of the statistics field to increment in the memory pool. + * @param n + * Number to add to the object-oriented statistics.
+ */ +#ifdef RTE_LIBRTE_MEMPOOL_DEBUG +#define __MEMPOOL_STAT_ADD(mp, name, n) do { \ + unsigned __lcore_id = rte_lcore_id(); \ + if (__lcore_id < RTE_MAX_LCORE) { \ + mp->stats[__lcore_id].name##_objs += n; \ + mp->stats[__lcore_id].name##_bulk += 1; \ + } \ + } while(0) +#else +#define __MEMPOOL_STAT_ADD(mp, name, n) do {} while(0) +#endif + +/** + * Calculate the size of the mempool header. + * + * @param mp + * Pointer to the memory pool. + * @param pgn + * Number of pages used to store mempool objects. + */ +#define MEMPOOL_HEADER_SIZE(mp, pgn) (sizeof(*(mp)) + \ + RTE_ALIGN_CEIL(((pgn) - RTE_DIM((mp)->elt_pa)) * \ + sizeof ((mp)->elt_pa[0]), RTE_CACHE_LINE_SIZE)) + +/** + * Return true if the whole mempool is in contiguous memory. + */ +#define MEMPOOL_IS_CONTIG(mp) \ + ((mp)->pg_num == MEMPOOL_PG_NUM_DEFAULT && \ + (mp)->phys_addr == (mp)->elt_pa[0]) + +/* return the header of a mempool object (internal) */ +static inline struct rte_mempool_objhdr *__mempool_get_header(void *obj) +{ + return (struct rte_mempool_objhdr *)RTE_PTR_SUB(obj, sizeof(struct rte_mempool_objhdr)); +} + +/** + * Return a pointer to the mempool owning this object. + * + * @param obj + * An object that is owned by a pool. If this is not the case, + * the behavior is undefined. + * @return + * A pointer to the mempool structure. + */ +static inline struct rte_mempool *rte_mempool_from_obj(void *obj) +{ + struct rte_mempool_objhdr *hdr = __mempool_get_header(obj); + return hdr->mp; +} + +/* return the trailer of a mempool object (internal) */ +static inline struct rte_mempool_objtlr *__mempool_get_trailer(void *obj) +{ + struct rte_mempool *mp = rte_mempool_from_obj(obj); + return (struct rte_mempool_objtlr *)RTE_PTR_ADD(obj, mp->elt_size); +} + +/** + * @internal Check and update cookies or panic. + * + * @param mp + * Pointer to the memory pool. + * @param obj_table_const + * Pointer to a table of void * pointers (objects). + * @param n + * Index of object in object table. 
+ * @param free + * - 0: object is supposed to be allocated, mark it as free + * - 1: object is supposed to be free, mark it as allocated + * - 2: just check that cookie is valid (free or allocated) + */ +#ifdef RTE_LIBRTE_MEMPOOL_DEBUG +#ifndef __INTEL_COMPILER +#pragma GCC diagnostic ignored "-Wcast-qual" +#endif +static inline void __mempool_check_cookies(const struct rte_mempool *mp, + void * const *obj_table_const, + unsigned n, int free) +{ + struct rte_mempool_objhdr *hdr; + struct rte_mempool_objtlr *tlr; + uint64_t cookie; + void *tmp; + void *obj; + void **obj_table; + + /* Force to drop the "const" attribute. This is done only when + * DEBUG is enabled */ + tmp = (void *) obj_table_const; + obj_table = (void **) tmp; + + while (n--) { + obj = obj_table[n]; + + if (rte_mempool_from_obj(obj) != mp) + rte_panic("MEMPOOL: object is owned by another " + "mempool\n"); + + hdr = __mempool_get_header(obj); + cookie = hdr->cookie; + + if (free == 0) { + if (cookie != RTE_MEMPOOL_HEADER_COOKIE1) { + rte_log_set_history(0); + RTE_LOG(CRIT, MEMPOOL, + "obj=%p, mempool=%p, cookie=%" PRIx64 "\n", + obj, (const void *) mp, cookie); + rte_panic("MEMPOOL: bad header cookie (put)\n"); + } + hdr->cookie = RTE_MEMPOOL_HEADER_COOKIE2; + } + else if (free == 1) { + if (cookie != RTE_MEMPOOL_HEADER_COOKIE2) { + rte_log_set_history(0); + RTE_LOG(CRIT, MEMPOOL, + "obj=%p, mempool=%p, cookie=%" PRIx64 "\n", + obj, (const void *) mp, cookie); + rte_panic("MEMPOOL: bad header cookie (get)\n"); + } + hdr->cookie = RTE_MEMPOOL_HEADER_COOKIE1; + } + else if (free == 2) { + if (cookie != RTE_MEMPOOL_HEADER_COOKIE1 && + cookie != RTE_MEMPOOL_HEADER_COOKIE2) { + rte_log_set_history(0); + RTE_LOG(CRIT, MEMPOOL, + "obj=%p, mempool=%p, cookie=%" PRIx64 "\n", + obj, (const void *) mp, cookie); + rte_panic("MEMPOOL: bad header cookie (audit)\n"); + } + } + tlr = __mempool_get_trailer(obj); + cookie = tlr->cookie; + if (cookie != RTE_MEMPOOL_TRAILER_COOKIE) { + rte_log_set_history(0); + 
RTE_LOG(CRIT, MEMPOOL, + "obj=%p, mempool=%p, cookie=%" PRIx64 "\n", + obj, (const void *) mp, cookie); + rte_panic("MEMPOOL: bad trailer cookie\n"); + } + } +} +#ifndef __INTEL_COMPILER +#pragma GCC diagnostic error "-Wcast-qual" +#endif +#else +#define __mempool_check_cookies(mp, obj_table_const, n, free) do {} while(0) +#endif /* RTE_LIBRTE_MEMPOOL_DEBUG */ + +/** + * A mempool object iterator callback function. + */ +typedef void (*rte_mempool_obj_iter_t)(void * /*obj_iter_arg*/, + void * /*obj_start*/, + void * /*obj_end*/, + uint32_t /*obj_index */); + +/** + * Call a function for each mempool object in a memory chunk + * + * Iterate across objects of the given size and alignment in the + * provided chunk of memory. The given memory buffer can consist of + * disjointed physical pages. + * + * For each object, call the provided callback (if any). This function + * is used to populate a mempool, or walk through all the elements of a + * mempool, or estimate how many elements of the given size could be + * created in the given memory buffer. + * + * @param vaddr + * Virtual address of the memory buffer. + * @param elt_num + * Maximum number of objects to iterate through. + * @param elt_sz + * Size of each object. + * @param align + * Alignment of each object. + * @param paddr + * Array of physical addresses of the pages that comprises given memory + * buffer. + * @param pg_num + * Number of elements in the paddr array. + * @param pg_shift + * LOG2 of the physical pages size. + * @param obj_iter + * Object iterator callback function (could be NULL). + * @param obj_iter_arg + * User defined parameter for the object iterator callback function. + * + * @return + * Number of objects iterated through. 
+ */ +uint32_t rte_mempool_obj_iter(void *vaddr, + uint32_t elt_num, size_t elt_sz, size_t align, + const phys_addr_t paddr[], uint32_t pg_num, uint32_t pg_shift, + rte_mempool_obj_iter_t obj_iter, void *obj_iter_arg); + +/** + * An object constructor callback function for mempool. + * + * Arguments are the mempool, the opaque pointer given by the user in + * rte_mempool_create(), the pointer to the element and the index of + * the element in the pool. + */ +typedef void (rte_mempool_obj_ctor_t)(struct rte_mempool *, void *, + void *, unsigned); + +/** + * A mempool constructor callback function. + * + * Arguments are the mempool and the opaque pointer given by the user in + * rte_mempool_create(). + */ +typedef void (rte_mempool_ctor_t)(struct rte_mempool *, void *); + +/** + * Create a new mempool named *name* in memory. + * + * This function uses ``memzone_reserve()`` to allocate memory. The + * pool contains n elements of elt_size. Its size is set to n. + * All elements of the mempool are allocated together with the mempool header, + * in one physically continuous chunk of memory. + * + * @param name + * The name of the mempool. + * @param n + * The number of elements in the mempool. The optimum size (in terms of + * memory usage) for a mempool is when n is a power of two minus one: + * n = (2^q - 1). + * @param elt_size + * The size of each element. + * @param cache_size + * If cache_size is non-zero, the rte_mempool library will try to + * limit the accesses to the common lockless pool, by maintaining a + * per-lcore object cache. This argument must be lower or equal to + * CONFIG_RTE_MEMPOOL_CACHE_MAX_SIZE and n / 1.5. It is advised to choose + * cache_size to have "n modulo cache_size == 0": if this is + * not the case, some elements will always stay in the pool and will + * never be used. The access to the per-lcore table is of course + * faster than the multi-producer/consumer pool. 
The cache can be + * disabled if the cache_size argument is set to 0; it can be useful to + * avoid losing objects in cache. Note that even if not used, the + * memory space for cache is always reserved in a mempool structure, + * except if CONFIG_RTE_MEMPOOL_CACHE_MAX_SIZE is set to 0. + * @param private_data_size + * The size of the private data appended after the mempool + * structure. This is useful for storing some private data after the + * mempool structure, as is done for rte_mbuf_pool for example. + * @param mp_init + * A function pointer that is called for initialization of the pool, + * before object initialization. The user can initialize the private + * data in this function if needed. This parameter can be NULL if + * not needed. + * @param mp_init_arg + * An opaque pointer to data that can be used in the mempool + * constructor function. + * @param obj_init + * A function pointer that is called for each object at + * initialization of the pool. The user can set some meta data in + * objects if needed. This parameter can be NULL if not needed. + * The obj_init() function takes the mempool pointer, the init_arg, + * the object pointer and the object number as parameters. + * @param obj_init_arg + * An opaque pointer to data that can be used as an argument for + * each call to the object constructor function. + * @param socket_id + * The *socket_id* argument is the socket identifier in the case of + * NUMA. The value can be *SOCKET_ID_ANY* if there is no NUMA + * constraint for the reserved zone. + * @param flags + * The *flags* arguments is an OR of following flags: + * - MEMPOOL_F_NO_SPREAD: By default, objects addresses are spread + * between channels in RAM: the pool allocator will add padding + * between objects depending on the hardware configuration. See + * Memory alignment constraints for details. If this flag is set, + * the allocator will just align them to a cache line. 
+ * - MEMPOOL_F_NO_CACHE_ALIGN: By default, the returned objects are + * cache-aligned. This flag removes this constraint, and no + * padding will be present between objects. This flag implies + * MEMPOOL_F_NO_SPREAD. + * - MEMPOOL_F_SP_PUT: If this flag is set, the default behavior + * when using rte_mempool_put() or rte_mempool_put_bulk() is + * "single-producer". Otherwise, it is "multi-producers". + * - MEMPOOL_F_SC_GET: If this flag is set, the default behavior + * when using rte_mempool_get() or rte_mempool_get_bulk() is + * "single-consumer". Otherwise, it is "multi-consumers". + * @return + * The pointer to the new allocated mempool, on success. NULL on error + * with rte_errno set appropriately. Possible rte_errno values include: + * - E_RTE_NO_CONFIG - function could not get pointer to rte_config structure + * - E_RTE_SECONDARY - function was called from a secondary process instance + * - EINVAL - cache size provided is too large + * - ENOSPC - the maximum number of memzones has already been allocated + * - EEXIST - a memzone with the same name already exists + * - ENOMEM - no appropriate memory area found in which to create memzone + */ +struct rte_mempool * +rte_mempool_create(const char *name, unsigned n, unsigned elt_size, + unsigned cache_size, unsigned private_data_size, + rte_mempool_ctor_t *mp_init, void *mp_init_arg, + rte_mempool_obj_ctor_t *obj_init, void *obj_init_arg, + int socket_id, unsigned flags); + +/** + * Create a new mempool named *name* in memory. + * + * This function uses ``memzone_reserve()`` to allocate memory. The + * pool contains n elements of elt_size. Its size is set to n. + * Depending on the input parameters, mempool elements can be either allocated + * together with the mempool header, or an externally provided memory buffer + * could be used to store mempool objects. In later case, that external + * memory buffer can consist of set of disjoint physical pages. + * + * @param name + * The name of the mempool. 
+ * @param n + * The number of elements in the mempool. The optimum size (in terms of + * memory usage) for a mempool is when n is a power of two minus one: + * n = (2^q - 1). + * @param elt_size + * The size of each element. + * @param cache_size + * If cache_size is non-zero, the rte_mempool library will try to + * limit the accesses to the common lockless pool, by maintaining a + * per-lcore object cache. This argument must be lower or equal to + * CONFIG_RTE_MEMPOOL_CACHE_MAX_SIZE. It is advised to choose + * cache_size to have "n modulo cache_size == 0": if this is + * not the case, some elements will always stay in the pool and will + * never be used. The access to the per-lcore table is of course + * faster than the multi-producer/consumer pool. The cache can be + * disabled if the cache_size argument is set to 0; it can be useful to + * avoid losing objects in cache. Note that even if not used, the + * memory space for cache is always reserved in a mempool structure, + * except if CONFIG_RTE_MEMPOOL_CACHE_MAX_SIZE is set to 0. + * @param private_data_size + * The size of the private data appended after the mempool + * structure. This is useful for storing some private data after the + * mempool structure, as is done for rte_mbuf_pool for example. + * @param mp_init + * A function pointer that is called for initialization of the pool, + * before object initialization. The user can initialize the private + * data in this function if needed. This parameter can be NULL if + * not needed. + * @param mp_init_arg + * An opaque pointer to data that can be used in the mempool + * constructor function. + * @param obj_init + * A function pointer that is called for each object at + * initialization of the pool. The user can set some meta data in + * objects if needed. This parameter can be NULL if not needed. + * The obj_init() function takes the mempool pointer, the init_arg, + * the object pointer and the object number as parameters. 
+ * @param obj_init_arg + * An opaque pointer to data that can be used as an argument for + * each call to the object constructor function. + * @param socket_id + * The *socket_id* argument is the socket identifier in the case of + * NUMA. The value can be *SOCKET_ID_ANY* if there is no NUMA + * constraint for the reserved zone. + * @param flags + * The *flags* arguments is an OR of following flags: + * - MEMPOOL_F_NO_SPREAD: By default, objects addresses are spread + * between channels in RAM: the pool allocator will add padding + * between objects depending on the hardware configuration. See + * Memory alignment constraints for details. If this flag is set, + * the allocator will just align them to a cache line. + * - MEMPOOL_F_NO_CACHE_ALIGN: By default, the returned objects are + * cache-aligned. This flag removes this constraint, and no + * padding will be present between objects. This flag implies + * MEMPOOL_F_NO_SPREAD. + * - MEMPOOL_F_SP_PUT: If this flag is set, the default behavior + * when using rte_mempool_put() or rte_mempool_put_bulk() is + * "single-producer". Otherwise, it is "multi-producers". + * - MEMPOOL_F_SC_GET: If this flag is set, the default behavior + * when using rte_mempool_get() or rte_mempool_get_bulk() is + * "single-consumer". Otherwise, it is "multi-consumers". + * @param vaddr + * Virtual address of the externally allocated memory buffer. + * Will be used to store mempool objects. + * @param paddr + * Array of physical addresses of the pages that comprises given memory + * buffer. + * @param pg_num + * Number of elements in the paddr array. + * @param pg_shift + * LOG2 of the physical pages size. + * @return + * The pointer to the new allocated mempool, on success. NULL on error + * with rte_errno set appropriately. 
Possible rte_errno values include: + * - E_RTE_NO_CONFIG - function could not get pointer to rte_config structure + * - E_RTE_SECONDARY - function was called from a secondary process instance + * - EINVAL - cache size provided is too large + * - ENOSPC - the maximum number of memzones has already been allocated + * - EEXIST - a memzone with the same name already exists + * - ENOMEM - no appropriate memory area found in which to create memzone + */ +struct rte_mempool * +rte_mempool_xmem_create(const char *name, unsigned n, unsigned elt_size, + unsigned cache_size, unsigned private_data_size, + rte_mempool_ctor_t *mp_init, void *mp_init_arg, + rte_mempool_obj_ctor_t *obj_init, void *obj_init_arg, + int socket_id, unsigned flags, void *vaddr, + const phys_addr_t paddr[], uint32_t pg_num, uint32_t pg_shift); + +/** + * Create a new mempool named *name* in memory on Xen Dom0. + * + * This function uses ``rte_mempool_xmem_create()`` to allocate memory. The + * pool contains n elements of elt_size. Its size is set to n. + * All elements of the mempool are allocated together with the mempool header, + * and memory buffer can consist of set of disjoint physical pages. + * + * @param name + * The name of the mempool. + * @param n + * The number of elements in the mempool. The optimum size (in terms of + * memory usage) for a mempool is when n is a power of two minus one: + * n = (2^q - 1). + * @param elt_size + * The size of each element. + * @param cache_size + * If cache_size is non-zero, the rte_mempool library will try to + * limit the accesses to the common lockless pool, by maintaining a + * per-lcore object cache. This argument must be lower or equal to + * CONFIG_RTE_MEMPOOL_CACHE_MAX_SIZE. It is advised to choose + * cache_size to have "n modulo cache_size == 0": if this is + * not the case, some elements will always stay in the pool and will + * never be used. The access to the per-lcore table is of course + * faster than the multi-producer/consumer pool. 
The cache can be + * disabled if the cache_size argument is set to 0; it can be useful to + * avoid losing objects in cache. Note that even if not used, the + * memory space for cache is always reserved in a mempool structure, + * except if CONFIG_RTE_MEMPOOL_CACHE_MAX_SIZE is set to 0. + * @param private_data_size + * The size of the private data appended after the mempool + * structure. This is useful for storing some private data after the + * mempool structure, as is done for rte_mbuf_pool for example. + * @param mp_init + * A function pointer that is called for initialization of the pool, + * before object initialization. The user can initialize the private + * data in this function if needed. This parameter can be NULL if + * not needed. + * @param mp_init_arg + * An opaque pointer to data that can be used in the mempool + * constructor function. + * @param obj_init + * A function pointer that is called for each object at + * initialization of the pool. The user can set some meta data in + * objects if needed. This parameter can be NULL if not needed. + * The obj_init() function takes the mempool pointer, the init_arg, + * the object pointer and the object number as parameters. + * @param obj_init_arg + * An opaque pointer to data that can be used as an argument for + * each call to the object constructor function. + * @param socket_id + * The *socket_id* argument is the socket identifier in the case of + * NUMA. The value can be *SOCKET_ID_ANY* if there is no NUMA + * constraint for the reserved zone. + * @param flags + * The *flags* arguments is an OR of following flags: + * - MEMPOOL_F_NO_SPREAD: By default, objects addresses are spread + * between channels in RAM: the pool allocator will add padding + * between objects depending on the hardware configuration. See + * Memory alignment constraints for details. If this flag is set, + * the allocator will just align them to a cache line. 
+ * - MEMPOOL_F_NO_CACHE_ALIGN: By default, the returned objects are + * cache-aligned. This flag removes this constraint, and no + * padding will be present between objects. This flag implies + * MEMPOOL_F_NO_SPREAD. + * - MEMPOOL_F_SP_PUT: If this flag is set, the default behavior + * when using rte_mempool_put() or rte_mempool_put_bulk() is + * "single-producer". Otherwise, it is "multi-producers". + * - MEMPOOL_F_SC_GET: If this flag is set, the default behavior + * when using rte_mempool_get() or rte_mempool_get_bulk() is + * "single-consumer". Otherwise, it is "multi-consumers". + * @return + * The pointer to the new allocated mempool, on success. NULL on error + * with rte_errno set appropriately. Possible rte_errno values include: + * - E_RTE_NO_CONFIG - function could not get pointer to rte_config structure + * - E_RTE_SECONDARY - function was called from a secondary process instance + * - EINVAL - cache size provided is too large + * - ENOSPC - the maximum number of memzones has already been allocated + * - EEXIST - a memzone with the same name already exists + * - ENOMEM - no appropriate memory area found in which to create memzone + */ +struct rte_mempool * +rte_dom0_mempool_create(const char *name, unsigned n, unsigned elt_size, + unsigned cache_size, unsigned private_data_size, + rte_mempool_ctor_t *mp_init, void *mp_init_arg, + rte_mempool_obj_ctor_t *obj_init, void *obj_init_arg, + int socket_id, unsigned flags); + + +/** + * Dump the status of the mempool to the console. + * + * @param f + * A pointer to a file for output + * @param mp + * A pointer to the mempool structure. + */ +void rte_mempool_dump(FILE *f, const struct rte_mempool *mp); + +/** + * @internal Put several objects back in the mempool; used internally. + * @param mp + * A pointer to the mempool structure. + * @param obj_table + * A pointer to a table of void * pointers (objects). + * @param n + * The number of objects to store back in the mempool, must be strictly + * positive. 
+ * @param is_mp + * Mono-producer (0) or multi-producers (1). + */ +static inline void __attribute__((always_inline)) +__mempool_put_bulk(struct rte_mempool *mp, void * const *obj_table, + unsigned n, int is_mp) +{ +#if RTE_MEMPOOL_CACHE_MAX_SIZE > 0 + struct rte_mempool_cache *cache; + uint32_t index; + void **cache_objs; + unsigned lcore_id = rte_lcore_id(); + uint32_t cache_size = mp->cache_size; + uint32_t flushthresh = mp->cache_flushthresh; +#endif /* RTE_MEMPOOL_CACHE_MAX_SIZE > 0 */ + + /* increment stat now, a put into the mempool is assumed to always succeed */ + __MEMPOOL_STAT_ADD(mp, put, n); + +#if RTE_MEMPOOL_CACHE_MAX_SIZE > 0 + /* cache is not enabled or single producer or non-EAL thread */ + if (unlikely(cache_size == 0 || is_mp == 0 || + lcore_id >= RTE_MAX_LCORE)) + goto ring_enqueue; + + /* Go straight to ring if put would overflow mem allocated for cache */ + if (unlikely(n > RTE_MEMPOOL_CACHE_MAX_SIZE)) + goto ring_enqueue; + + cache = &mp->local_cache[lcore_id]; + cache_objs = &cache->objs[cache->len]; + + /* + * The cache follows the following algorithm + * 1. Add the objects to the cache + * 2. If the cache length then reaches the flush threshold, every + * object above cache_size is flushed to the ring.
+ */ + + /* Add elements back into the cache */ + for (index = 0; index < n; ++index, obj_table++) + cache_objs[index] = *obj_table; + + cache->len += n; + + if (cache->len >= flushthresh) { + rte_ring_mp_enqueue_bulk(mp->ring, &cache->objs[cache_size], + cache->len - cache_size); + cache->len = cache_size; + } + + return; + +ring_enqueue: +#endif /* RTE_MEMPOOL_CACHE_MAX_SIZE > 0 */ + + /* no cache used: push all n objects directly to the ring */ +#ifdef RTE_LIBRTE_MEMPOOL_DEBUG + if (is_mp) { + if (rte_ring_mp_enqueue_bulk(mp->ring, obj_table, n) < 0) + rte_panic("cannot put objects in mempool\n"); + } + else { + if (rte_ring_sp_enqueue_bulk(mp->ring, obj_table, n) < 0) + rte_panic("cannot put objects in mempool\n"); + } +#else /* non-debug build: the enqueue result is not checked */ + if (is_mp) + rte_ring_mp_enqueue_bulk(mp->ring, obj_table, n); + else + rte_ring_sp_enqueue_bulk(mp->ring, obj_table, n); +#endif +} + + +/** + * Put several objects back in the mempool (multi-producers safe). + * + * @param mp + * A pointer to the mempool structure. + * @param obj_table + * A pointer to a table of void * pointers (objects). + * @param n + * The number of objects to add in the mempool from the obj_table. + */ +static inline void __attribute__((always_inline)) +rte_mempool_mp_put_bulk(struct rte_mempool *mp, void * const *obj_table, + unsigned n) +{ + __mempool_check_cookies(mp, obj_table, n, 0); + __mempool_put_bulk(mp, obj_table, n, 1); +} + +/** + * Put several objects back in the mempool (NOT multi-producers safe). + * + * @param mp + * A pointer to the mempool structure. + * @param obj_table + * A pointer to a table of void * pointers (objects). + * @param n + * The number of objects to add in the mempool from obj_table. + */ +static inline void +rte_mempool_sp_put_bulk(struct rte_mempool *mp, void * const *obj_table, + unsigned n) +{ + __mempool_check_cookies(mp, obj_table, n, 0); + __mempool_put_bulk(mp, obj_table, n, 0); +} + +/** + * Put several objects back in the mempool.
+ * + * This function calls the multi-producer or the single-producer + * version depending on the default behavior that was specified at + * mempool creation time (see flags). + * + * @param mp + * A pointer to the mempool structure. + * @param obj_table + * A pointer to a table of void * pointers (objects). + * @param n + * The number of objects to add in the mempool from obj_table. + */ +static inline void __attribute__((always_inline)) +rte_mempool_put_bulk(struct rte_mempool *mp, void * const *obj_table, + unsigned n) +{ + __mempool_check_cookies(mp, obj_table, n, 0); + __mempool_put_bulk(mp, obj_table, n, !(mp->flags & MEMPOOL_F_SP_PUT)); +} + +/** + * Put one object back in the mempool (multi-producers safe). + * + * @param mp + * A pointer to the mempool structure. + * @param obj + * A pointer to the object to be added. + */ +static inline void __attribute__((always_inline)) +rte_mempool_mp_put(struct rte_mempool *mp, void *obj) +{ + rte_mempool_mp_put_bulk(mp, &obj, 1); +} + +/** + * Put one object back in the mempool (NOT multi-producers safe). + * + * @param mp + * A pointer to the mempool structure. + * @param obj + * A pointer to the object to be added. + */ +static inline void __attribute__((always_inline)) +rte_mempool_sp_put(struct rte_mempool *mp, void *obj) +{ + rte_mempool_sp_put_bulk(mp, &obj, 1); +} + +/** + * Put one object back in the mempool. + * + * This function calls the multi-producer or the single-producer + * version depending on the default behavior that was specified at + * mempool creation time (see flags). + * + * @param mp + * A pointer to the mempool structure. + * @param obj + * A pointer to the object to be added. + */ +static inline void __attribute__((always_inline)) +rte_mempool_put(struct rte_mempool *mp, void *obj) +{ + rte_mempool_put_bulk(mp, &obj, 1); +} + +/** + * @internal Get several objects from the mempool; used internally. + * @param mp + * A pointer to the mempool structure.
+ * @param obj_table + * A pointer to a table of void * pointers (objects). + * @param n + * The number of objects to get, must be strictly positive. + * @param is_mc + * Mono-consumer (0) or multi-consumers (1). + * @return + * - 0: Success when the objects come from the per-lcore cache. + * - Otherwise the return value of the ring dequeue function, which is + * <0 on error. + */ +static inline int __attribute__((always_inline)) +__mempool_get_bulk(struct rte_mempool *mp, void **obj_table, + unsigned n, int is_mc) +{ + int ret; +#if RTE_MEMPOOL_CACHE_MAX_SIZE > 0 + struct rte_mempool_cache *cache; + uint32_t index, len; + void **cache_objs; + unsigned lcore_id = rte_lcore_id(); + uint32_t cache_size = mp->cache_size; + + /* no cache, single consumer, n >= cache_size or non-EAL thread */ + if (unlikely(cache_size == 0 || is_mc == 0 || + n >= cache_size || lcore_id >= RTE_MAX_LCORE)) + goto ring_dequeue; + + cache = &mp->local_cache[lcore_id]; + cache_objs = cache->objs; + + /* Can this be satisfied from the cache? */ + if (cache->len < n) { + /* No. Backfill the cache first, and then fill from it */ + uint32_t req = n + (cache_size - cache->len); + + /* How many do we require i.e. number to fill the cache + the request */ + ret = rte_ring_mc_dequeue_bulk(mp->ring, &cache->objs[cache->len], req); + if (unlikely(ret < 0)) { + /* + * In the offchance that we are buffer constrained, + * where we are not able to allocate cache + n, go to + * the ring directly. If that fails, we are truly out of + * buffers. + */ + goto ring_dequeue; + } + + cache->len += req; + } + + /* Now fill in the response, from the top of the cache downwards
*/ + for (index = 0, len = cache->len - 1; index < n; ++index, len--, obj_table++) + *obj_table = cache_objs[len]; + + cache->len -= n; + + __MEMPOOL_STAT_ADD(mp, get_success, n); + + return 0; + +ring_dequeue: +#endif /* RTE_MEMPOOL_CACHE_MAX_SIZE > 0 */ + + /* no cache used or backfill failed: get all n objects from the ring */ + if (is_mc) + ret = rte_ring_mc_dequeue_bulk(mp->ring, obj_table, n); + else + ret = rte_ring_sc_dequeue_bulk(mp->ring, obj_table, n); + + if (ret < 0) + __MEMPOOL_STAT_ADD(mp, get_fail, n); + else + __MEMPOOL_STAT_ADD(mp, get_success, n); + + return ret; +} + +/** + * Get several objects from the mempool (multi-consumers safe). + * + * If cache is enabled, objects will be retrieved first from cache, + * subsequently from the common pool. Note that it can return -ENOENT when + * the local cache and common pool are empty, even if cache from other + * lcores are full. + * + * @param mp + * A pointer to the mempool structure. + * @param obj_table + * A pointer to a table of void * pointers (objects) that will be filled. + * @param n + * The number of objects to get from mempool to obj_table. + * @return + * - 0: Success; objects taken. + * - -ENOENT: Not enough entries in the mempool; no object is retrieved. + */ +static inline int __attribute__((always_inline)) +rte_mempool_mc_get_bulk(struct rte_mempool *mp, void **obj_table, unsigned n) +{ + int ret; + ret = __mempool_get_bulk(mp, obj_table, n, 1); + if (ret == 0) + __mempool_check_cookies(mp, obj_table, n, 1); + return ret; +} + +/** + * Get several objects from the mempool (NOT multi-consumers safe). + * + * If cache is enabled, objects will be retrieved first from cache, + * subsequently from the common pool. Note that it can return -ENOENT when + * the local cache and common pool are empty, even if cache from other + * lcores are full. + * + * @param mp + * A pointer to the mempool structure. + * @param obj_table + * A pointer to a table of void * pointers (objects) that will be filled.
+ * @param n + * The number of objects to get from the mempool to obj_table. + * @return + * - 0: Success; objects taken. + * - -ENOENT: Not enough entries in the mempool; no object is + * retrieved. + */ +static inline int __attribute__((always_inline)) +rte_mempool_sc_get_bulk(struct rte_mempool *mp, void **obj_table, unsigned n) +{ + int ret; + ret = __mempool_get_bulk(mp, obj_table, n, 0); + if (ret == 0) + __mempool_check_cookies(mp, obj_table, n, 1); + return ret; +} + +/** + * Get several objects from the mempool. + * + * This function calls the multi-consumers or the single-consumer + * version, depending on the default behaviour that was specified at + * mempool creation time (see flags). + * + * If cache is enabled, objects will be retrieved first from cache, + * subsequently from the common pool. Note that it can return -ENOENT when + * the local cache and common pool are empty, even if cache from other + * lcores are full. + * + * @param mp + * A pointer to the mempool structure. + * @param obj_table + * A pointer to a table of void * pointers (objects) that will be filled. + * @param n + * The number of objects to get from the mempool to obj_table. + * @return + * - 0: Success; objects taken + * - -ENOENT: Not enough entries in the mempool; no object is retrieved. + */ +static inline int __attribute__((always_inline)) +rte_mempool_get_bulk(struct rte_mempool *mp, void **obj_table, unsigned n) +{ + int ret; + ret = __mempool_get_bulk(mp, obj_table, n, + !(mp->flags & MEMPOOL_F_SC_GET)); + if (ret == 0) + __mempool_check_cookies(mp, obj_table, n, 1); + return ret; +} + +/** + * Get one object from the mempool (multi-consumers safe). + * + * If cache is enabled, objects will be retrieved first from cache, + * subsequently from the common pool. Note that it can return -ENOENT when + * the local cache and common pool are empty, even if cache from other + * lcores are full. + * + * @param mp + * A pointer to the mempool structure. 
+ * @param obj_p + * A pointer to a void * pointer (object) that will be filled. + * @return + * - 0: Success; objects taken. + * - -ENOENT: Not enough entries in the mempool; no object is retrieved. + */ +static inline int __attribute__((always_inline)) +rte_mempool_mc_get(struct rte_mempool *mp, void **obj_p) +{ + return rte_mempool_mc_get_bulk(mp, obj_p, 1); +} + +/** + * Get one object from the mempool (NOT multi-consumers safe). + * + * If cache is enabled, objects will be retrieved first from cache, + * subsequently from the common pool. Note that it can return -ENOENT when + * the local cache and common pool are empty, even if cache from other + * lcores are full. + * + * @param mp + * A pointer to the mempool structure. + * @param obj_p + * A pointer to a void * pointer (object) that will be filled. + * @return + * - 0: Success; objects taken. + * - -ENOENT: Not enough entries in the mempool; no object is retrieved. + */ +static inline int __attribute__((always_inline)) +rte_mempool_sc_get(struct rte_mempool *mp, void **obj_p) +{ + return rte_mempool_sc_get_bulk(mp, obj_p, 1); +} + +/** + * Get one object from the mempool. + * + * This function calls the multi-consumers or the single-consumer + * version, depending on the default behavior that was specified at + * mempool creation (see flags). + * + * If cache is enabled, objects will be retrieved first from cache, + * subsequently from the common pool. Note that it can return -ENOENT when + * the local cache and common pool are empty, even if cache from other + * lcores are full. + * + * @param mp + * A pointer to the mempool structure. + * @param obj_p + * A pointer to a void * pointer (object) that will be filled. + * @return + * - 0: Success; objects taken. + * - -ENOENT: Not enough entries in the mempool; no object is retrieved. 
+ */ +static inline int __attribute__((always_inline)) +rte_mempool_get(struct rte_mempool *mp, void **obj_p) +{ + return rte_mempool_get_bulk(mp, obj_p, 1); +} + +/** + * Return the number of entries in the mempool. + * + * When cache is enabled, this function has to browse the length of + * all lcores, so it should not be used in a data path, but only for + * debug purposes. + * + * @param mp + * A pointer to the mempool structure. + * @return + * The number of entries in the mempool. + */ +unsigned rte_mempool_count(const struct rte_mempool *mp); + +/** + * Return the number of free entries in the mempool ring. + * i.e. how many entries can be freed back to the mempool. + * + * NOTE: This corresponds to the number of elements *allocated* from the + * memory pool, not the number of elements in the pool itself. To count + * the number elements currently available in the pool, use "rte_mempool_count" + * + * When cache is enabled, this function has to browse the length of + * all lcores, so it should not be used in a data path, but only for + * debug purposes. + * + * @param mp + * A pointer to the mempool structure. + * @return + * The number of free entries in the mempool. + */ +static inline unsigned +rte_mempool_free_count(const struct rte_mempool *mp) +{ + return mp->size - rte_mempool_count(mp); +} + +/** + * Test if the mempool is full. + * + * When cache is enabled, this function has to browse the length of all + * lcores, so it should not be used in a data path, but only for debug + * purposes. + * + * @param mp + * A pointer to the mempool structure. + * @return + * - 1: The mempool is full. + * - 0: The mempool is not full. + */ +static inline int +rte_mempool_full(const struct rte_mempool *mp) +{ + return !!(rte_mempool_count(mp) == mp->size); +} + +/** + * Test if the mempool is empty. + * + * When cache is enabled, this function has to browse the length of all + * lcores, so it should not be used in a data path, but only for debug + * purposes. 
+ * + * @param mp + * A pointer to the mempool structure. + * @return + * - 1: The mempool is empty. + * - 0: The mempool is not empty. + */ +static inline int +rte_mempool_empty(const struct rte_mempool *mp) +{ + return !!(rte_mempool_count(mp) == 0); +} + +/** + * Return the physical address of elt, which is an element of the pool mp. + * + * With huge pages, the address is computed from the mempool's elt_pa[] + * table: the entry is selected by the offset of elt from elt_va_start + * shifted right by pg_shift, and the offset bits kept by pg_mask are + * added to it. Without huge pages, rte_mem_virt2phy() is called for the + * element. + * + * @param mp + * A pointer to the mempool structure. + * @param elt + * A pointer (virtual address) to the element of the pool. + * @return + * The physical address of the elt element. + */ +static inline phys_addr_t +rte_mempool_virt2phy(const struct rte_mempool *mp, const void *elt) +{ + if (rte_eal_has_hugepages()) { + uintptr_t off; + + off = (const char *)elt - (const char *)mp->elt_va_start; + return (mp->elt_pa[off >> mp->pg_shift] + (off & mp->pg_mask)); + } else { + /* + * If huge pages are disabled, we cannot assume the + * memory region to be physically contiguous. + * Look up each element individually. + */ + return rte_mem_virt2phy(elt); + } +} + +/** + * Check the consistency of mempool objects. + * + * Verify the coherency of fields in the mempool structure. Also check + * that the cookies of mempool objects (even the ones that are not + * present in pool) have a correct value. If not, a panic will occur. + * + * @param mp + * A pointer to the mempool structure. + */ +void rte_mempool_audit(const struct rte_mempool *mp); + +/** + * Return a pointer to the private data in a mempool structure. + * + * @param mp + * A pointer to the mempool structure. + * @return + * A pointer to the private data. + */ +static inline void *rte_mempool_get_priv(struct rte_mempool *mp) +{ + return (char *)mp + MEMPOOL_HEADER_SIZE(mp, mp->pg_num); +} + +/** + * Dump the status of all mempools to a file. + * + * @param f + * A pointer to a file for output + */ +void rte_mempool_list_dump(FILE *f); + +/** + * Search for a mempool by its name + * + * @param name + * The name of the mempool.
+ * @return + * The pointer to the mempool matching the name, or NULL on error + * with rte_errno set appropriately. Possible rte_errno values include: + * - ENOENT - required entry not available to return. + * + */ +struct rte_mempool *rte_mempool_lookup(const char *name); + +/** + * Get the header, trailer and total size of a mempool element. + * + * Given a desired size of the mempool element and mempool flags, + * calculates header, trailer, body and total sizes of the mempool object. + * + * @param elt_size + * The size of each element. + * @param flags + * The flags used for the mempool creation. + * Consult rte_mempool_create() for more information about possible values. + * @param sz + * The calculated detailed size of the mempool object. May be NULL. + * @return + * Total size of the mempool object. + */ +uint32_t rte_mempool_calc_obj_size(uint32_t elt_size, uint32_t flags, + struct rte_mempool_objsz *sz); + +/** + * Get the size of memory required to store mempool elements. + * + * Calculate the maximum amount of memory required to store given number + * of objects. Assume that the memory buffer will be aligned at page + * boundary. + * + * Note that if the object size is bigger than the page size, then it assumes + * that pages are grouped in subsets of physically contiguous pages big + * enough to store at least one object. + * + * @param elt_num + * Number of elements. + * @param elt_sz + * The size of each element. + * @param pg_shift + * LOG2 of the physical pages size. + * @return + * Required memory size aligned at page boundary. + */ +size_t rte_mempool_xmem_size(uint32_t elt_num, size_t elt_sz, + uint32_t pg_shift); + +/** + * Get the size of memory required to store mempool elements. + * + * Calculate how much memory would be actually required with the given + * memory footprint to store required number of objects. + * + * @param vaddr + * Virtual address of the externally allocated memory buffer.
+ * Will be used to store mempool objects. + * @param elt_num + * Number of elements. + * @param elt_sz + * The size of each element. + * @param paddr + * Array of physical addresses of the pages that comprises given memory + * buffer. + * @param pg_num + * Number of elements in the paddr array. + * @param pg_shift + * LOG2 of the physical pages size. + * @return + * On success, the number of bytes needed to store given number of + * objects, aligned to the given page size. If the provided memory + * buffer is too small, return a negative value whose absolute value + * is the actual number of elements that can be stored in that buffer. + */ +ssize_t rte_mempool_xmem_usage(void *vaddr, uint32_t elt_num, size_t elt_sz, + const phys_addr_t paddr[], uint32_t pg_num, uint32_t pg_shift); + +/** + * Walk list of all memory pools + * + * @param func + * Iterator function + * @param arg + * Argument passed to iterator + */ +void rte_mempool_walk(void (*func)(const struct rte_mempool *, void *arg), + void *arg); + +#ifdef __cplusplus +} +#endif + +#endif /* _RTE_MEMPOOL_H_ */ diff --git a/src/dpdk22/lib/librte_net/rte_arp.h b/src/dpdk22/lib/librte_net/rte_arp.h new file mode 100644 index 00000000..18364187 --- /dev/null +++ b/src/dpdk22/lib/librte_net/rte_arp.h @@ -0,0 +1,83 @@ +/* BSD LICENSE + * + * Copyright(c) 2013 6WIND. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of 6WIND S.A. 
nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef _RTE_ARP_H_ +#define _RTE_ARP_H_ + +/** + * @file + * + * ARP-related defines + */ + +#include +#include + +#ifdef __cplusplus +extern "C" { +#endif + +/** + * ARP header IPv4 payload. + */ +struct arp_ipv4 { + struct ether_addr arp_sha; /**< sender hardware address */ + uint32_t arp_sip; /**< sender IP address */ + struct ether_addr arp_tha; /**< target hardware address */ + uint32_t arp_tip; /**< target IP address */ +} __attribute__((__packed__)); + +/** + * ARP header. 
+ */ +struct arp_hdr { + uint16_t arp_hrd; /* format of hardware address */ +#define ARP_HRD_ETHER 1 /* ARP Ethernet address format */ + + uint16_t arp_pro; /* format of protocol address */ + uint8_t arp_hln; /* length of hardware address */ + uint8_t arp_pln; /* length of protocol address */ + uint16_t arp_op; /* ARP opcode (command) */ +#define ARP_OP_REQUEST 1 /* request to resolve address */ +#define ARP_OP_REPLY 2 /* response to previous request */ +#define ARP_OP_REVREQUEST 3 /* request proto addr given hardware */ +#define ARP_OP_REVREPLY 4 /* response giving protocol address */ +#define ARP_OP_INVREQUEST 8 /* request to identify peer */ +#define ARP_OP_INVREPLY 9 /* response identifying peer */ + + struct arp_ipv4 arp_data; +} __attribute__((__packed__)); + +#ifdef __cplusplus +} +#endif + +#endif /* _RTE_ARP_H_ */ diff --git a/src/dpdk22/lib/librte_net/rte_icmp.h b/src/dpdk22/lib/librte_net/rte_icmp.h new file mode 100644 index 00000000..8b287f6d --- /dev/null +++ b/src/dpdk22/lib/librte_net/rte_icmp.h @@ -0,0 +1,101 @@ +/* BSD LICENSE + * + * Copyright(c) 2013 6WIND. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of 6WIND S.A. nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ +/* + * Copyright (c) 1982, 1986, 1990, 1993 + * The Regents of the University of California. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by the University of + * California, Berkeley and its contributors. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * @(#)in.h 8.3 (Berkeley) 1/3/94 + * $FreeBSD: src/sys/netinet/in.h,v 1.82 2003/10/25 09:37:10 ume Exp $ + */ + +#ifndef _RTE_ICMP_H_ +#define _RTE_ICMP_H_ + +/** + * @file + * + * ICMP-related defines + */ + +#include + +#ifdef __cplusplus +extern "C" { +#endif + +/** + * ICMP Header + */ +struct icmp_hdr { + uint8_t icmp_type; /**< ICMP packet type. */ + uint8_t icmp_code; /**< ICMP packet code. */ + uint16_t icmp_cksum; /**< ICMP packet checksum. */ + uint16_t icmp_ident; /**< ICMP packet identifier. */ + uint16_t icmp_seq_nb; /**< ICMP packet sequence number. */ +} __attribute__((__packed__)); + +/* ICMP packet types (values for icmp_type) */ +#define IP_ICMP_ECHO_REPLY 0 +#define IP_ICMP_ECHO_REQUEST 8 + +#ifdef __cplusplus +} +#endif + +#endif /* _RTE_ICMP_H_ */ diff --git a/src/dpdk22/lib/librte_net/rte_ip.h b/src/dpdk22/lib/librte_net/rte_ip.h new file mode 100644 index 00000000..5b7554ab --- /dev/null +++ b/src/dpdk22/lib/librte_net/rte_ip.h @@ -0,0 +1,413 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. + * Copyright 2014 6WIND S.A. + * All rights reserved.
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +/* + * Copyright (c) 1982, 1986, 1990, 1993 + * The Regents of the University of California. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. 
Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by the University of + * California, Berkeley and its contributors. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. 
+ * + * @(#)in.h 8.3 (Berkeley) 1/3/94 + * $FreeBSD: src/sys/netinet/in.h,v 1.82 2003/10/25 09:37:10 ume Exp $ + */ + +#ifndef _RTE_IP_H_ +#define _RTE_IP_H_ + +/** + * @file + * + * IP-related defines + */ + +#include +#include + +#include +#include + +#ifdef __cplusplus +extern "C" { +#endif + +/** + * IPv4 Header + */ +struct ipv4_hdr { + uint8_t version_ihl; /**< version and header length */ + uint8_t type_of_service; /**< type of service */ + uint16_t total_length; /**< length of packet */ + uint16_t packet_id; /**< packet ID */ + uint16_t fragment_offset; /**< fragmentation offset */ + uint8_t time_to_live; /**< time to live */ + uint8_t next_proto_id; /**< protocol ID */ + uint16_t hdr_checksum; /**< header checksum */ + uint32_t src_addr; /**< source address */ + uint32_t dst_addr; /**< destination address */ +} __attribute__((__packed__)); + +/** + * Create an IPv4 address as a 32-bit value with a in the most significant + * byte and d in the least significant one. Only the low 8 bits of each + * argument are used. Each octet is converted to uint32_t before it is + * shifted, so octets >= 128 cannot overflow a signed int in the << 24 step. + */ +#define IPv4(a,b,c,d) ((((uint32_t)(a) & 0xff) << 24) | \ + (((uint32_t)(b) & 0xff) << 16) | \ + (((uint32_t)(c) & 0xff) << 8) | \ + ((uint32_t)(d) & 0xff)) + +/** Maximal IPv4 packet length (including a header) */ +#define IPV4_MAX_PKT_LEN 65535 + +/** Internet header length mask for version_ihl field */ +#define IPV4_HDR_IHL_MASK (0x0f) +/** + * Internet header length field multiplier (IHL field specifies overall header + * length in number of 4-byte words) + */ +#define IPV4_IHL_MULTIPLIER (4) + +/* Fragment Offset * Flags.
*/ +#define IPV4_HDR_DF_SHIFT 14 +#define IPV4_HDR_MF_SHIFT 13 +#define IPV4_HDR_FO_SHIFT 3 + +#define IPV4_HDR_DF_FLAG (1 << IPV4_HDR_DF_SHIFT) +#define IPV4_HDR_MF_FLAG (1 << IPV4_HDR_MF_SHIFT) + +#define IPV4_HDR_OFFSET_MASK ((1 << IPV4_HDR_MF_SHIFT) - 1) /**< the 13 bits below the MF flag */ + +#define IPV4_HDR_OFFSET_UNITS 8 + +/* + * IPv4 address types + */ +#define IPV4_ANY ((uint32_t)0x00000000) /**< 0.0.0.0 */ +#define IPV4_LOOPBACK ((uint32_t)0x7f000001) /**< 127.0.0.1 */ +#define IPV4_BROADCAST ((uint32_t)0xe0000000) /**< 224.0.0.0 - NOTE(review): same value as IPV4_MIN_MCAST, not 255.255.255.255; confirm the name is intended */ +#define IPV4_ALLHOSTS_GROUP ((uint32_t)0xe0000001) /**< 224.0.0.1 */ +#define IPV4_ALLRTRS_GROUP ((uint32_t)0xe0000002) /**< 224.0.0.2 */ +#define IPV4_MAX_LOCAL_GROUP ((uint32_t)0xe00000ff) /**< 224.0.0.255 */ + +/* + * IPv4 Multicast-related macros + */ +#define IPV4_MIN_MCAST IPv4(224, 0, 0, 0) /**< Minimal IPv4-multicast address */ +#define IPV4_MAX_MCAST IPv4(239, 255, 255, 255) /**< Maximum IPv4 multicast address */ + +#define IS_IPV4_MCAST(x) \ + ((x) >= IPV4_MIN_MCAST && (x) <= IPV4_MAX_MCAST) /**< check if IPv4 address is multicast; x is evaluated twice */ + +/** + * @internal Calculate a sum of all words in the buffer. + * Helper routine for the rte_raw_cksum(). + * + * @param buf + * Pointer to the buffer. + * @param len + * Length of the buffer. + * @param sum + * Initial value of the sum. + * @return + * sum += Sum of all words in the buffer. 
+ */ +static inline uint32_t +__rte_raw_cksum(const void *buf, size_t len, uint32_t sum) +{ + /* workaround gcc strict-aliasing warning */ + uintptr_t ptr = (uintptr_t)buf; + typedef uint16_t __attribute__((__may_alias__)) u16_p; + const u16_p *u16 = (const u16_p *)ptr; + + while (len >= (sizeof(*u16) * 4)) { /* unrolled: four 16-bit words per iteration */ + sum += u16[0]; + sum += u16[1]; + sum += u16[2]; + sum += u16[3]; + len -= sizeof(*u16) * 4; + u16 += 4; + } + while (len >= sizeof(*u16)) { /* remaining whole 16-bit words */ + sum += *u16; + len -= sizeof(*u16); + u16 += 1; + } + + /* odd length: the last byte is added as an 8-bit value */ + if (len == 1) + sum += *((const uint8_t *)u16); + + return sum; +} + +/** + * @internal Reduce a sum to the non-complemented checksum by folding the upper 16 bits into the lower 16 bits twice. + * Helper routine for the rte_raw_cksum(). + * + * @param sum + * Value of the sum. + * @return + * The non-complemented checksum. + */ +static inline uint16_t +__rte_raw_cksum_reduce(uint32_t sum) +{ + sum = ((sum & 0xffff0000) >> 16) + (sum & 0xffff); + sum = ((sum & 0xffff0000) >> 16) + (sum & 0xffff); + return (uint16_t)sum; +} + +/** + * Process the non-complemented checksum of a buffer. + * + * @param buf + * Pointer to the buffer. + * @param len + * Length of the buffer in bytes. + * @return + * The non-complemented checksum. + */ +static inline uint16_t +rte_raw_cksum(const void *buf, size_t len) +{ + uint32_t sum; + + sum = __rte_raw_cksum(buf, len, 0); + return __rte_raw_cksum_reduce(sum); +} + +/** + * Process the IPv4 checksum of an IPv4 header. + * + * The checksum field must be set to 0 by the caller. Only sizeof(struct ipv4_hdr) bytes are summed, so IPv4 options are not covered. + * + * @param ipv4_hdr + * The pointer to the contiguous IPv4 header. + * @return + * The complemented checksum to set in the IP packet. + */ +static inline uint16_t +rte_ipv4_cksum(const struct ipv4_hdr *ipv4_hdr) +{ + uint16_t cksum; + cksum = rte_raw_cksum(ipv4_hdr, sizeof(struct ipv4_hdr)); + return (cksum == 0xffff) ? cksum : ~cksum; /* a raw sum of 0xffff is returned unchanged instead of being complemented to 0 */ +} + +/** + * Process the pseudo-header checksum of an IPv4 header. + * + * The checksum field must be set to 0 by the caller. 
+ * + * Depending on the ol_flags, the pseudo-header checksum expected by the + * drivers is not the same. For instance, when TSO is enabled, the IP + * payload length must not be included in the packet. + * + * When ol_flags is 0, it computes the standard pseudo-header checksum. + * + * @param ipv4_hdr + * The pointer to the contiguous IPv4 header. + * @param ol_flags + * The ol_flags of the associated mbuf. + * @return + * The non-complemented checksum to set in the L4 header. + */ +static inline uint16_t +rte_ipv4_phdr_cksum(const struct ipv4_hdr *ipv4_hdr, uint64_t ol_flags) +{ + struct ipv4_psd_header { + uint32_t src_addr; /* IP address of source host. */ + uint32_t dst_addr; /* IP address of destination host. */ + uint8_t zero; /* zero. */ + uint8_t proto; /* L4 protocol type. */ + uint16_t len; /* L4 length. */ + } psd_hdr; + + psd_hdr.src_addr = ipv4_hdr->src_addr; + psd_hdr.dst_addr = ipv4_hdr->dst_addr; + psd_hdr.zero = 0; + psd_hdr.proto = ipv4_hdr->next_proto_id; + if (ol_flags & PKT_TX_TCP_SEG) { + psd_hdr.len = 0; /* PKT_TX_TCP_SEG set: the L4 length is left out of the sum */ + } else { + psd_hdr.len = rte_cpu_to_be_16( + (uint16_t)(rte_be_to_cpu_16(ipv4_hdr->total_length) + - sizeof(struct ipv4_hdr))); + } + return rte_raw_cksum(&psd_hdr, sizeof(psd_hdr)); +} + +/** + * Process the IPv4 UDP or TCP checksum. + * + * The IPv4 header must not contain options. The IP and layer 4 + * checksum must be set to 0 in the packet by the caller. + * + * @param ipv4_hdr + * The pointer to the contiguous IPv4 header. + * @param l4_hdr + * The pointer to the beginning of the L4 header. + * @return + * The complemented checksum to set in the IP packet. 
+ */ +static inline uint16_t +rte_ipv4_udptcp_cksum(const struct ipv4_hdr *ipv4_hdr, const void *l4_hdr) +{ + uint32_t cksum; + uint32_t l4_len; + + l4_len = rte_be_to_cpu_16(ipv4_hdr->total_length) - + sizeof(struct ipv4_hdr); /* fixed header size only, hence the no-options requirement */ + + cksum = rte_raw_cksum(l4_hdr, l4_len); + cksum += rte_ipv4_phdr_cksum(ipv4_hdr, 0); + + cksum = ((cksum & 0xffff0000) >> 16) + (cksum & 0xffff); + cksum = (~cksum) & 0xffff; + if (cksum == 0) + cksum = 0xffff; /* never return 0 */ + + return cksum; +} + +/** + * IPv6 Header + */ +struct ipv6_hdr { + uint32_t vtc_flow; /**< IP version, traffic class & flow label. */ + uint16_t payload_len; /**< Payload length (big-endian); rte_ipv6_udptcp_cksum() uses it directly as the L4 length, so it does not include this header. */ + uint8_t proto; /**< Protocol, next header. */ + uint8_t hop_limits; /**< Hop limits. */ + uint8_t src_addr[16]; /**< IP address of source host. */ + uint8_t dst_addr[16]; /**< IP address of destination host(s). */ +} __attribute__((__packed__)); + +/** + * Process the pseudo-header checksum of an IPv6 header. + * + * Depending on the ol_flags, the pseudo-header checksum expected by the + * drivers is not the same. For instance, when TSO is enabled, the IPv6 + * payload length must not be included in the pseudo-header checksum. + * + * When ol_flags is 0, it computes the standard pseudo-header checksum. + * + * @param ipv6_hdr + * The pointer to the contiguous IPv6 header. + * @param ol_flags + * The ol_flags of the associated mbuf. + * @return + * The non-complemented checksum to set in the L4 header. + */ +static inline uint16_t +rte_ipv6_phdr_cksum(const struct ipv6_hdr *ipv6_hdr, uint64_t ol_flags) +{ + uint32_t sum; + struct { + uint32_t len; /* L4 length. 
*/ + uint32_t proto; /* L4 protocol - top 3 bytes must be zero */ + } psd_hdr; + + psd_hdr.proto = (ipv6_hdr->proto << 24); /* NOTE(review): as a host-order value this puts proto in the last byte in memory only on little-endian CPUs - confirm big-endian behaviour */ + if (ol_flags & PKT_TX_TCP_SEG) { + psd_hdr.len = 0; /* PKT_TX_TCP_SEG set: the L4 length is left out of the sum */ + } else { + psd_hdr.len = ipv6_hdr->payload_len; /* copied as stored, no byte swap */ + } + + sum = __rte_raw_cksum(ipv6_hdr->src_addr, + sizeof(ipv6_hdr->src_addr) + sizeof(ipv6_hdr->dst_addr), + 0); /* one pass over src_addr and dst_addr, which are adjacent in the packed header */ + sum = __rte_raw_cksum(&psd_hdr, sizeof(psd_hdr), sum); + return __rte_raw_cksum_reduce(sum); +} + +/** + * Process the IPv6 UDP or TCP checksum. + * + * payload_len is used as the L4 length, so IPv6 extension headers are not accounted for. The layer 4 checksum + * must be set to 0 in the packet by the caller. + * + * @param ipv6_hdr + * The pointer to the contiguous IPv6 header. + * @param l4_hdr + * The pointer to the beginning of the L4 header. + * @return + * The complemented checksum to set in the IP packet. + */ +static inline uint16_t +rte_ipv6_udptcp_cksum(const struct ipv6_hdr *ipv6_hdr, const void *l4_hdr) +{ + uint32_t cksum; + uint32_t l4_len; + + l4_len = rte_be_to_cpu_16(ipv6_hdr->payload_len); + + cksum = rte_raw_cksum(l4_hdr, l4_len); + cksum += rte_ipv6_phdr_cksum(ipv6_hdr, 0); + + cksum = ((cksum & 0xffff0000) >> 16) + (cksum & 0xffff); + cksum = (~cksum) & 0xffff; + if (cksum == 0) + cksum = 0xffff; /* never return 0 */ + + return cksum; +} + +#ifdef __cplusplus +} +#endif + +#endif /* _RTE_IP_H_ */ diff --git a/src/dpdk22/lib/librte_net/rte_sctp.h b/src/dpdk22/lib/librte_net/rte_sctp.h new file mode 100644 index 00000000..688e126f --- /dev/null +++ b/src/dpdk22/lib/librte_net/rte_sctp.h @@ -0,0 +1,99 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. 
+ * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +/* + * Copyright (c) 1982, 1986, 1990, 1993 + * The Regents of the University of California. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. 
All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by the University of + * California, Berkeley and its contributors. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * @(#)in.h 8.3 (Berkeley) 1/3/94 + * $FreeBSD: src/sys/netinet/in.h,v 1.82 2003/10/25 09:37:10 ume Exp $ + */ + +/** + * @file + * + * SCTP-related defines + */ + +#ifndef _RTE_SCTP_H_ +#define _RTE_SCTP_H_ + +#ifdef __cplusplus +extern "C" { +#endif + +#include + +/** + * SCTP Header + */ +struct sctp_hdr { + uint16_t src_port; /**< Source port. */ + uint16_t dst_port; /**< Destination port. */ + uint32_t tag; /**< Validation tag. */ + uint32_t cksum; /**< Checksum. 
*/ +} __attribute__((__packed__)); + +#ifdef __cplusplus +} +#endif + +#endif /* _RTE_SCTP_H_ */ diff --git a/src/dpdk22/lib/librte_net/rte_tcp.h b/src/dpdk22/lib/librte_net/rte_tcp.h new file mode 100644 index 00000000..28b61e6d --- /dev/null +++ b/src/dpdk22/lib/librte_net/rte_tcp.h @@ -0,0 +1,104 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +/* + * Copyright (c) 1982, 1986, 1990, 1993 + * The Regents of the University of California. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by the University of + * California, Berkeley and its contributors. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. 
+ * + * @(#)in.h 8.3 (Berkeley) 1/3/94 + * $FreeBSD: src/sys/netinet/in.h,v 1.82 2003/10/25 09:37:10 ume Exp $ + */ + +#ifndef _RTE_TCP_H_ +#define _RTE_TCP_H_ + +/** + * @file + * + * TCP-related defines + */ + +#include + +#ifdef __cplusplus +extern "C" { +#endif + +/** + * TCP Header (the 20 fixed bytes, options not included) + */ +struct tcp_hdr { + uint16_t src_port; /**< TCP source port. */ + uint16_t dst_port; /**< TCP destination port. */ + uint32_t sent_seq; /**< TX data sequence number. */ + uint32_t recv_ack; /**< RX data acknowledgement sequence number. */ + uint8_t data_off; /**< Data offset. */ + uint8_t tcp_flags; /**< TCP flags. */ + uint16_t rx_win; /**< RX flow control window. */ + uint16_t cksum; /**< TCP checksum. */ + uint16_t tcp_urp; /**< TCP urgent pointer, if any. */ +} __attribute__((__packed__)); + +#ifdef __cplusplus +} +#endif + +#endif /* _RTE_TCP_H_ */ diff --git a/src/dpdk22/lib/librte_net/rte_udp.h b/src/dpdk22/lib/librte_net/rte_udp.h new file mode 100644 index 00000000..bc5be4af --- /dev/null +++ b/src/dpdk22/lib/librte_net/rte_udp.h @@ -0,0 +1,99 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +/* + * Copyright (c) 1982, 1986, 1990, 1993 + * The Regents of the University of California. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by the University of + * California, Berkeley and its contributors. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * @(#)in.h 8.3 (Berkeley) 1/3/94 + * $FreeBSD: src/sys/netinet/in.h,v 1.82 2003/10/25 09:37:10 ume Exp $ + */ + +#ifndef _RTE_UDP_H_ +#define _RTE_UDP_H_ + +/** + * @file + * + * UDP-related defines + */ + +#include + +#ifdef __cplusplus +extern "C" { +#endif + +/** + * UDP Header (8 bytes) + */ +struct udp_hdr { + uint16_t src_port; /**< UDP source port. */ + uint16_t dst_port; /**< UDP destination port. */ + uint16_t dgram_len; /**< UDP datagram length. */ + uint16_t dgram_cksum; /**< UDP datagram checksum. */ +} __attribute__((__packed__)); + +#ifdef __cplusplus +} +#endif + +#endif /* _RTE_UDP_H_ */ diff --git a/src/dpdk22/lib/librte_pipeline/rte_pipeline.c b/src/dpdk22/lib/librte_pipeline/rte_pipeline.c new file mode 100644 index 00000000..d625fd25 --- /dev/null +++ b/src/dpdk22/lib/librte_pipeline/rte_pipeline.c @@ -0,0 +1,1638 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. + * All rights reserved. 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "rte_pipeline.h" + +#define RTE_TABLE_INVALID UINT32_MAX + +#ifdef RTE_PIPELINE_STATS_COLLECT +#define RTE_PIPELINE_STATS_ADD(counter, val) \ + ({ (counter) += (val); }) + +#define RTE_PIPELINE_STATS_ADD_M(counter, mask) \ + ({ (counter) += __builtin_popcountll(mask); }) /* adds the number of bits set in mask */ +#else /* stats collection disabled: both macros expand to nothing */ +#define RTE_PIPELINE_STATS_ADD(counter, val) +#define RTE_PIPELINE_STATS_ADD_M(counter, mask) +#endif + +struct rte_port_in { + /* Input parameters */ + struct rte_port_in_ops ops; + rte_pipeline_port_in_action_handler f_action; + void *arg_ah; + uint32_t burst_size; + + /* The table to which this port is connected */ + uint32_t table_id; + + /* Handle to low-level port */ + void *h_port; + + /* List of enabled ports */ + struct rte_port_in *next; + + uint64_t n_pkts_dropped_by_ah; +}; + +struct rte_port_out { + /* Input parameters */ + struct rte_port_out_ops ops; + rte_pipeline_port_out_action_handler f_action; + rte_pipeline_port_out_action_handler_bulk f_action_bulk; + void *arg_ah; + + /* Handle to low-level port */ + void *h_port; + + uint64_t n_pkts_dropped_by_ah; +}; + +struct rte_table { + /* Input parameters */ + struct rte_table_ops ops; + rte_pipeline_table_action_handler_hit f_action_hit; + rte_pipeline_table_action_handler_miss f_action_miss; + void *arg_ah; + struct rte_pipeline_table_entry *default_entry; + uint32_t entry_size; /* sizeof(struct rte_pipeline_table_entry) plus the action data size, set at table creation */ + + uint32_t table_next_id; /* table id used by the RTE_PIPELINE_ACTION_TABLE entries of this table */ + uint32_t table_next_id_valid; /* non-zero once table_next_id has been set */ + + /* Handle to the low-level table object */ + void *h_table; + + /* Stats for this table. 
*/ + uint64_t n_pkts_dropped_by_lkp_hit_ah; + uint64_t n_pkts_dropped_by_lkp_miss_ah; + uint64_t n_pkts_dropped_lkp_hit; + uint64_t n_pkts_dropped_lkp_miss; +}; + +#define RTE_PIPELINE_MAX_NAME_SZ 124 + +struct rte_pipeline { + /* Input parameters */ + char name[RTE_PIPELINE_MAX_NAME_SZ]; + int socket_id; + uint32_t offset_port_id; + + /* Internal tables */ + struct rte_port_in ports_in[RTE_PIPELINE_PORT_IN_MAX]; + struct rte_port_out ports_out[RTE_PIPELINE_PORT_OUT_MAX]; + struct rte_table tables[RTE_PIPELINE_TABLE_MAX]; + + /* Occupancy of internal tables */ + uint32_t num_ports_in; + uint32_t num_ports_out; + uint32_t num_tables; + + /* List of enabled ports */ + uint64_t enabled_port_in_mask; + struct rte_port_in *port_in_first; + + /* Pipeline run structures */ + struct rte_mbuf *pkts[RTE_PORT_IN_BURST_SIZE_MAX]; + struct rte_pipeline_table_entry *entries[RTE_PORT_IN_BURST_SIZE_MAX]; + uint64_t action_mask0[RTE_PIPELINE_ACTIONS]; + uint64_t action_mask1[RTE_PIPELINE_ACTIONS]; +} __rte_cache_aligned; + +static inline uint32_t +rte_mask_get_next(uint64_t mask, uint32_t pos) +{ + uint64_t mask_rot = (mask << ((63 - pos) & 0x3F)) | + (mask >> ((pos + 1) & 0x3F)); /* rotate mask so that bit pos+1 lands in bit 0 */ + return (__builtin_ctzll(mask_rot) - (63 - pos)) & 0x3F; /* index of the first set bit after pos, wrapping around (pos itself is considered last); mask must be non-zero since ctz of 0 is undefined */ +} + +static inline uint32_t +rte_mask_get_prev(uint64_t mask, uint32_t pos) +{ + uint64_t mask_rot = (mask >> (pos & 0x3F)) | + (mask << ((64 - pos) & 0x3F)); /* rotate mask so that bit pos-1 lands in bit 63 */ + return ((63 - __builtin_clzll(mask_rot)) + pos) & 0x3F; /* index of the first set bit before pos, wrapping around (pos itself is considered last); mask must be non-zero since clz of 0 is undefined */ +} + +static void +rte_pipeline_table_free(struct rte_table *table); + +static void +rte_pipeline_port_in_free(struct rte_port_in *port); + +static void +rte_pipeline_port_out_free(struct rte_port_out *port); + +/* + * Pipeline + * + */ +static int +rte_pipeline_check_params(struct rte_pipeline_params *params) +{ + if (params == NULL) { + RTE_LOG(ERR, PIPELINE, + "%s: Incorrect value for parameter params\n", __func__); + return -EINVAL; + } + + /* name: only checked for NULL */ + if (params->name == NULL) { + RTE_LOG(ERR, PIPELINE, + "%s: Incorrect value 
for parameter name\n", __func__); + return -EINVAL; + } + + /* socket: must be in the range 0 .. RTE_MAX_NUMA_NODES - 1 */ + if ((params->socket_id < 0) || + (params->socket_id >= RTE_MAX_NUMA_NODES)) { + RTE_LOG(ERR, PIPELINE, + "%s: Incorrect value for parameter socket_id\n", + __func__); + return -EINVAL; + } + + return 0; +} + +struct rte_pipeline * +rte_pipeline_create(struct rte_pipeline_params *params) +{ + struct rte_pipeline *p; + int status; + + /* Check input parameters; any failure makes this function return NULL */ + status = rte_pipeline_check_params(params); + if (status != 0) { + RTE_LOG(ERR, PIPELINE, + "%s: Pipeline params check failed (%d)\n", + __func__, status); + return NULL; + } + + /* Allocate memory for the pipeline on requested socket */ + p = rte_zmalloc_socket("PIPELINE", sizeof(struct rte_pipeline), + RTE_CACHE_LINE_SIZE, params->socket_id); + + if (p == NULL) { + RTE_LOG(ERR, PIPELINE, + "%s: Pipeline memory allocation failed\n", __func__); + return NULL; + } + + /* Save input parameters; snprintf bounds the name copy to RTE_PIPELINE_MAX_NAME_SZ bytes including the terminator */ + snprintf(p->name, RTE_PIPELINE_MAX_NAME_SZ, "%s", params->name); + p->socket_id = params->socket_id; + p->offset_port_id = params->offset_port_id; + + /* Initialize pipeline internal data structure */ + p->num_ports_in = 0; + p->num_ports_out = 0; + p->num_tables = 0; + p->enabled_port_in_mask = 0; + p->port_in_first = NULL; + + return p; +} + +int +rte_pipeline_free(struct rte_pipeline *p) +{ + uint32_t i; + + /* Check input parameters: a NULL pipeline is rejected with -EINVAL */ + if (p == NULL) { + RTE_LOG(ERR, PIPELINE, + "%s: rte_pipeline parameter is NULL\n", __func__); + return -EINVAL; + } + + /* Free input ports */ + for (i = 0; i < p->num_ports_in; i++) { + struct rte_port_in *port = &p->ports_in[i]; + + rte_pipeline_port_in_free(port); + } + + /* Free tables */ + for (i = 0; i < p->num_tables; i++) { + struct rte_table *table = &p->tables[i]; + + rte_pipeline_table_free(table); + } + + /* Free output ports */ + for (i = 0; i < p->num_ports_out; i++) { + struct rte_port_out *port = &p->ports_out[i]; + + rte_pipeline_port_out_free(port); + } + + /* Free pipeline memory */ 
+ rte_free(p); + + return 0; +} + +/* + * Table + * + */ +static int +rte_table_check_params(struct rte_pipeline *p, + struct rte_pipeline_table_params *params, + uint32_t *table_id) +{ + if (p == NULL) { + RTE_LOG(ERR, PIPELINE, "%s: pipeline parameter is NULL\n", + __func__); + return -EINVAL; + } + if (params == NULL) { + RTE_LOG(ERR, PIPELINE, "%s: params parameter is NULL\n", + __func__); + return -EINVAL; + } + if (table_id == NULL) { + RTE_LOG(ERR, PIPELINE, "%s: table_id parameter is NULL\n", + __func__); + return -EINVAL; + } + + /* ops: f_create and f_lookup are mandatory */ + if (params->ops == NULL) { + RTE_LOG(ERR, PIPELINE, "%s: params->ops is NULL\n", + __func__); + return -EINVAL; + } + + if (params->ops->f_create == NULL) { + RTE_LOG(ERR, PIPELINE, + "%s: f_create function pointer is NULL\n", __func__); + return -EINVAL; + } + + if (params->ops->f_lookup == NULL) { + RTE_LOG(ERR, PIPELINE, + "%s: f_lookup function pointer is NULL\n", __func__); + return -EINVAL; + } + + /* Do we have room for one more table? 
*/ + if (p->num_tables == RTE_PIPELINE_TABLE_MAX) { + RTE_LOG(ERR, PIPELINE, + "%s: Incorrect value for num_tables parameter\n", + __func__); + return -EINVAL; + } + + return 0; +} + +int +rte_pipeline_table_create(struct rte_pipeline *p, + struct rte_pipeline_table_params *params, + uint32_t *table_id) +{ + struct rte_table *table; + struct rte_pipeline_table_entry *default_entry; + void *h_table; + uint32_t entry_size, id; + int status; + + /* Check input arguments */ + status = rte_table_check_params(p, params, table_id); + if (status != 0) + return status; + + id = p->num_tables; + table = &p->tables[id]; + + /* Allocate space for the default table entry (fixed part plus action_data_size bytes) */ + entry_size = sizeof(struct rte_pipeline_table_entry) + + params->action_data_size; + default_entry = (struct rte_pipeline_table_entry *) rte_zmalloc_socket( + "PIPELINE", entry_size, RTE_CACHE_LINE_SIZE, p->socket_id); + if (default_entry == NULL) { + RTE_LOG(ERR, PIPELINE, + "%s: Failed to allocate default entry\n", __func__); + return -EINVAL; /* NOTE(review): an allocation failure is reported as -EINVAL, not -ENOMEM */ + } + + /* Create the table */ + h_table = params->ops->f_create(params->arg_create, p->socket_id, + entry_size); + if (h_table == NULL) { + rte_free(default_entry); /* undo the allocation above */ + RTE_LOG(ERR, PIPELINE, "%s: Table creation failed\n", __func__); + return -EINVAL; + } + + /* Commit current table to the pipeline; the returned id is the index into p->tables[] */ + p->num_tables++; + *table_id = id; + + /* Save input parameters */ + memcpy(&table->ops, params->ops, sizeof(struct rte_table_ops)); + table->f_action_hit = params->f_action_hit; + table->f_action_miss = params->f_action_miss; + table->arg_ah = params->arg_ah; + table->entry_size = entry_size; + + /* Clear the lookup miss actions (to be set later through API); until then the default action is DROP */ + table->default_entry = default_entry; + table->default_entry->action = RTE_PIPELINE_ACTION_DROP; + + /* Initialize table internal data structure */ + table->h_table = h_table; + table->table_next_id = 0; + table->table_next_id_valid = 0; + + return 0; +} + +void +rte_pipeline_table_free(struct rte_table 
*table) +{ + if (table->ops.f_free != NULL) + table->ops.f_free(table->h_table); + + rte_free(table->default_entry); /* allocated by rte_pipeline_table_create() */ +} + +int +rte_pipeline_table_default_entry_add(struct rte_pipeline *p, + uint32_t table_id, + struct rte_pipeline_table_entry *default_entry, + struct rte_pipeline_table_entry **default_entry_ptr) +{ + struct rte_table *table; + + /* Check input arguments. NOTE(review): default_entry_ptr is not checked and is dereferenced unconditionally before returning - confirm callers never pass NULL */ + if (p == NULL) { + RTE_LOG(ERR, PIPELINE, "%s: pipeline parameter is NULL\n", + __func__); + return -EINVAL; + } + + if (default_entry == NULL) { + RTE_LOG(ERR, PIPELINE, + "%s: default_entry parameter is NULL\n", __func__); + return -EINVAL; + } + + if (table_id >= p->num_tables) { + RTE_LOG(ERR, PIPELINE, + "%s: table_id %d out of range\n", __func__, table_id); /* NOTE(review): table_id is uint32_t but is printed with %d */ + return -EINVAL; + } + + table = &p->tables[table_id]; + + if ((default_entry->action == RTE_PIPELINE_ACTION_TABLE) && + table->table_next_id_valid && + (default_entry->table_id != table->table_next_id)) { + RTE_LOG(ERR, PIPELINE, + "%s: Tree-like topologies not allowed\n", __func__); + return -EINVAL; /* all TABLE-action entries of one table must target the same next table */ + } + + /* Set the lookup miss actions; the first TABLE action fixes the next table of this table */ + if ((default_entry->action == RTE_PIPELINE_ACTION_TABLE) && + (table->table_next_id_valid == 0)) { + table->table_next_id = default_entry->table_id; + table->table_next_id_valid = 1; + } + + memcpy(table->default_entry, default_entry, table->entry_size); /* copies entry_size bytes, so default_entry must be at least that large */ + + *default_entry_ptr = table->default_entry; + return 0; +} + +int +rte_pipeline_table_default_entry_delete(struct rte_pipeline *p, + uint32_t table_id, + struct rte_pipeline_table_entry *entry) +{ + struct rte_table *table; + + /* Check input arguments */ + if (p == NULL) { + RTE_LOG(ERR, PIPELINE, + "%s: pipeline parameter is NULL\n", __func__); + return -EINVAL; + } + + if (table_id >= p->num_tables) { + RTE_LOG(ERR, PIPELINE, + "%s: table_id %d out of range\n", __func__, table_id); /* NOTE(review): table_id is uint32_t but is printed with %d */ + return -EINVAL; + } + + table = &p->tables[table_id]; + + /* Save the current contents of the default entry (optional: entry may be NULL) */ + if (entry) + memcpy(entry, 
table->default_entry, table->entry_size); + + /* Clear the lookup miss actions */ + memset(table->default_entry, 0, table->entry_size); + table->default_entry->action = RTE_PIPELINE_ACTION_DROP; + + return 0; +} + +int +rte_pipeline_table_entry_add(struct rte_pipeline *p, + uint32_t table_id, + void *key, + struct rte_pipeline_table_entry *entry, + int *key_found, + struct rte_pipeline_table_entry **entry_ptr) +{ + struct rte_table *table; + + /* Check input arguments */ + if (p == NULL) { + RTE_LOG(ERR, PIPELINE, "%s: pipeline parameter is NULL\n", + __func__); + return -EINVAL; + } + + if (key == NULL) { + RTE_LOG(ERR, PIPELINE, "%s: key parameter is NULL\n", __func__); + return -EINVAL; + } + + if (entry == NULL) { + RTE_LOG(ERR, PIPELINE, "%s: entry parameter is NULL\n", + __func__); + return -EINVAL; + } + + if (table_id >= p->num_tables) { + RTE_LOG(ERR, PIPELINE, + "%s: table_id %u out of range\n", __func__, table_id); + return -EINVAL; + } + + table = &p->tables[table_id]; + + if (table->ops.f_add == NULL) { + RTE_LOG(ERR, PIPELINE, "%s: f_add function pointer NULL\n", + __func__); + return -EINVAL; + } + + if ((entry->action == RTE_PIPELINE_ACTION_TABLE) && + table->table_next_id_valid && + (entry->table_id != table->table_next_id)) { + RTE_LOG(ERR, PIPELINE, + "%s: Tree-like topologies not allowed\n", __func__); + return -EINVAL; + } + + /* Add entry */ + if ((entry->action == RTE_PIPELINE_ACTION_TABLE) && + (table->table_next_id_valid == 0)) { + table->table_next_id = entry->table_id; + table->table_next_id_valid = 1; + } + + return (table->ops.f_add)(table->h_table, key, (void *) entry, + key_found, (void **) entry_ptr); +} + +int +rte_pipeline_table_entry_delete(struct rte_pipeline *p, + uint32_t table_id, + void *key, + int *key_found, + struct rte_pipeline_table_entry *entry) +{ + struct rte_table *table; + + /* Check input arguments */ + if (p == NULL) { + RTE_LOG(ERR, PIPELINE, "%s: pipeline parameter NULL\n", + __func__); + return -EINVAL; + } + + 
if (key == NULL) { + RTE_LOG(ERR, PIPELINE, "%s: key parameter is NULL\n", + __func__); + return -EINVAL; + } + + if (table_id >= p->num_tables) { + RTE_LOG(ERR, PIPELINE, + "%s: table_id %u out of range\n", __func__, table_id); + return -EINVAL; + } + + table = &p->tables[table_id]; + + if (table->ops.f_delete == NULL) { + RTE_LOG(ERR, PIPELINE, + "%s: f_delete function pointer NULL\n", __func__); + return -EINVAL; + } + + return (table->ops.f_delete)(table->h_table, key, key_found, entry); +} + +int rte_pipeline_table_entry_add_bulk(struct rte_pipeline *p, + uint32_t table_id, + void **keys, + struct rte_pipeline_table_entry **entries, + uint32_t n_keys, + int *key_found, + struct rte_pipeline_table_entry **entries_ptr) +{ + struct rte_table *table; + uint32_t i; + + /* Check input arguments */ + if (p == NULL) { + RTE_LOG(ERR, PIPELINE, "%s: pipeline parameter is NULL\n", + __func__); + return -EINVAL; + } + + if (keys == NULL) { + RTE_LOG(ERR, PIPELINE, "%s: keys parameter is NULL\n", __func__); + return -EINVAL; + } + + if (entries == NULL) { + RTE_LOG(ERR, PIPELINE, "%s: entries parameter is NULL\n", + __func__); + return -EINVAL; + } + + if (table_id >= p->num_tables) { + RTE_LOG(ERR, PIPELINE, + "%s: table_id %u out of range\n", __func__, table_id); + return -EINVAL; + } + + table = &p->tables[table_id]; + + if (table->ops.f_add_bulk == NULL) { + RTE_LOG(ERR, PIPELINE, "%s: f_add_bulk function pointer NULL\n", + __func__); + return -EINVAL; + } + + for (i = 0; i < n_keys; i++) { + if ((entries[i]->action == RTE_PIPELINE_ACTION_TABLE) && + table->table_next_id_valid && + (entries[i]->table_id != table->table_next_id)) { + RTE_LOG(ERR, PIPELINE, + "%s: Tree-like topologies not allowed\n", __func__); + return -EINVAL; + } + } + + /* Add entry */ + for (i = 0; i < n_keys; i++) { + if ((entries[i]->action == RTE_PIPELINE_ACTION_TABLE) && + (table->table_next_id_valid == 0)) { + table->table_next_id = entries[i]->table_id; + table->table_next_id_valid = 1; + } + 
} + + return (table->ops.f_add_bulk)(table->h_table, keys, (void **) entries, + n_keys, key_found, (void **) entries_ptr); +} + +int rte_pipeline_table_entry_delete_bulk(struct rte_pipeline *p, + uint32_t table_id, + void **keys, + uint32_t n_keys, + int *key_found, + struct rte_pipeline_table_entry **entries) +{ + struct rte_table *table; + + /* Check input arguments */ + if (p == NULL) { + RTE_LOG(ERR, PIPELINE, "%s: pipeline parameter NULL\n", + __func__); + return -EINVAL; + } + + if (keys == NULL) { + RTE_LOG(ERR, PIPELINE, "%s: keys parameter is NULL\n", + __func__); + return -EINVAL; + } + + if (table_id >= p->num_tables) { + RTE_LOG(ERR, PIPELINE, + "%s: table_id %u out of range\n", __func__, table_id); + return -EINVAL; + } + + table = &p->tables[table_id]; + + if (table->ops.f_delete_bulk == NULL) { + RTE_LOG(ERR, PIPELINE, + "%s: f_delete_bulk function pointer NULL\n", __func__); + return -EINVAL; + } + + return (table->ops.f_delete_bulk)(table->h_table, keys, n_keys, key_found, + (void **) entries); +} + +/* + * Port + * + */ +static int +rte_pipeline_port_in_check_params(struct rte_pipeline *p, + struct rte_pipeline_port_in_params *params, + uint32_t *port_id) +{ + if (p == NULL) { + RTE_LOG(ERR, PIPELINE, "%s: pipeline parameter NULL\n", + __func__); + return -EINVAL; + } + if (params == NULL) { + RTE_LOG(ERR, PIPELINE, "%s: params parameter NULL\n", __func__); + return -EINVAL; + } + if (port_id == NULL) { + RTE_LOG(ERR, PIPELINE, "%s: port_id parameter NULL\n", + __func__); + return -EINVAL; + } + + /* ops */ + if (params->ops == NULL) { + RTE_LOG(ERR, PIPELINE, "%s: params->ops parameter NULL\n", + __func__); + return -EINVAL; + } + + if (params->ops->f_create == NULL) { + RTE_LOG(ERR, PIPELINE, + "%s: f_create function pointer NULL\n", __func__); + return -EINVAL; + } + + if (params->ops->f_rx == NULL) { + RTE_LOG(ERR, PIPELINE, "%s: f_rx function pointer NULL\n", + __func__); + return -EINVAL; + } + + /* burst_size */ + if ((params->burst_size == 0) || 
+ (params->burst_size > RTE_PORT_IN_BURST_SIZE_MAX)) { + RTE_LOG(ERR, PIPELINE, "%s: invalid value for burst_size\n", + __func__); + return -EINVAL; + } + + /* Do we have room for one more port? */ + if (p->num_ports_in == RTE_PIPELINE_PORT_IN_MAX) { + RTE_LOG(ERR, PIPELINE, + "%s: invalid value for num_ports_in\n", __func__); + return -EINVAL; + } + + return 0; +} + +static int +rte_pipeline_port_out_check_params(struct rte_pipeline *p, + struct rte_pipeline_port_out_params *params, + uint32_t *port_id) +{ + rte_pipeline_port_out_action_handler f_ah; + rte_pipeline_port_out_action_handler_bulk f_ah_bulk; + + if (p == NULL) { + RTE_LOG(ERR, PIPELINE, "%s: pipeline parameter NULL\n", + __func__); + return -EINVAL; + } + + if (params == NULL) { + RTE_LOG(ERR, PIPELINE, "%s: params parameter NULL\n", __func__); + return -EINVAL; + } + + if (port_id == NULL) { + RTE_LOG(ERR, PIPELINE, "%s: port_id parameter NULL\n", + __func__); + return -EINVAL; + } + + /* ops */ + if (params->ops == NULL) { + RTE_LOG(ERR, PIPELINE, "%s: params->ops parameter NULL\n", + __func__); + return -EINVAL; + } + + if (params->ops->f_create == NULL) { + RTE_LOG(ERR, PIPELINE, + "%s: f_create function pointer NULL\n", __func__); + return -EINVAL; + } + + if (params->ops->f_tx == NULL) { + RTE_LOG(ERR, PIPELINE, + "%s: f_tx function pointer NULL\n", __func__); + return -EINVAL; + } + + if (params->ops->f_tx_bulk == NULL) { + RTE_LOG(ERR, PIPELINE, + "%s: f_tx_bulk function pointer NULL\n", __func__); + return -EINVAL; + } + + f_ah = params->f_action; + f_ah_bulk = params->f_action_bulk; + if (((f_ah != NULL) && (f_ah_bulk == NULL)) || + ((f_ah == NULL) && (f_ah_bulk != NULL))) { + RTE_LOG(ERR, PIPELINE, "%s: Action handlers have to be either " + "both enabled or both disabled\n", __func__); + return -EINVAL; + } + + /* Do we have room for one more port? 
*/ + if (p->num_ports_out == RTE_PIPELINE_PORT_OUT_MAX) { + RTE_LOG(ERR, PIPELINE, + "%s: invalid value for num_ports_out\n", __func__); + return -EINVAL; + } + + return 0; +} + +int +rte_pipeline_port_in_create(struct rte_pipeline *p, + struct rte_pipeline_port_in_params *params, + uint32_t *port_id) +{ + struct rte_port_in *port; + void *h_port; + uint32_t id; + int status; + + /* Check input arguments */ + status = rte_pipeline_port_in_check_params(p, params, port_id); + if (status != 0) + return status; + + id = p->num_ports_in; + port = &p->ports_in[id]; + + /* Create the port */ + h_port = params->ops->f_create(params->arg_create, p->socket_id); + if (h_port == NULL) { + RTE_LOG(ERR, PIPELINE, "%s: Port creation failed\n", __func__); + return -EINVAL; + } + + /* Commit current input port to the pipeline */ + p->num_ports_in++; + *port_id = id; + + /* Save input parameters */ + memcpy(&port->ops, params->ops, sizeof(struct rte_port_in_ops)); + port->f_action = params->f_action; + port->arg_ah = params->arg_ah; + port->burst_size = params->burst_size; + + /* Initialize port internal data structure: not yet connected to a table, not yet linked in the enabled chain */ + port->table_id = RTE_TABLE_INVALID; + port->h_port = h_port; + port->next = NULL; + + return 0; +} + +void +rte_pipeline_port_in_free(struct rte_port_in *port) +{ + if (port->ops.f_free != NULL) + port->ops.f_free(port->h_port); +} + +int +rte_pipeline_port_out_create(struct rte_pipeline *p, + struct rte_pipeline_port_out_params *params, + uint32_t *port_id) +{ + struct rte_port_out *port; + void *h_port; + uint32_t id; + int status; + + /* Check input arguments */ + status = rte_pipeline_port_out_check_params(p, params, port_id); + if (status != 0) + return status; + + id = p->num_ports_out; + port = &p->ports_out[id]; + + /* Create the port */ + h_port = params->ops->f_create(params->arg_create, p->socket_id); + if (h_port == NULL) { + RTE_LOG(ERR, PIPELINE, "%s: Port creation failed\n", __func__); + return -EINVAL; + } + + /* Commit current output port to the pipeline */ 
+ p->num_ports_out++; + *port_id = id; + + /* Save input parameters */ + memcpy(&port->ops, params->ops, sizeof(struct rte_port_out_ops)); + port->f_action = params->f_action; + port->f_action_bulk = params->f_action_bulk; + port->arg_ah = params->arg_ah; + + /* Initialize port internal data structure */ + port->h_port = h_port; + + return 0; +} + +void +rte_pipeline_port_out_free(struct rte_port_out *port) +{ + if (port->ops.f_free != NULL) + port->ops.f_free(port->h_port); +} + +int +rte_pipeline_port_in_connect_to_table(struct rte_pipeline *p, + uint32_t port_id, + uint32_t table_id) +{ + struct rte_port_in *port; + + /* Check input arguments */ + if (p == NULL) { + RTE_LOG(ERR, PIPELINE, "%s: pipeline parameter NULL\n", + __func__); + return -EINVAL; + } + + if (port_id >= p->num_ports_in) { + RTE_LOG(ERR, PIPELINE, + "%s: port IN ID %u is out of range\n", + __func__, port_id); + return -EINVAL; + } + + if (table_id >= p->num_tables) { + RTE_LOG(ERR, PIPELINE, + "%s: Table ID %u is out of range\n", + __func__, table_id); + return -EINVAL; + } + + port = &p->ports_in[port_id]; + port->table_id = table_id; + + return 0; +} + +int +rte_pipeline_port_in_enable(struct rte_pipeline *p, uint32_t port_id) +{ + struct rte_port_in *port, *port_prev, *port_next; + struct rte_port_in *port_first, *port_last; + uint64_t port_mask; + uint32_t port_prev_id, port_next_id, port_first_id, port_last_id; + + /* Check input arguments */ + if (p == NULL) { + RTE_LOG(ERR, PIPELINE, "%s: pipeline parameter NULL\n", + __func__); + return -EINVAL; + } + + if (port_id >= p->num_ports_in) { + RTE_LOG(ERR, PIPELINE, + "%s: port IN ID %u is out of range\n", + __func__, port_id); + return -EINVAL; + } + + /* Return if current input port is already enabled */ + port_mask = 1LLU << port_id; + if (p->enabled_port_in_mask & port_mask) + return 0; + + p->enabled_port_in_mask |= port_mask; + + /* Add current input port to the pipeline chain of enabled ports */ + port_prev_id = 
rte_mask_get_prev(p->enabled_port_in_mask, port_id); + port_next_id = rte_mask_get_next(p->enabled_port_in_mask, port_id); + + port_prev = &p->ports_in[port_prev_id]; + port_next = &p->ports_in[port_next_id]; + port = &p->ports_in[port_id]; + + port_prev->next = port; + port->next = port_next; + + /* Update the first and last input ports in the chain; the last port's next pointer is reset to NULL */ + port_first_id = __builtin_ctzll(p->enabled_port_in_mask); + port_last_id = 63 - __builtin_clzll(p->enabled_port_in_mask); + + port_first = &p->ports_in[port_first_id]; + port_last = &p->ports_in[port_last_id]; + + p->port_in_first = port_first; + port_last->next = NULL; + + return 0; +} + +int +rte_pipeline_port_in_disable(struct rte_pipeline *p, uint32_t port_id) +{ + struct rte_port_in *port_prev, *port_next, *port_first, *port_last; + uint64_t port_mask; + uint32_t port_prev_id, port_next_id, port_first_id, port_last_id; + + /* Check input arguments */ + if (p == NULL) { + RTE_LOG(ERR, PIPELINE, "%s: pipeline parameter NULL\n", + __func__); + return -EINVAL; + } + + if (port_id >= p->num_ports_in) { + RTE_LOG(ERR, PIPELINE, "%s: port IN ID %u is out of range\n", + __func__, port_id); + return -EINVAL; + } + + /* Return if current input port is already disabled */ + port_mask = 1LLU << port_id; + if ((p->enabled_port_in_mask & port_mask) == 0) + return 0; + + /* Current port is the only enabled one: clear its bit and leave the chain empty */ + if (__builtin_popcountll(p->enabled_port_in_mask) == 1) { + p->enabled_port_in_mask &= ~port_mask; + p->port_in_first = NULL; + + return 0; + } + + /* Remove current input port from the pipeline chain of enabled ports */ + port_prev_id = rte_mask_get_prev(p->enabled_port_in_mask, port_id); + port_next_id = rte_mask_get_next(p->enabled_port_in_mask, port_id); + + port_prev = &p->ports_in[port_prev_id]; + port_next = &p->ports_in[port_next_id]; + + port_prev->next = port_next; + p->enabled_port_in_mask &= ~port_mask; + + /* Update the first and last input ports in the chain */ + port_first_id = 
__builtin_ctzll(p->enabled_port_in_mask); + port_last_id = 63 - __builtin_clzll(p->enabled_port_in_mask); + + port_first = &p->ports_in[port_first_id]; + port_last = &p->ports_in[port_last_id]; + + p->port_in_first = port_first; + port_last->next = NULL; + + return 0; +} + +/* + * Pipeline run-time + * + */ +int +rte_pipeline_check(struct rte_pipeline *p) +{ + uint32_t port_in_id; + + /* Check input arguments */ + if (p == NULL) { + RTE_LOG(ERR, PIPELINE, "%s: pipeline parameter NULL\n", + __func__); + return -EINVAL; + } + + /* Check that pipeline has at least one input port, one table and one + output port */ + if (p->num_ports_in == 0) { + RTE_LOG(ERR, PIPELINE, "%s: must have at least 1 input port\n", + __func__); + return -EINVAL; + } + if (p->num_tables == 0) { + RTE_LOG(ERR, PIPELINE, "%s: must have at least 1 table\n", + __func__); + return -EINVAL; + } + if (p->num_ports_out == 0) { + RTE_LOG(ERR, PIPELINE, "%s: must have at least 1 output port\n", + __func__); + return -EINVAL; + } + + /* Check that all input ports are connected */ + for (port_in_id = 0; port_in_id < p->num_ports_in; port_in_id++) { + struct rte_port_in *port_in = &p->ports_in[port_in_id]; + + if (port_in->table_id == RTE_TABLE_INVALID) { + RTE_LOG(ERR, PIPELINE, + "%s: Port IN ID %u is not connected\n", + __func__, port_in_id); + return -EINVAL; + } + } + + return 0; +} + +static inline void +rte_pipeline_compute_masks(struct rte_pipeline *p, uint64_t pkts_mask) +{ + p->action_mask1[RTE_PIPELINE_ACTION_DROP] = 0; + p->action_mask1[RTE_PIPELINE_ACTION_PORT] = 0; + p->action_mask1[RTE_PIPELINE_ACTION_PORT_META] = 0; + p->action_mask1[RTE_PIPELINE_ACTION_TABLE] = 0; + + if ((pkts_mask & (pkts_mask + 1)) == 0) { + uint64_t n_pkts = __builtin_popcountll(pkts_mask); + uint32_t i; + + for (i = 0; i < n_pkts; i++) { + uint64_t pkt_mask = 1LLU << i; + uint32_t pos = p->entries[i]->action; + + p->action_mask1[pos] |= pkt_mask; + } + } else { + uint32_t i; + + for (i = 0; i < 
RTE_PORT_IN_BURST_SIZE_MAX; i++) { + uint64_t pkt_mask = 1LLU << i; + uint32_t pos; + + if ((pkt_mask & pkts_mask) == 0) + continue; + + pos = p->entries[i]->action; + p->action_mask1[pos] |= pkt_mask; + } + } +} + +static inline void +rte_pipeline_action_handler_port_bulk(struct rte_pipeline *p, + uint64_t pkts_mask, uint32_t port_id) +{ + struct rte_port_out *port_out = &p->ports_out[port_id]; + + /* Output port user actions */ + if (port_out->f_action_bulk != NULL) { + uint64_t mask = pkts_mask; + + port_out->f_action_bulk(p->pkts, &pkts_mask, port_out->arg_ah); + p->action_mask0[RTE_PIPELINE_ACTION_DROP] |= pkts_mask ^ mask; + RTE_PIPELINE_STATS_ADD_M(port_out->n_pkts_dropped_by_ah, + pkts_mask ^ mask); + } + + /* Output port TX */ + if (pkts_mask != 0) + port_out->ops.f_tx_bulk(port_out->h_port, p->pkts, pkts_mask); +} + +static inline void +rte_pipeline_action_handler_port(struct rte_pipeline *p, uint64_t pkts_mask) +{ + if ((pkts_mask & (pkts_mask + 1)) == 0) { + uint64_t n_pkts = __builtin_popcountll(pkts_mask); + uint32_t i; + + for (i = 0; i < n_pkts; i++) { + struct rte_mbuf *pkt = p->pkts[i]; + uint32_t port_out_id = p->entries[i]->port_id; + struct rte_port_out *port_out = + &p->ports_out[port_out_id]; + + /* Output port user actions */ + if (port_out->f_action == NULL) /* Output port TX */ + port_out->ops.f_tx(port_out->h_port, pkt); + else { + uint64_t pkt_mask = 1LLU; + + port_out->f_action(pkt, &pkt_mask, + port_out->arg_ah); + p->action_mask0[RTE_PIPELINE_ACTION_DROP] |= + (pkt_mask ^ 1LLU) << i; + + RTE_PIPELINE_STATS_ADD(port_out->n_pkts_dropped_by_ah, + pkt_mask ^ 1LLU); + + /* Output port TX */ + if (pkt_mask != 0) + port_out->ops.f_tx(port_out->h_port, + pkt); + } + } + } else { + uint32_t i; + + for (i = 0; i < RTE_PORT_IN_BURST_SIZE_MAX; i++) { + uint64_t pkt_mask = 1LLU << i; + struct rte_mbuf *pkt; + struct rte_port_out *port_out; + uint32_t port_out_id; + + if ((pkt_mask & pkts_mask) == 0) + continue; + + pkt = p->pkts[i]; + port_out_id 
= p->entries[i]->port_id; + port_out = &p->ports_out[port_out_id]; + + /* Output port user actions */ + if (port_out->f_action == NULL) /* Output port TX */ + port_out->ops.f_tx(port_out->h_port, pkt); + else { + pkt_mask = 1LLU; + + port_out->f_action(pkt, &pkt_mask, + port_out->arg_ah); + p->action_mask0[RTE_PIPELINE_ACTION_DROP] |= + (pkt_mask ^ 1LLU) << i; + + RTE_PIPELINE_STATS_ADD(port_out->n_pkts_dropped_by_ah, + pkt_mask ^ 1LLU); + + /* Output port TX */ + if (pkt_mask != 0) + port_out->ops.f_tx(port_out->h_port, + pkt); + } + } + } +} + +static inline void +rte_pipeline_action_handler_port_meta(struct rte_pipeline *p, + uint64_t pkts_mask) +{ + if ((pkts_mask & (pkts_mask + 1)) == 0) { + uint64_t n_pkts = __builtin_popcountll(pkts_mask); + uint32_t i; + + for (i = 0; i < n_pkts; i++) { + struct rte_mbuf *pkt = p->pkts[i]; + uint32_t port_out_id = + RTE_MBUF_METADATA_UINT32(pkt, + p->offset_port_id); + struct rte_port_out *port_out = &p->ports_out[ + port_out_id]; + + /* Output port user actions */ + if (port_out->f_action == NULL) /* Output port TX */ + port_out->ops.f_tx(port_out->h_port, pkt); + else { + uint64_t pkt_mask = 1LLU; + + port_out->f_action(pkt, &pkt_mask, + port_out->arg_ah); + p->action_mask0[RTE_PIPELINE_ACTION_DROP] |= + (pkt_mask ^ 1LLU) << i; + + RTE_PIPELINE_STATS_ADD(port_out->n_pkts_dropped_by_ah, + pkt_mask ^ 1ULL); + + /* Output port TX */ + if (pkt_mask != 0) + port_out->ops.f_tx(port_out->h_port, + pkt); + } + } + } else { + uint32_t i; + + for (i = 0; i < RTE_PORT_IN_BURST_SIZE_MAX; i++) { + uint64_t pkt_mask = 1LLU << i; + struct rte_mbuf *pkt; + struct rte_port_out *port_out; + uint32_t port_out_id; + + if ((pkt_mask & pkts_mask) == 0) + continue; + + pkt = p->pkts[i]; + port_out_id = RTE_MBUF_METADATA_UINT32(pkt, + p->offset_port_id); + port_out = &p->ports_out[port_out_id]; + + /* Output port user actions */ + if (port_out->f_action == NULL) /* Output port TX */ + port_out->ops.f_tx(port_out->h_port, pkt); + else { + 
pkt_mask = 1LLU; + + port_out->f_action(pkt, &pkt_mask, + port_out->arg_ah); + p->action_mask0[RTE_PIPELINE_ACTION_DROP] |= + (pkt_mask ^ 1LLU) << i; + + RTE_PIPELINE_STATS_ADD(port_out->n_pkts_dropped_by_ah, + pkt_mask ^ 1ULL); + + /* Output port TX */ + if (pkt_mask != 0) + port_out->ops.f_tx(port_out->h_port, + pkt); + } + } + } +} + +static inline void +rte_pipeline_action_handler_drop(struct rte_pipeline *p, uint64_t pkts_mask) +{ + if ((pkts_mask & (pkts_mask + 1)) == 0) { + uint64_t n_pkts = __builtin_popcountll(pkts_mask); + uint32_t i; + + for (i = 0; i < n_pkts; i++) + rte_pktmbuf_free(p->pkts[i]); + } else { + uint32_t i; + + for (i = 0; i < RTE_PORT_IN_BURST_SIZE_MAX; i++) { + uint64_t pkt_mask = 1LLU << i; + + if ((pkt_mask & pkts_mask) == 0) + continue; + + rte_pktmbuf_free(p->pkts[i]); + } + } +} + +int +rte_pipeline_run(struct rte_pipeline *p) +{ + struct rte_port_in *port_in; + + for (port_in = p->port_in_first; port_in != NULL; + port_in = port_in->next) { + uint64_t pkts_mask; + uint32_t n_pkts, table_id; + + /* Input port RX */ + n_pkts = port_in->ops.f_rx(port_in->h_port, p->pkts, + port_in->burst_size); + if (n_pkts == 0) + continue; + + pkts_mask = RTE_LEN2MASK(n_pkts, uint64_t); + p->action_mask0[RTE_PIPELINE_ACTION_DROP] = 0; + p->action_mask0[RTE_PIPELINE_ACTION_PORT] = 0; + p->action_mask0[RTE_PIPELINE_ACTION_PORT_META] = 0; + p->action_mask0[RTE_PIPELINE_ACTION_TABLE] = 0; + + /* Input port user actions */ + if (port_in->f_action != NULL) { + uint64_t mask = pkts_mask; + + port_in->f_action(p->pkts, n_pkts, &pkts_mask, port_in->arg_ah); + mask ^= pkts_mask; + p->action_mask0[RTE_PIPELINE_ACTION_DROP] |= mask; + RTE_PIPELINE_STATS_ADD_M(port_in->n_pkts_dropped_by_ah, mask); + } + + /* Table */ + for (table_id = port_in->table_id; pkts_mask != 0; ) { + struct rte_table *table; + uint64_t lookup_hit_mask, lookup_miss_mask; + + /* Lookup */ + table = &p->tables[table_id]; + table->ops.f_lookup(table->h_table, p->pkts, pkts_mask, + 
&lookup_hit_mask, (void **) p->entries); + lookup_miss_mask = pkts_mask & (~lookup_hit_mask); + + /* Lookup miss */ + if (lookup_miss_mask != 0) { + struct rte_pipeline_table_entry *default_entry = + table->default_entry; + + /* Table user actions */ + if (table->f_action_miss != NULL) { + uint64_t mask = lookup_miss_mask; + + table->f_action_miss(p->pkts, + &lookup_miss_mask, + default_entry, table->arg_ah); + mask ^= lookup_miss_mask; + p->action_mask0[RTE_PIPELINE_ACTION_DROP] |= mask; + RTE_PIPELINE_STATS_ADD_M( + table->n_pkts_dropped_by_lkp_miss_ah, mask); + } + + /* Table reserved actions */ + if ((default_entry->action == + RTE_PIPELINE_ACTION_PORT) && + (lookup_miss_mask != 0)) + rte_pipeline_action_handler_port_bulk(p, + lookup_miss_mask, + default_entry->port_id); + else { + uint32_t pos = default_entry->action; + /* Accumulate with |=: action_mask0 already holds bits set by the action handlers and by earlier tables in the chain */ + p->action_mask0[pos] |= lookup_miss_mask; + if (pos == RTE_PIPELINE_ACTION_DROP) { + RTE_PIPELINE_STATS_ADD_M(table->n_pkts_dropped_lkp_miss, + lookup_miss_mask); + } + } + } + + /* Lookup hit */ + if (lookup_hit_mask != 0) { + /* Table user actions */ + if (table->f_action_hit != NULL) { + uint64_t mask = lookup_hit_mask; + + table->f_action_hit(p->pkts, + &lookup_hit_mask, + p->entries, table->arg_ah); + mask ^= lookup_hit_mask; + p->action_mask0[RTE_PIPELINE_ACTION_DROP] |= mask; + RTE_PIPELINE_STATS_ADD_M( + table->n_pkts_dropped_by_lkp_hit_ah, mask); + } + + /* Table reserved actions */ + rte_pipeline_compute_masks(p, lookup_hit_mask); + p->action_mask0[RTE_PIPELINE_ACTION_DROP] |= + p->action_mask1[ + RTE_PIPELINE_ACTION_DROP]; + p->action_mask0[RTE_PIPELINE_ACTION_PORT] |= + p->action_mask1[ + RTE_PIPELINE_ACTION_PORT]; + p->action_mask0[RTE_PIPELINE_ACTION_PORT_META] |= + p->action_mask1[ + RTE_PIPELINE_ACTION_PORT_META]; + p->action_mask0[RTE_PIPELINE_ACTION_TABLE] |= + p->action_mask1[ + RTE_PIPELINE_ACTION_TABLE]; + + RTE_PIPELINE_STATS_ADD_M(table->n_pkts_dropped_lkp_hit, + p->action_mask1[RTE_PIPELINE_ACTION_DROP]); + } + + /* 
Prepare for next iteration */ + pkts_mask = p->action_mask0[RTE_PIPELINE_ACTION_TABLE]; + table_id = table->table_next_id; + p->action_mask0[RTE_PIPELINE_ACTION_TABLE] = 0; + } + + /* Table reserved action PORT */ + rte_pipeline_action_handler_port(p, + p->action_mask0[RTE_PIPELINE_ACTION_PORT]); + + /* Table reserved action PORT META */ + rte_pipeline_action_handler_port_meta(p, + p->action_mask0[RTE_PIPELINE_ACTION_PORT_META]); + + /* Table reserved action DROP */ + rte_pipeline_action_handler_drop(p, + p->action_mask0[RTE_PIPELINE_ACTION_DROP]); + } + + return 0; +} + +int +rte_pipeline_flush(struct rte_pipeline *p) +{ + uint32_t port_id; + + /* Check input arguments */ + if (p == NULL) { + RTE_LOG(ERR, PIPELINE, "%s: pipeline parameter NULL\n", + __func__); + return -EINVAL; + } + + for (port_id = 0; port_id < p->num_ports_out; port_id++) { + struct rte_port_out *port = &p->ports_out[port_id]; + + if (port->ops.f_flush != NULL) + port->ops.f_flush(port->h_port); + } + + return 0; +} + +int +rte_pipeline_port_out_packet_insert(struct rte_pipeline *p, + uint32_t port_id, struct rte_mbuf *pkt) +{ + struct rte_port_out *port_out = &p->ports_out[port_id]; + + /* Output port user actions */ + if (port_out->f_action == NULL) + port_out->ops.f_tx(port_out->h_port, pkt); /* Output port TX */ + else { + uint64_t pkt_mask = 1LLU; + + port_out->f_action(pkt, &pkt_mask, port_out->arg_ah); + + if (pkt_mask != 0) /* Output port TX */ + port_out->ops.f_tx(port_out->h_port, pkt); + else { + rte_pktmbuf_free(pkt); + RTE_PIPELINE_STATS_ADD(port_out->n_pkts_dropped_by_ah, 1); + } + } + + return 0; +} + +int rte_pipeline_port_in_stats_read(struct rte_pipeline *p, uint32_t port_id, + struct rte_pipeline_port_in_stats *stats, int clear) +{ + struct rte_port_in *port; + int retval; + + if (p == NULL) { + RTE_LOG(ERR, PIPELINE, "%s: pipeline parameter NULL\n", + __func__); + return -EINVAL; + } + + if (port_id >= p->num_ports_in) { + RTE_LOG(ERR, PIPELINE, + "%s: port IN ID %u is out 
of range\n", + __func__, port_id); + return -EINVAL; + } + + port = &p->ports_in[port_id]; + + if (port->ops.f_stats != NULL) { + retval = port->ops.f_stats(port->h_port, (stats != NULL) ? &stats->stats : NULL, clear); + if (retval) + return retval; + } else if (stats != NULL) + memset(&stats->stats, 0, sizeof(stats->stats)); + + if (stats != NULL) + stats->n_pkts_dropped_by_ah = port->n_pkts_dropped_by_ah; + + if (clear != 0) + port->n_pkts_dropped_by_ah = 0; + + return 0; +} + +int rte_pipeline_port_out_stats_read(struct rte_pipeline *p, uint32_t port_id, + struct rte_pipeline_port_out_stats *stats, int clear) +{ + struct rte_port_out *port; + int retval; + + if (p == NULL) { + RTE_LOG(ERR, PIPELINE, "%s: pipeline parameter NULL\n", __func__); + return -EINVAL; + } + + if (port_id >= p->num_ports_out) { + RTE_LOG(ERR, PIPELINE, + "%s: port OUT ID %u is out of range\n", __func__, port_id); + return -EINVAL; + } + + port = &p->ports_out[port_id]; + if (port->ops.f_stats != NULL) { + retval = port->ops.f_stats(port->h_port, (stats != NULL) ? &stats->stats : NULL, clear); + if (retval != 0) + return retval; + } else if (stats != NULL) + memset(&stats->stats, 0, sizeof(stats->stats)); + + if (stats != NULL) + stats->n_pkts_dropped_by_ah = port->n_pkts_dropped_by_ah; + + if (clear != 0) + port->n_pkts_dropped_by_ah = 0; + + return 0; +} + +int rte_pipeline_table_stats_read(struct rte_pipeline *p, uint32_t table_id, + struct rte_pipeline_table_stats *stats, int clear) +{ + struct rte_table *table; + int retval; + + if (p == NULL) { + RTE_LOG(ERR, PIPELINE, "%s: pipeline parameter NULL\n", + __func__); + return -EINVAL; + } + + if (table_id >= p->num_tables) { + RTE_LOG(ERR, PIPELINE, + "%s: table %u is out of range\n", __func__, table_id); + return -EINVAL; + } + + table = &p->tables[table_id]; + if (table->ops.f_stats != NULL) { + retval = table->ops.f_stats(table->h_table, (stats != NULL) ? &stats->stats : NULL, clear); + if (retval != 0) + return retval; + } else if (stats != NULL) + memset(&stats->stats, 0, sizeof(stats->stats)); + + if 
(stats != NULL) { + stats->n_pkts_dropped_by_lkp_hit_ah = + table->n_pkts_dropped_by_lkp_hit_ah; + stats->n_pkts_dropped_by_lkp_miss_ah = + table->n_pkts_dropped_by_lkp_miss_ah; + stats->n_pkts_dropped_lkp_hit = table->n_pkts_dropped_lkp_hit; + stats->n_pkts_dropped_lkp_miss = table->n_pkts_dropped_lkp_miss; + } + + if (clear != 0) { + table->n_pkts_dropped_by_lkp_hit_ah = 0; + table->n_pkts_dropped_by_lkp_miss_ah = 0; + table->n_pkts_dropped_lkp_hit = 0; + table->n_pkts_dropped_lkp_miss = 0; + } + + return 0; +} diff --git a/src/dpdk22/lib/librte_pipeline/rte_pipeline.h b/src/dpdk22/lib/librte_pipeline/rte_pipeline.h new file mode 100644 index 00000000..54593245 --- /dev/null +++ b/src/dpdk22/lib/librte_pipeline/rte_pipeline.h @@ -0,0 +1,827 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef __INCLUDE_RTE_PIPELINE_H__ +#define __INCLUDE_RTE_PIPELINE_H__ + +#ifdef __cplusplus +extern "C" { +#endif + +/** + * @file + * RTE Pipeline + * + * This tool is part of the Intel DPDK Packet Framework tool suite and provides + * a standard methodology (logically similar to OpenFlow) for rapid development + * of complex packet processing pipelines out of ports, tables and actions. + * + * Basic operation. A pipeline is constructed by connecting its input + * ports to its output ports through a chain of lookup tables. As result of + * lookup operation into the current table, one of the table entries (or the + * default table entry, in case of lookup miss) is identified to provide the + * actions to be executed on the current packet and the associated action + * meta-data. The behavior of user actions is defined through the configurable + * table action handler, while the reserved actions define the next hop for the + * current packet (either another table, an output port or packet drop) and are + * handled transparently by the framework. + * + * Initialization and run-time flows. 
Once all the pipeline elements + * (input ports, tables, output ports) have been created, input ports connected + * to tables, table action handlers configured, tables populated with the + * initial set of entries (actions and action meta-data) and input ports + * enabled, the pipeline runs automatically, pushing packets from input ports + * to tables and output ports. At each table, the identified user actions are + * being executed, resulting in action meta-data (stored in the table entry) + * and packet meta-data (stored with the packet descriptor) being updated. The + * pipeline tables can have further updates and input ports can be disabled or + * enabled later on as required. + * + * Multi-core scaling. Typically, each CPU core will run its own + * pipeline instance. Complex application-level pipelines can be implemented by + * interconnecting multiple CPU core-level pipelines in tree-like topologies, + * as the same port devices (e.g. SW rings) can serve as output ports for the + * pipeline running on CPU core A, as well as input ports for the pipeline + * running on CPU core B. This approach enables the application development + * using the pipeline (CPU cores connected serially), cluster/run-to-completion + * (CPU cores connected in parallel) or mixed (pipeline of CPU core clusters) + * programming models. + * + * Thread safety. It is possible to have multiple pipelines running on + * the same CPU core, but it is not allowed (for thread safety reasons) to have + * multiple CPU cores running the same pipeline instance. 
+ * + ***/ + +#include <stdint.h> + +#include <rte_port.h> +#include <rte_table.h> + +struct rte_mbuf; + +/* + * Pipeline + * + */ +/** Opaque data type for pipeline */ +struct rte_pipeline; + +/** Parameters for pipeline creation */ +struct rte_pipeline_params { + /** Pipeline name */ + const char *name; + + /** CPU socket ID where memory for the pipeline and its elements (ports + and tables) should be allocated */ + int socket_id; + + /** Offset within packet meta-data to port_id to be used by action + "Send packet to output port read from packet meta-data". Has to be + 4-byte aligned. */ + uint32_t offset_port_id; +}; + +/** Pipeline port in stats. */ +struct rte_pipeline_port_in_stats { + /** Port in stats. */ + struct rte_port_in_stats stats; + + /** Number of packets dropped by action handler. */ + uint64_t n_pkts_dropped_by_ah; + +}; + +/** Pipeline port out stats. */ +struct rte_pipeline_port_out_stats { + /** Port out stats. */ + struct rte_port_out_stats stats; + + /** Number of packets dropped by action handler. */ + uint64_t n_pkts_dropped_by_ah; +}; + +/** Pipeline table stats. */ +struct rte_pipeline_table_stats { + /** Table stats. */ + struct rte_table_stats stats; + + /** Number of packets dropped by lookup hit action handler. */ + uint64_t n_pkts_dropped_by_lkp_hit_ah; + + /** Number of packets dropped by lookup miss action handler. */ + uint64_t n_pkts_dropped_by_lkp_miss_ah; + + /** Number of packets dropped by pipeline on behalf of this table based on + * the action specified in table entry. */ + uint64_t n_pkts_dropped_lkp_hit; + + /** Number of packets dropped by pipeline on behalf of this table based on + * the action specified in table entry. 
*/ + uint64_t n_pkts_dropped_lkp_miss; +}; + +/** + * Pipeline create + * + * @param params + * Parameters for pipeline creation + * @return + * Handle to pipeline instance on success or NULL otherwise + */ +struct rte_pipeline *rte_pipeline_create(struct rte_pipeline_params *params); + +/** + * Pipeline free + * + * @param p + * Handle to pipeline instance + * @return + * 0 on success, error code otherwise + */ +int rte_pipeline_free(struct rte_pipeline *p); + +/** + * Pipeline consistency check + * + * @param p + * Handle to pipeline instance + * @return + * 0 on success, error code otherwise + */ +int rte_pipeline_check(struct rte_pipeline *p); + +/** + * Pipeline run + * + * @param p + * Handle to pipeline instance + * @return + * 0 on success, error code otherwise + */ +int rte_pipeline_run(struct rte_pipeline *p); + +/** + * Pipeline flush + * + * @param p + * Handle to pipeline instance + * @return + * 0 on success, error code otherwise + */ +int rte_pipeline_flush(struct rte_pipeline *p); + +/* + * Actions + * + */ +/** Reserved actions */ +enum rte_pipeline_action { + /** Drop the packet */ + RTE_PIPELINE_ACTION_DROP = 0, + + /** Send packet to output port */ + RTE_PIPELINE_ACTION_PORT, + + /** Send packet to output port read from packet meta-data */ + RTE_PIPELINE_ACTION_PORT_META, + + /** Send packet to table */ + RTE_PIPELINE_ACTION_TABLE, + + /** Number of reserved actions */ + RTE_PIPELINE_ACTIONS +}; + +/* + * Table + * + */ +/** Maximum number of tables allowed for any given pipeline instance. The + value of this parameter cannot be changed. */ +#define RTE_PIPELINE_TABLE_MAX 64 + +/** + * Head format for the table entry of any pipeline table. For any given + * pipeline table, all table entries should have the same size and format. 
For + * any given pipeline table, the table entry has to start with a head of this + * structure, which contains the reserved actions and their associated + * meta-data, and then optionally continues with user actions and their + * associated meta-data. As all the currently defined reserved actions are + * mutually exclusive, only one reserved action can be set per table entry. + */ +struct rte_pipeline_table_entry { + /** Reserved action */ + enum rte_pipeline_action action; + + union { + /** Output port ID (meta-data for "Send packet to output port" + action) */ + uint32_t port_id; + /** Table ID (meta-data for "Send packet to table" action) */ + uint32_t table_id; + }; + /** Start of table entry area for user defined actions and meta-data */ + uint8_t action_data[0]; +}; + +/** + * Pipeline table action handler on lookup hit + * + * The action handler can decide to drop packets by resetting the associated + * packet bit in the pkts_mask parameter. In this case, the action handler is + * required not to free the packet buffer, which will be freed eventually by + * the pipeline. + * + * @param pkts + * Burst of input packets specified as array of up to 64 pointers to struct + * rte_mbuf + * @param pkts_mask + * 64-bit bitmask specifying which packets in the input burst are valid. When + * pkts_mask bit n is set, then element n of pkts array is pointing to a + * valid packet and element n of entries array is pointing to a valid table + * entry associated with the packet, with the association typically done by + * the table lookup operation. Otherwise, element n of pkts array and element + * n of entries array will not be accessed. 
+ * @param entries + * Set of table entries specified as array of up to 64 pointers to struct + * rte_pipeline_table_entry + * @param arg + * Opaque parameter registered by the user at the pipeline table creation + * time + * @return + * 0 on success, error code otherwise + */ +typedef int (*rte_pipeline_table_action_handler_hit)( + struct rte_mbuf **pkts, + uint64_t *pkts_mask, + struct rte_pipeline_table_entry **entries, + void *arg); + +/** + * Pipeline table action handler on lookup miss + * + * The action handler can decide to drop packets by resetting the associated + * packet bit in the pkts_mask parameter. In this case, the action handler is + * required not to free the packet buffer, which will be freed eventually by + * the pipeline. + * + * @param pkts + * Burst of input packets specified as array of up to 64 pointers to struct + * rte_mbuf + * @param pkts_mask + * 64-bit bitmask specifying which packets in the input burst are valid. When + * pkts_mask bit n is set, then element n of pkts array is pointing to a + * valid packet. Otherwise, element n of pkts array will not be accessed. + * @param entry + * Single table entry associated with all the valid packets from the input + * burst, specified as pointer to struct rte_pipeline_table_entry. + * This entry is the pipeline table default entry that is associated by the + * table lookup operation with the input packets that have resulted in lookup + * miss. + * @param arg + * Opaque parameter registered by the user at the pipeline table creation + * time + * @return + * 0 on success, error code otherwise + */ +typedef int (*rte_pipeline_table_action_handler_miss)( + struct rte_mbuf **pkts, + uint64_t *pkts_mask, + struct rte_pipeline_table_entry *entry, + void *arg); + +/** Parameters for pipeline table creation. Action handlers have to be either + both enabled or both disabled (they can be disabled by setting them to + NULL). 
*/ +struct rte_pipeline_table_params { + /** Table operations (specific to each table type) */ + struct rte_table_ops *ops; + /** Opaque param to be passed to the table create operation when + invoked */ + void *arg_create; + /** Callback function to execute the user actions on input packets in + case of lookup hit */ + rte_pipeline_table_action_handler_hit f_action_hit; + /** Callback function to execute the user actions on input packets in + case of lookup miss */ + rte_pipeline_table_action_handler_miss f_action_miss; + + /** Opaque parameter to be passed to lookup hit and/or lookup miss + action handlers when invoked */ + void *arg_ah; + /** Memory size to be reserved per table entry for storing the user + actions and their meta-data */ + uint32_t action_data_size; +}; + +/** + * Pipeline table create + * + * @param p + * Handle to pipeline instance + * @param params + * Parameters for pipeline table creation + * @param table_id + * Table ID. Valid only within the scope of table IDs of the current + * pipeline. Only returned after a successful invocation. + * @return + * 0 on success, error code otherwise + */ +int rte_pipeline_table_create(struct rte_pipeline *p, + struct rte_pipeline_table_params *params, + uint32_t *table_id); + +/** + * Pipeline table default entry add + * + * The contents of the table default entry is updated with the provided actions + * and meta-data. When the default entry is not configured (by using this + * function), the built-in default entry has the action "Drop" and meta-data + * set to all-zeros. + * + * @param p + * Handle to pipeline instance + * @param table_id + * Table ID (returned by previous invocation of pipeline table create) + * @param default_entry + * New contents for the table default entry + * @param default_entry_ptr + * On successful invocation, pointer to the default table entry which can be + * used for further read-write accesses to this table entry. 
This pointer + * is valid until the default entry is deleted or re-added. + * @return + * 0 on success, error code otherwise + */ +int rte_pipeline_table_default_entry_add(struct rte_pipeline *p, + uint32_t table_id, + struct rte_pipeline_table_entry *default_entry, + struct rte_pipeline_table_entry **default_entry_ptr); + +/** + * Pipeline table default entry delete + * + * The new contents of the table default entry is set to reserved action "Drop + * the packet" with meta-data cleared (i.e. set to all-zeros). + * + * @param p + * Handle to pipeline instance + * @param table_id + * Table ID (returned by previous invocation of pipeline table create) + * @param entry + * On successful invocation, when entry points to a valid buffer, the + * previous contents of the table default entry (as it was just before the + * delete operation) is copied to this buffer + * @return + * 0 on success, error code otherwise + */ +int rte_pipeline_table_default_entry_delete(struct rte_pipeline *p, + uint32_t table_id, + struct rte_pipeline_table_entry *entry); + +/** + * Pipeline table entry add + * + * @param p + * Handle to pipeline instance + * @param table_id + * Table ID (returned by previous invocation of pipeline table create) + * @param key + * Table entry key + * @param entry + * New contents for the table entry identified by key + * @param key_found + * On successful invocation, set to TRUE (value different than 0) if key was + * already present in the table before the add operation and to FALSE (value + * 0) if not + * @param entry_ptr + * On successful invocation, pointer to the table entry associated with key. 
+ * This can be used for further read-write accesses to this table entry and + * is valid until the key is deleted from the table or re-added (usually for + * associating different actions and/or action meta-data to the current key) + * @return + * 0 on success, error code otherwise + */ +int rte_pipeline_table_entry_add(struct rte_pipeline *p, + uint32_t table_id, + void *key, + struct rte_pipeline_table_entry *entry, + int *key_found, + struct rte_pipeline_table_entry **entry_ptr); + +/** + * Pipeline table entry delete + * + * @param p + * Handle to pipeline instance + * @param table_id + * Table ID (returned by previous invocation of pipeline table create) + * @param key + * Table entry key + * @param key_found + * On successful invocation, set to TRUE (value different than 0) if key was + * found in the table before the delete operation and to FALSE (value 0) if + * not + * @param entry + * On successful invocation, when key is found in the table and entry points + * to a valid buffer, the table entry contents (as it was before the delete + * was performed) is copied to this buffer + * @return + * 0 on success, error code otherwise + */ +int rte_pipeline_table_entry_delete(struct rte_pipeline *p, + uint32_t table_id, + void *key, + int *key_found, + struct rte_pipeline_table_entry *entry); + +/** + * Pipeline table entry add bulk + * + * @param p + * Handle to pipeline instance + * @param table_id + * Table ID (returned by previous invocation of pipeline table create) + * @param keys + * Array containing table entry keys + * @param entries + * Array containing new contents for every table entry identified by key + * @param n_keys + * Number of keys to add + * @param key_found + * On successful invocation, key_found for every item in the array is set to + * TRUE (value different than 0) if key was already present in the table + * before the add operation and to FALSE (value 0) if not + * @param entries_ptr + * On successful invocation, array *entries_ptr stores 
pointer to every table + * entry associated with key. This can be used for further read-write accesses + * to this table entry and is valid until the key is deleted from the table or + * re-added (usually for associating different actions and/or action meta-data + * to the current key) + * @return + * 0 on success, error code otherwise + */ +int rte_pipeline_table_entry_add_bulk(struct rte_pipeline *p, + uint32_t table_id, + void **keys, + struct rte_pipeline_table_entry **entries, + uint32_t n_keys, + int *key_found, + struct rte_pipeline_table_entry **entries_ptr); + +/** + * Pipeline table entry delete bulk + * + * @param p + * Handle to pipeline instance + * @param table_id + * Table ID (returned by previous invocation of pipeline table create) + * @param keys + * Array containing table entry keys + * @param n_keys + * Number of keys to delete + * @param key_found + * On successful invocation, key_found for every item in the array is set to + * TRUE (value different than 0) if key was found in the table before the + * delete operation and to FALSE (value 0) if not + * @param entries + * If entries pointer is NULL, this pointer is ignored for every entry found. + * Else, after successful invocation, if specific key is found in the table + * and entry points to a valid buffer, the table entry contents (as it was + * before the delete was performed) is copied to this buffer. + * @return + * 0 on success, error code otherwise + */ +int rte_pipeline_table_entry_delete_bulk(struct rte_pipeline *p, + uint32_t table_id, + void **keys, + uint32_t n_keys, + int *key_found, + struct rte_pipeline_table_entry **entries); + +/** + * Read pipeline table stats. + * + * This function reads table statistics identified by *table_id* of given + * pipeline *p*. + * + * @param p + * Handle to pipeline instance. + * @param table_id + * Table ID whose stats will be returned. + * @param stats + * Statistics buffer. + * @param clear + * If not 0 clear stats after reading. 
+ * @return + * 0 on success, error code otherwise + */ +int rte_pipeline_table_stats_read(struct rte_pipeline *p, uint32_t table_id, + struct rte_pipeline_table_stats *stats, int clear); + +/* + * Port IN + * + */ +/** Maximum number of input ports allowed for any given pipeline instance. The + value of this parameter cannot be changed. */ +#define RTE_PIPELINE_PORT_IN_MAX 64 + +/** + * Pipeline input port action handler + * + * The action handler can decide to drop packets by resetting the associated + * packet bit in the pkts_mask parameter. In this case, the action handler is + * required not to free the packet buffer, which will be freed eventually by + * the pipeline. + * + * @param pkts + * Burst of input packets specified as array of up to 64 pointers to struct + * rte_mbuf + * @param n + * Number of packets in the input burst. This parameter specifies that + * elements 0 to (n-1) of pkts array are valid. + * @param pkts_mask + * 64-bit bitmask specifying which packets in the input burst are still valid + * after the action handler is executed. When pkts_mask bit n is set, then + * element n of pkts array is pointing to a valid packet. + * @param arg + * Opaque parameter registered by the user at the pipeline input port + * creation time + * @return + * 0 on success, error code otherwise + */ +typedef int (*rte_pipeline_port_in_action_handler)( + struct rte_mbuf **pkts, + uint32_t n, + uint64_t *pkts_mask, + void *arg); + +/** Parameters for pipeline input port creation */ +struct rte_pipeline_port_in_params { + /** Input port operations (specific to each port type) */ + struct rte_port_in_ops *ops; + /** Opaque parameter to be passed to create operation when invoked */ + void *arg_create; + + /** Callback function to execute the user actions on input packets. + Disabled if set to NULL. 
*/ + rte_pipeline_port_in_action_handler f_action; + /** Opaque parameter to be passed to the action handler when invoked */ + void *arg_ah; + + /** Recommended burst size for the RX operation(in number of pkts) */ + uint32_t burst_size; +}; + +/** + * Pipeline input port create + * + * @param p + * Handle to pipeline instance + * @param params + * Parameters for pipeline input port creation + * @param port_id + * Input port ID. Valid only within the scope of input port IDs of the + * current pipeline. Only returned after a successful invocation. + * @return + * 0 on success, error code otherwise + */ +int rte_pipeline_port_in_create(struct rte_pipeline *p, + struct rte_pipeline_port_in_params *params, + uint32_t *port_id); + +/** + * Pipeline input port connect to table + * + * @param p + * Handle to pipeline instance + * @param port_id + * Port ID (returned by previous invocation of pipeline input port create) + * @param table_id + * Table ID (returned by previous invocation of pipeline table create) + * @return + * 0 on success, error code otherwise + */ +int rte_pipeline_port_in_connect_to_table(struct rte_pipeline *p, + uint32_t port_id, + uint32_t table_id); + +/** + * Pipeline input port enable + * + * @param p + * Handle to pipeline instance + * @param port_id + * Port ID (returned by previous invocation of pipeline input port create) + * @return + * 0 on success, error code otherwise + */ +int rte_pipeline_port_in_enable(struct rte_pipeline *p, + uint32_t port_id); + +/** + * Pipeline input port disable + * + * @param p + * Handle to pipeline instance + * @param port_id + * Port ID (returned by previous invocation of pipeline input port create) + * @return + * 0 on success, error code otherwise + */ +int rte_pipeline_port_in_disable(struct rte_pipeline *p, + uint32_t port_id); + +/** + * Read pipeline port in stats. + * + * This function reads port in statistics identified by *port_id* of given + * pipeline *p*. 
+ * + * @param p + * Handle to pipeline instance. + * @param port_id + * Port ID whose stats will be returned. + * @param stats + * Statistics buffer. + * @param clear + * If not 0 clear stats after reading. + * @return + * 0 on success, error code otherwise + */ +int rte_pipeline_port_in_stats_read(struct rte_pipeline *p, uint32_t port_id, + struct rte_pipeline_port_in_stats *stats, int clear); + +/* + * Port OUT + * + */ +/** Maximum number of output ports allowed for any given pipeline instance. The + value of this parameter cannot be changed. */ +#define RTE_PIPELINE_PORT_OUT_MAX 64 + +/** + * Pipeline output port action handler for single packet + * + * The action handler can decide to drop packets by resetting the pkt_mask + * argument. In this case, the action handler is required not to free the + * packet buffer, which will be freed eventually by the pipeline. + * + * @param pkt + * Input packet + * @param pkt_mask + * Output argument set to 0 when the action handler decides to drop the input + * packet and to 1LLU otherwise + * @param arg + * Opaque parameter registered by the user at the pipeline output port + * creation time + * @return + * 0 on success, error code otherwise + */ +typedef int (*rte_pipeline_port_out_action_handler)( + struct rte_mbuf *pkt, + uint64_t *pkt_mask, + void *arg); + +/** + * Pipeline output port action handler bulk + * + * The action handler can decide to drop packets by resetting the associated + * packet bit in the pkts_mask parameter. In this case, the action handler is + * required not to free the packet buffer, which will be freed eventually by + * the pipeline. + * + * @param pkts + * Burst of input packets specified as array of up to 64 pointers to struct + * rte_mbuf + * @param pkts_mask + * 64-bit bitmask specifying which packets in the input burst are valid. When + * pkts_mask bit n is set, then element n of pkts array is pointing to a + * valid packet. Otherwise, element n of pkts array will not be accessed. 
+ * @param arg + * Opaque parameter registered by the user at the pipeline output port + * creation time + * @return + * 0 on success, error code otherwise + */ +typedef int (*rte_pipeline_port_out_action_handler_bulk)( + struct rte_mbuf **pkts, + uint64_t *pkts_mask, + void *arg); + +/** Parameters for pipeline output port creation. The action handlers have to +be either both enabled or both disabled (by setting them to NULL). When +enabled, the pipeline selects between them at different moments, based on the +number of packets that have to be sent to the same output port. */ +struct rte_pipeline_port_out_params { + /** Output port operations (specific to each port type) */ + struct rte_port_out_ops *ops; + /** Opaque parameter to be passed to create operation when invoked */ + void *arg_create; + + /** Callback function executing the user actions on single input + packet */ + rte_pipeline_port_out_action_handler f_action; + /** Callback function executing the user actions on burst of input + packets */ + rte_pipeline_port_out_action_handler_bulk f_action_bulk; + /** Opaque parameter to be passed to the action handler when invoked */ + void *arg_ah; +}; + +/** + * Pipeline output port create + * + * @param p + * Handle to pipeline instance + * @param params + * Parameters for pipeline output port creation + * @param port_id + * Output port ID. Valid only within the scope of output port IDs of the + * current pipeline. Only returned after a successful invocation. + * @return + * 0 on success, error code otherwise + */ +int rte_pipeline_port_out_create(struct rte_pipeline *p, + struct rte_pipeline_port_out_params *params, + uint32_t *port_id); + +/** + * Pipeline output port packet insert + * + * This function is called by the table action handler whenever it generates a + * new packet to be sent out through one of the pipeline output ports. 
This + * packet is not part of the burst of input packets read from any of the + * pipeline input ports, so it is not an element of the pkts array input + * parameter of the table action handler. This packet can be dropped by the + * output port action handler. + * + * @param p + * Handle to pipeline instance + * @param port_id + * Output port ID (returned by previous invocation of pipeline output port + * create) to send the packet specified by pkt + * @param pkt + * New packet generated by the table action handler + * @return + * 0 on success, error code otherwise + */ +int rte_pipeline_port_out_packet_insert(struct rte_pipeline *p, + uint32_t port_id, + struct rte_mbuf *pkt); + +/** + * Read pipeline port out stats. + * + * This function reads port out statistics identified by *port_id* of given + * pipeline *p*. + * + * @param p + * Handle to pipeline instance. + * @param port_id + * Port ID whose stats will be returned. + * @param stats + * Statistics buffer. + * @param clear + * If not 0 clear stats after reading. + * @return + * 0 on success, error code otherwise + */ +int rte_pipeline_port_out_stats_read(struct rte_pipeline *p, uint32_t port_id, + struct rte_pipeline_port_out_stats *stats, int clear); +#ifdef __cplusplus +} +#endif + +#endif diff --git a/src/dpdk22/lib/librte_port/rte_port.h b/src/dpdk22/lib/librte_port/rte_port.h new file mode 100644 index 00000000..00b97a91 --- /dev/null +++ b/src/dpdk22/lib/librte_port/rte_port.h @@ -0,0 +1,263 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. 
+ * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef __INCLUDE_RTE_PORT_H__ +#define __INCLUDE_RTE_PORT_H__ + +#ifdef __cplusplus +extern "C" { +#endif + +/** + * @file + * RTE Port + * + * This tool is part of the Intel DPDK Packet Framework tool suite and provides + * a standard interface to implement different types of packet ports. 
+ * + ***/ + +#include <stdint.h> +#include <rte_mbuf.h> + +/**@{ + * Macros to allow accessing metadata stored in the mbuf headroom + * just beyond the end of the mbuf data structure returned by a port + */ +#define RTE_MBUF_METADATA_UINT8_PTR(mbuf, offset) \ + (&((uint8_t *)(mbuf))[offset]) +#define RTE_MBUF_METADATA_UINT16_PTR(mbuf, offset) \ + ((uint16_t *) RTE_MBUF_METADATA_UINT8_PTR(mbuf, offset)) +#define RTE_MBUF_METADATA_UINT32_PTR(mbuf, offset) \ + ((uint32_t *) RTE_MBUF_METADATA_UINT8_PTR(mbuf, offset)) +#define RTE_MBUF_METADATA_UINT64_PTR(mbuf, offset) \ + ((uint64_t *) RTE_MBUF_METADATA_UINT8_PTR(mbuf, offset)) + +#define RTE_MBUF_METADATA_UINT8(mbuf, offset) \ + (*RTE_MBUF_METADATA_UINT8_PTR(mbuf, offset)) +#define RTE_MBUF_METADATA_UINT16(mbuf, offset) \ + (*RTE_MBUF_METADATA_UINT16_PTR(mbuf, offset)) +#define RTE_MBUF_METADATA_UINT32(mbuf, offset) \ + (*RTE_MBUF_METADATA_UINT32_PTR(mbuf, offset)) +#define RTE_MBUF_METADATA_UINT64(mbuf, offset) \ + (*RTE_MBUF_METADATA_UINT64_PTR(mbuf, offset)) +/**@}*/ + +/* + * Port IN + * + */ +/** Maximum number of packets read from any input port in a single burst. +Cannot be changed. */ +#define RTE_PORT_IN_BURST_SIZE_MAX 64 + +/** Input port statistics */ +struct rte_port_in_stats { + uint64_t n_pkts_in; + uint64_t n_pkts_drop; +}; + +/** + * Input port create + * + * @param params + * Parameters for input port creation + * @param socket_id + * CPU socket ID (e.g. 
for memory allocation purpose) + * @return + * Handle to input port instance + */ +typedef void* (*rte_port_in_op_create)(void *params, int socket_id); + +/** + * Input port free + * + * @param port + * Handle to input port instance + * @return + * 0 on success, error code otherwise + */ +typedef int (*rte_port_in_op_free)(void *port); + +/** + * Input port packet burst RX + * + * @param port + * Handle to input port instance + * @param pkts + * Burst of input packets + * @param n_pkts + * Number of packets in the input burst + * @return + * 0 on success, error code otherwise + */ +typedef int (*rte_port_in_op_rx)( + void *port, + struct rte_mbuf **pkts, + uint32_t n_pkts); + +/** + * Input port stats get + * + * @param port + * Handle to input port instance + * @param stats + * Handle to port_in stats struct to copy data + * @param clear + * Flag indicating that stats should be cleared after read + * + * @return + * Error code or 0 on success. + */ +typedef int (*rte_port_in_op_stats_read)( + void *port, + struct rte_port_in_stats *stats, + int clear); + +/** Input port interface defining the input port operation */ +struct rte_port_in_ops { + rte_port_in_op_create f_create; /**< Create */ + rte_port_in_op_free f_free; /**< Free */ + rte_port_in_op_rx f_rx; /**< Packet RX (packet burst) */ + rte_port_in_op_stats_read f_stats; /**< Stats */ +}; + +/* + * Port OUT + * + */ +/** Output port statistics */ +struct rte_port_out_stats { + uint64_t n_pkts_in; + uint64_t n_pkts_drop; +}; + +/** + * Output port create + * + * @param params + * Parameters for output port creation + * @param socket_id + * CPU socket ID (e.g. 
for memory allocation purpose) + * @return + * Handle to output port instance + */ +typedef void* (*rte_port_out_op_create)(void *params, int socket_id); + +/** + * Output port free + * + * @param port + * Handle to output port instance + * @return + * 0 on success, error code otherwise + */ +typedef int (*rte_port_out_op_free)(void *port); + +/** + * Output port single packet TX + * + * @param port + * Handle to output port instance + * @param pkt + * Input packet + * @return + * 0 on success, error code otherwise + */ +typedef int (*rte_port_out_op_tx)( + void *port, + struct rte_mbuf *pkt); + +/** + * Output port packet burst TX + * + * @param port + * Handle to output port instance + * @param pkts + * Burst of input packets specified as array of up to 64 pointers to struct + * rte_mbuf + * @param pkts_mask + * 64-bit bitmask specifying which packets in the input burst are valid. When + * pkts_mask bit n is set, then element n of pkts array is pointing to a + * valid packet. Otherwise, element n of pkts array will not be accessed. + * @return + * 0 on success, error code otherwise + */ +typedef int (*rte_port_out_op_tx_bulk)( + void *port, + struct rte_mbuf **pkt, + uint64_t pkts_mask); + +/** + * Output port flush + * + * @param port + * Handle to output port instance + * @return + * 0 on success, error code otherwise + */ +typedef int (*rte_port_out_op_flush)(void *port); + +/** + * Output port stats read + * + * @param port + * Handle to output port instance + * @param stats + * Handle to port_out stats struct to copy data + * @param clear + * Flag indicating that stats should be cleared after read + * + * @return + * Error code or 0 on success. 
+ */ +typedef int (*rte_port_out_op_stats_read)( + void *port, + struct rte_port_out_stats *stats, + int clear); + +/** Output port interface defining the output port operation */ +struct rte_port_out_ops { + rte_port_out_op_create f_create; /**< Create */ + rte_port_out_op_free f_free; /**< Free */ + rte_port_out_op_tx f_tx; /**< Packet TX (single packet) */ + rte_port_out_op_tx_bulk f_tx_bulk; /**< Packet TX (packet burst) */ + rte_port_out_op_flush f_flush; /**< Flush */ + rte_port_out_op_stats_read f_stats; /**< Stats */ +}; + +#ifdef __cplusplus +} +#endif + +#endif diff --git a/src/dpdk22/lib/librte_port/rte_port_ethdev.h b/src/dpdk22/lib/librte_port/rte_port_ethdev.h new file mode 100644 index 00000000..201a79e4 --- /dev/null +++ b/src/dpdk22/lib/librte_port/rte_port_ethdev.h @@ -0,0 +1,105 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef __INCLUDE_RTE_PORT_ETHDEV_H__ +#define __INCLUDE_RTE_PORT_ETHDEV_H__ + +#ifdef __cplusplus +extern "C" { +#endif + +/** + * @file + * RTE Port Ethernet Device + * + * ethdev_reader: input port built on top of pre-initialized NIC RX queue + * ethdev_writer: output port built on top of pre-initialized NIC TX queue + * + ***/ + +#include + +#include "rte_port.h" + +/** ethdev_reader port parameters */ +struct rte_port_ethdev_reader_params { + /** NIC RX port ID */ + uint8_t port_id; + + /** NIC RX queue ID */ + uint16_t queue_id; +}; + +/** ethdev_reader port operations */ +extern struct rte_port_in_ops rte_port_ethdev_reader_ops; + +/** ethdev_writer port parameters */ +struct rte_port_ethdev_writer_params { + /** NIC TX port ID */ + uint8_t port_id; + + /** NIC TX queue ID */ + uint16_t queue_id; + + /** Recommended burst size to NIC TX queue. The actual burst size can be + bigger or smaller than this value. */ + uint32_t tx_burst_sz; +}; + +/** ethdev_writer port operations */ +extern struct rte_port_out_ops rte_port_ethdev_writer_ops; + +/** ethdev_writer_nodrop port parameters */ +struct rte_port_ethdev_writer_nodrop_params { + /** NIC TX port ID */ + uint8_t port_id; + + /** NIC TX queue ID */ + uint16_t queue_id; + + /** Recommended burst size to NIC TX queue. The actual burst size can be + bigger or smaller than this value.
*/ + uint32_t tx_burst_sz; + + /** Maximum number of retries, 0 for no limit */ + uint32_t n_retries; +}; + +/** ethdev_writer_nodrop port operations */ +extern struct rte_port_out_ops rte_port_ethdev_writer_nodrop_ops; + +#ifdef __cplusplus +} +#endif + +#endif diff --git a/src/dpdk22/lib/librte_port/rte_port_frag.h b/src/dpdk22/lib/librte_port/rte_port_frag.h new file mode 100644 index 00000000..0085ff7c --- /dev/null +++ b/src/dpdk22/lib/librte_port/rte_port_frag.h @@ -0,0 +1,101 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef __INCLUDE_RTE_PORT_IP_FRAG_H__ +#define __INCLUDE_RTE_PORT_IP_FRAG_H__ + +#ifdef __cplusplus +extern "C" { +#endif + +/** + * @file + * RTE Port for IPv4 Fragmentation + * + * This port is built on top of pre-initialized single consumer rte_ring. In + * order to minimize the amount of packets stored in the ring at any given + * time, the IP fragmentation functionality is executed on ring read operation, + * hence this port is implemented as an input port. A regular ring_writer port + * can be created to write to the same ring. + * + * The packets written to the ring are either complete IP datagrams or jumbo + * frames (i.e. IP packets with length bigger than provided MTU value). The + * packets read from the ring are all non-jumbo frames. The complete IP + * datagrams written to the ring are not changed. The jumbo frames are + * fragmented into several IP packets with length less or equal to MTU. + * + ***/ + +#include + +#include + +#include "rte_port.h" + +/** ring_reader_ipv4_frag port parameters */ +struct rte_port_ring_reader_frag_params { + /** Underlying single consumer ring that has to be pre-initialized. */ + struct rte_ring *ring; + + /** Maximum Transfer Unit (MTU). Maximum IP packet size (in bytes). */ + uint32_t mtu; + + /** Size of application dependent meta-data stored per each input packet + that has to be copied to each of the fragments originating from the + same input IP datagram. 
*/ + uint32_t metadata_size; + + /** Pre-initialized buffer pool used for allocating direct buffers for + the output fragments. */ + struct rte_mempool *pool_direct; + + /** Pre-initialized buffer pool used for allocating indirect buffers for + the output fragments. */ + struct rte_mempool *pool_indirect; +}; + +#define rte_port_ring_reader_ipv4_frag_params rte_port_ring_reader_frag_params + +#define rte_port_ring_reader_ipv6_frag_params rte_port_ring_reader_frag_params + +/** ring_reader_ipv4_frag port operations */ +extern struct rte_port_in_ops rte_port_ring_reader_ipv4_frag_ops; + +/** ring_reader_ipv6_frag port operations */ +extern struct rte_port_in_ops rte_port_ring_reader_ipv6_frag_ops; + +#ifdef __cplusplus +} +#endif + +#endif diff --git a/src/dpdk22/lib/librte_port/rte_port_ras.h b/src/dpdk22/lib/librte_port/rte_port_ras.h new file mode 100644 index 00000000..5a16f831 --- /dev/null +++ b/src/dpdk22/lib/librte_port/rte_port_ras.h @@ -0,0 +1,90 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef __INCLUDE_RTE_PORT_RAS_H__ +#define __INCLUDE_RTE_PORT_RAS_H__ + +#ifdef __cplusplus +extern "C" { +#endif + +/** + * @file + * RTE Port for IPv4 Reassembly + * + * This port is built on top of pre-initialized single producer rte_ring. In + * order to minimize the amount of packets stored in the ring at any given + * time, the IP reassembly functionality is executed on ring write operation, + * hence this port is implemented as an output port. A regular ring_reader port + * can be created to read from the same ring. + * + * The packets written to the ring are either complete IP datagrams or IP + * fragments. The packets read from the ring are all complete IP datagrams, + * either jumbo frames (i.e. IP packets with length bigger than MTU) or not. + * The complete IP datagrams written to the ring are not changed. The IP + * fragments written to the ring are first reassembled into complete IP + * datagrams or dropped on error or IP reassembly time-out.
+ * + ***/ + +#include + +#include + +#include "rte_port.h" + +/** ring_writer_ras port parameters (shared by the ipv4 and ipv6 aliases defined below) */ +struct rte_port_ring_writer_ras_params { + /** Underlying single producer ring that has to be pre-initialized. */ + struct rte_ring *ring; + + /** Recommended burst size to ring. The actual burst size can be bigger + or smaller than this value. */ + uint32_t tx_burst_sz; +}; + +#define rte_port_ring_writer_ipv4_ras_params rte_port_ring_writer_ras_params + +#define rte_port_ring_writer_ipv6_ras_params rte_port_ring_writer_ras_params + +/** ring_writer_ipv4_ras port operations */ +extern struct rte_port_out_ops rte_port_ring_writer_ipv4_ras_ops; + +/** ring_writer_ipv6_ras port operations */ +extern struct rte_port_out_ops rte_port_ring_writer_ipv6_ras_ops; + +#ifdef __cplusplus +} +#endif + +#endif diff --git a/src/dpdk22/lib/librte_port/rte_port_ring.h b/src/dpdk22/lib/librte_port/rte_port_ring.h new file mode 100644 index 00000000..de377d28 --- /dev/null +++ b/src/dpdk22/lib/librte_port/rte_port_ring.h @@ -0,0 +1,123 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission.
+ * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef __INCLUDE_RTE_PORT_RING_H__ +#define __INCLUDE_RTE_PORT_RING_H__ + +#ifdef __cplusplus +extern "C" { +#endif + +/** + * @file + * RTE Port Ring + * + * ring_reader: + * input port built on top of pre-initialized single consumer ring + * ring_writer: + * output port built on top of pre-initialized single producer ring + * ring_multi_reader: + * input port built on top of pre-initialized multi consumers ring + * ring_multi_writer: + * output port built on top of pre-initialized multi producers ring + * + ***/ + +#include + +#include + +#include "rte_port.h" + +/** ring_reader port parameters */ +struct rte_port_ring_reader_params { + /** Underlying consumer ring that has to be pre-initialized */ + struct rte_ring *ring; +}; + +/** ring_reader port operations */ +extern struct rte_port_in_ops rte_port_ring_reader_ops; + +/** ring_writer port parameters */ +struct rte_port_ring_writer_params { + /** Underlying producer ring that has to be pre-initialized */ + struct rte_ring *ring; + + /** Recommended burst size to ring. The actual burst size can be + bigger or smaller than this value. 
*/ + uint32_t tx_burst_sz; +}; + +/** ring_writer port operations */ +extern struct rte_port_out_ops rte_port_ring_writer_ops; + +/** ring_writer_nodrop port parameters */ +struct rte_port_ring_writer_nodrop_params { + /** Underlying producer ring that has to be pre-initialized */ + struct rte_ring *ring; + + /** Recommended burst size to ring. The actual burst size can be + bigger or smaller than this value. */ + uint32_t tx_burst_sz; + + /** Maximum number of retries, 0 for no limit */ + uint32_t n_retries; +}; + +/** ring_writer_nodrop port operations */ +extern struct rte_port_out_ops rte_port_ring_writer_nodrop_ops; + +/** ring_multi_reader port parameters */ +#define rte_port_ring_multi_reader_params rte_port_ring_reader_params + +/** ring_multi_reader port operations */ +extern struct rte_port_in_ops rte_port_ring_multi_reader_ops; + +/** ring_multi_writer port parameters */ +#define rte_port_ring_multi_writer_params rte_port_ring_writer_params + +/** ring_multi_writer port operations */ +extern struct rte_port_out_ops rte_port_ring_multi_writer_ops; + +/** ring_multi_writer_nodrop port parameters */ +#define rte_port_ring_multi_writer_nodrop_params \ + rte_port_ring_writer_nodrop_params + +/** ring_multi_writer_nodrop port operations */ +extern struct rte_port_out_ops rte_port_ring_multi_writer_nodrop_ops; + +#ifdef __cplusplus +} +#endif + +#endif diff --git a/src/dpdk22/lib/librte_port/rte_port_sched.h b/src/dpdk22/lib/librte_port/rte_port_sched.h new file mode 100644 index 00000000..555415ab --- /dev/null +++ b/src/dpdk22/lib/librte_port/rte_port_sched.h @@ -0,0 +1,82 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. + * All rights reserved. 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#ifndef __INCLUDE_RTE_PORT_SCHED_H__ +#define __INCLUDE_RTE_PORT_SCHED_H__ + +#ifdef __cplusplus +extern "C" { +#endif + +/** + * @file + * RTE Port Hierarchical Scheduler + * + * sched_reader: input port built on top of pre-initialized rte_sched_port + * sched_writer: output port built on top of pre-initialized rte_sched_port + * + ***/ + +#include + +#include + +#include "rte_port.h" + +/** sched_reader port parameters */ +struct rte_port_sched_reader_params { + /** Underlying pre-initialized rte_sched_port */ + struct rte_sched_port *sched; +}; + +/** sched_reader port operations */ +extern struct rte_port_in_ops rte_port_sched_reader_ops; + +/** sched_writer port parameters */ +struct rte_port_sched_writer_params { + /** Underlying pre-initialized rte_sched_port */ + struct rte_sched_port *sched; + + /** Recommended burst size. The actual burst size can be bigger or + smaller than this value. */ + uint32_t tx_burst_sz; +}; + +/** sched_writer port operations */ +extern struct rte_port_out_ops rte_port_sched_writer_ops; + +#ifdef __cplusplus +} +#endif + +#endif diff --git a/src/dpdk22/lib/librte_port/rte_port_source_sink.h b/src/dpdk22/lib/librte_port/rte_port_source_sink.h new file mode 100644 index 00000000..0f9be799 --- /dev/null +++ b/src/dpdk22/lib/librte_port/rte_port_source_sink.h @@ -0,0 +1,70 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. 
+ * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#ifndef __INCLUDE_RTE_PORT_SOURCE_SINK_H__ +#define __INCLUDE_RTE_PORT_SOURCE_SINK_H__ + +#ifdef __cplusplus +extern "C" { +#endif + +/** + * @file + * RTE Port Source/Sink + * + * source: input port that can be used to generate packets + * sink: output port that drops all packets written to it + * + ***/ + +#include "rte_port.h" + +/** source port parameters */ +struct rte_port_source_params { + /** Pre-initialized buffer pool */ + struct rte_mempool *mempool; +}; + +/** source port operations */ +extern struct rte_port_in_ops rte_port_source_ops; + +/** sink port parameters: NONE */ + +/** sink port operations */ +extern struct rte_port_out_ops rte_port_sink_ops; + +#ifdef __cplusplus +} +#endif + +#endif diff --git a/src/dpdk22/lib/librte_ring/rte_ring.c b/src/dpdk22/lib/librte_ring/rte_ring.c new file mode 100644 index 00000000..d80faf3b --- /dev/null +++ b/src/dpdk22/lib/librte_ring/rte_ring.c @@ -0,0 +1,373 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2015 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +/* + * Derived from FreeBSD's bufring.c + * + ************************************************************************** + * + * Copyright (c) 2007,2008 Kip Macy kmacy@freebsd.org + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * + * 2. The name of Kip Macy nor the names of other + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + * + ***************************************************************************/ + +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "rte_ring.h" + +TAILQ_HEAD(rte_ring_list, rte_tailq_entry); + +static struct rte_tailq_elem rte_ring_tailq = { + .name = RTE_TAILQ_RING_NAME, +}; +EAL_REGISTER_TAILQ(rte_ring_tailq) + +/* true if x is a power of 2 */ +#define POWEROF2(x) ((((x)-1) & (x)) == 0) + +/* return the size of memory occupied by a ring */ +ssize_t +rte_ring_get_memsize(unsigned count) +{ + ssize_t sz; + + /* count must be a power of 2 */ + if ((!POWEROF2(count)) || (count > RTE_RING_SZ_MASK )) { + RTE_LOG(ERR, RING, + "Requested size is invalid, must be power of 2, and " + "do not exceed the size limit %u\n", RTE_RING_SZ_MASK); + return -EINVAL; + } + + sz = sizeof(struct rte_ring) + count * sizeof(void *); + sz = RTE_ALIGN(sz, RTE_CACHE_LINE_SIZE); + return sz; +} + +int +rte_ring_init(struct rte_ring *r, const char *name, unsigned count, + unsigned flags) +{ + /* compilation-time checks */ + RTE_BUILD_BUG_ON((sizeof(struct rte_ring) & + RTE_CACHE_LINE_MASK) != 0); +#ifdef RTE_RING_SPLIT_PROD_CONS + RTE_BUILD_BUG_ON((offsetof(struct rte_ring, cons) & + RTE_CACHE_LINE_MASK) != 0); +#endif + RTE_BUILD_BUG_ON((offsetof(struct rte_ring, prod) & + 
RTE_CACHE_LINE_MASK) != 0); +#ifdef RTE_LIBRTE_RING_DEBUG + RTE_BUILD_BUG_ON((sizeof(struct rte_ring_debug_stats) & + RTE_CACHE_LINE_MASK) != 0); + RTE_BUILD_BUG_ON((offsetof(struct rte_ring, stats) & + RTE_CACHE_LINE_MASK) != 0); +#endif + + /* init the ring structure */ + memset(r, 0, sizeof(*r)); + snprintf(r->name, sizeof(r->name), "%s", name); + r->flags = flags; + r->prod.watermark = count; + r->prod.sp_enqueue = !!(flags & RING_F_SP_ENQ); + r->cons.sc_dequeue = !!(flags & RING_F_SC_DEQ); + r->prod.size = r->cons.size = count; + r->prod.mask = r->cons.mask = count-1; + r->prod.head = r->cons.head = 0; + r->prod.tail = r->cons.tail = 0; + + return 0; +} + +/* create the ring: reserve a memzone sized by rte_ring_get_memsize(), initialise the ring in it and link it into the ring tailq. Returns NULL on failure; rte_errno is set here (positive errno value) for an invalid count or a failed tailq entry allocation, and is left to rte_memzone_reserve() when the reservation fails. */ +struct rte_ring * +rte_ring_create(const char *name, unsigned count, int socket_id, + unsigned flags) +{ + char mz_name[RTE_MEMZONE_NAMESIZE]; + struct rte_ring *r; + struct rte_tailq_entry *te; + const struct rte_memzone *mz; + ssize_t ring_size; + int mz_flags = 0; + struct rte_ring_list* ring_list = NULL; + + ring_list = RTE_TAILQ_CAST(rte_ring_tailq.head, rte_ring_list); + + ring_size = rte_ring_get_memsize(count); + if (ring_size < 0) { + rte_errno = -ring_size; /* get_memsize returns -EINVAL; rte_errno holds positive codes */ + return NULL; + } + + te = rte_zmalloc("RING_TAILQ_ENTRY", sizeof(*te), 0); + if (te == NULL) { + RTE_LOG(ERR, RING, "Cannot reserve memory for tailq\n"); + rte_errno = ENOMEM; + return NULL; + } + + snprintf(mz_name, sizeof(mz_name), "%s%s", RTE_RING_MZ_PREFIX, name); + + rte_rwlock_write_lock(RTE_EAL_TAILQ_RWLOCK); + + /* reserve a memory zone for this ring.
If we can't get rte_config or + * we are secondary process, the memzone_reserve function will set + * rte_errno for us appropriately - hence no check in this function */ + mz = rte_memzone_reserve(mz_name, ring_size, socket_id, mz_flags); + if (mz != NULL) { + r = mz->addr; + /* no need to check return value here, we already checked the + * arguments above */ + rte_ring_init(r, name, count, flags); + + te->data = (void *) r; + r->memzone = mz; + + TAILQ_INSERT_TAIL(ring_list, te, next); + } else { + r = NULL; + RTE_LOG(ERR, RING, "Cannot reserve memory\n"); + rte_free(te); + } + rte_rwlock_write_unlock(RTE_EAL_TAILQ_RWLOCK); + + return r; +} + +/* free the ring: release its memzone, then unlink and free its tailq entry. A NULL ring is ignored; a ring without a memzone (not created by rte_ring_create()) is only reported. */ +void +rte_ring_free(struct rte_ring *r) +{ + struct rte_ring_list *ring_list = NULL; + struct rte_tailq_entry *te; + + if (r == NULL) + return; + + /* + * Ring was not created with rte_ring_create, + * therefore, there is no memzone to free. + */ + if (r->memzone == NULL) { + RTE_LOG(ERR, RING, "Cannot free ring (not created with rte_ring_create())\n"); + return; + } + + if (rte_memzone_free(r->memzone) != 0) { + RTE_LOG(ERR, RING, "Cannot free memory\n"); + return; + } + + ring_list = RTE_TAILQ_CAST(rte_ring_tailq.head, rte_ring_list); + rte_rwlock_write_lock(RTE_EAL_TAILQ_RWLOCK); + + /* find out tailq entry */ + TAILQ_FOREACH(te, ring_list, next) { + if (te->data == (void *) r) + break; + } + + if (te == NULL) { + rte_rwlock_write_unlock(RTE_EAL_TAILQ_RWLOCK); + return; + } + + TAILQ_REMOVE(ring_list, te, next); + + rte_rwlock_write_unlock(RTE_EAL_TAILQ_RWLOCK); + + rte_free(te); +} + +/* + * change the high water mark.
If *count* is 0, water marking is + * disabled + */ +int +rte_ring_set_water_mark(struct rte_ring *r, unsigned count) +{ + if (count >= r->prod.size) + return -EINVAL; + + /* if count is 0, disable the watermarking */ + if (count == 0) + count = r->prod.size; + + r->prod.watermark = count; + return 0; +} + +/* dump the status of the ring on the console */ +void +rte_ring_dump(FILE *f, const struct rte_ring *r) +{ +#ifdef RTE_LIBRTE_RING_DEBUG + struct rte_ring_debug_stats sum; + unsigned lcore_id; +#endif + + fprintf(f, "ring <%s>@%p\n", r->name, r); + fprintf(f, " flags=%x\n", r->flags); + fprintf(f, " size=%"PRIu32"\n", r->prod.size); + fprintf(f, " ct=%"PRIu32"\n", r->cons.tail); + fprintf(f, " ch=%"PRIu32"\n", r->cons.head); + fprintf(f, " pt=%"PRIu32"\n", r->prod.tail); + fprintf(f, " ph=%"PRIu32"\n", r->prod.head); + fprintf(f, " used=%u\n", rte_ring_count(r)); + fprintf(f, " avail=%u\n", rte_ring_free_count(r)); + if (r->prod.watermark == r->prod.size) + fprintf(f, " watermark=0\n"); + else + fprintf(f, " watermark=%"PRIu32"\n", r->prod.watermark); + + /* sum and dump statistics */ +#ifdef RTE_LIBRTE_RING_DEBUG + memset(&sum, 0, sizeof(sum)); + for (lcore_id = 0; lcore_id < RTE_MAX_LCORE; lcore_id++) { + sum.enq_success_bulk += r->stats[lcore_id].enq_success_bulk; + sum.enq_success_objs += r->stats[lcore_id].enq_success_objs; + sum.enq_quota_bulk += r->stats[lcore_id].enq_quota_bulk; + sum.enq_quota_objs += r->stats[lcore_id].enq_quota_objs; + sum.enq_fail_bulk += r->stats[lcore_id].enq_fail_bulk; + sum.enq_fail_objs += r->stats[lcore_id].enq_fail_objs; + sum.deq_success_bulk += r->stats[lcore_id].deq_success_bulk; + sum.deq_success_objs += r->stats[lcore_id].deq_success_objs; + sum.deq_fail_bulk += r->stats[lcore_id].deq_fail_bulk; + sum.deq_fail_objs += r->stats[lcore_id].deq_fail_objs; + } + fprintf(f, " size=%"PRIu32"\n", r->prod.size); + fprintf(f, " enq_success_bulk=%"PRIu64"\n", sum.enq_success_bulk); + fprintf(f, " enq_success_objs=%"PRIu64"\n", 
sum.enq_success_objs); + fprintf(f, " enq_quota_bulk=%"PRIu64"\n", sum.enq_quota_bulk); + fprintf(f, " enq_quota_objs=%"PRIu64"\n", sum.enq_quota_objs); + fprintf(f, " enq_fail_bulk=%"PRIu64"\n", sum.enq_fail_bulk); + fprintf(f, " enq_fail_objs=%"PRIu64"\n", sum.enq_fail_objs); + fprintf(f, " deq_success_bulk=%"PRIu64"\n", sum.deq_success_bulk); + fprintf(f, " deq_success_objs=%"PRIu64"\n", sum.deq_success_objs); + fprintf(f, " deq_fail_bulk=%"PRIu64"\n", sum.deq_fail_bulk); + fprintf(f, " deq_fail_objs=%"PRIu64"\n", sum.deq_fail_objs); +#else + fprintf(f, " no statistics available\n"); +#endif +} + +/* dump the status of all rings on the console */ +void +rte_ring_list_dump(FILE *f) +{ + const struct rte_tailq_entry *te; + struct rte_ring_list *ring_list; + + ring_list = RTE_TAILQ_CAST(rte_ring_tailq.head, rte_ring_list); + + rte_rwlock_read_lock(RTE_EAL_TAILQ_RWLOCK); + + TAILQ_FOREACH(te, ring_list, next) { + rte_ring_dump(f, (struct rte_ring *) te->data); + } + + rte_rwlock_read_unlock(RTE_EAL_TAILQ_RWLOCK); +} + +/* search a ring from its name */ +struct rte_ring * +rte_ring_lookup(const char *name) +{ + struct rte_tailq_entry *te; + struct rte_ring *r = NULL; + struct rte_ring_list *ring_list; + + ring_list = RTE_TAILQ_CAST(rte_ring_tailq.head, rte_ring_list); + + rte_rwlock_read_lock(RTE_EAL_TAILQ_RWLOCK); + + TAILQ_FOREACH(te, ring_list, next) { + r = (struct rte_ring *) te->data; + if (strncmp(name, r->name, RTE_RING_NAMESIZE) == 0) + break; + } + + rte_rwlock_read_unlock(RTE_EAL_TAILQ_RWLOCK); + + if (te == NULL) { + rte_errno = ENOENT; + return NULL; + } + + return r; +} diff --git a/src/dpdk22/lib/librte_ring/rte_ring.h b/src/dpdk22/lib/librte_ring/rte_ring.h new file mode 100644 index 00000000..de036cef --- /dev/null +++ b/src/dpdk22/lib/librte_ring/rte_ring.h @@ -0,0 +1,1251 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. + * All rights reserved. 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +/* + * Derived from FreeBSD's bufring.h + * + ************************************************************************** + * + * Copyright (c) 2007-2009 Kip Macy kmacy@freebsd.org + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1. 
Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * + * 2. The name of Kip Macy nor the names of other + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + * + ***************************************************************************/ + +#ifndef _RTE_RING_H_ +#define _RTE_RING_H_ + +/** + * @file + * RTE Ring + * + * The Ring Manager is a fixed-size queue, implemented as a table of + * pointers. Head and tail pointers are modified atomically, allowing + * concurrent access to it. It has the following features: + * + * - FIFO (First In First Out) + * - Maximum size is fixed; the pointers are stored in a table. + * - Lockless implementation. + * - Multi- or single-consumer dequeue. + * - Multi- or single-producer enqueue. + * - Bulk dequeue. + * - Bulk enqueue. + * + * Note: the ring implementation is not preemptable. A lcore must not + * be interrupted by another task that uses the same ring. 
+ *
+ */
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include <stdio.h>
+#include <stdint.h>
+#include <sys/queue.h>
+#include <errno.h>
+#include <rte_common.h>
+#include <rte_memory.h>
+#include <rte_lcore.h>
+#include <rte_atomic.h>
+#include <rte_branch_prediction.h>
+
+#define RTE_TAILQ_RING_NAME "RTE_RING"
+
+enum rte_ring_queue_behavior {
+	RTE_RING_QUEUE_FIXED = 0, /* Enq/Deq a fixed number of items from a ring */
+	RTE_RING_QUEUE_VARIABLE   /* Enq/Deq as many items as possible from ring */
+};
+
+#ifdef RTE_LIBRTE_RING_DEBUG
+/**
+ * A structure that stores the ring statistics (per-lcore).
+ */
+struct rte_ring_debug_stats {
+	uint64_t enq_success_bulk; /**< Successful enqueues number. */
+	uint64_t enq_success_objs; /**< Objects successfully enqueued. */
+	uint64_t enq_quota_bulk;   /**< Successful enqueues above watermark. */
+	uint64_t enq_quota_objs;   /**< Objects enqueued above watermark. */
+	uint64_t enq_fail_bulk;    /**< Failed enqueues number. */
+	uint64_t enq_fail_objs;    /**< Objects that failed to be enqueued. */
+	uint64_t deq_success_bulk; /**< Successful dequeues number. */
+	uint64_t deq_success_objs; /**< Objects successfully dequeued. */
+	uint64_t deq_fail_bulk;    /**< Failed dequeues number. */
+	uint64_t deq_fail_objs;    /**< Objects that failed to be dequeued. */
+} __rte_cache_aligned;
+#endif
+
+#define RTE_RING_NAMESIZE 32 /**< The maximum length of a ring name. */
+#define RTE_RING_MZ_PREFIX "RG_"
+
+#ifndef RTE_RING_PAUSE_REP_COUNT
+#define RTE_RING_PAUSE_REP_COUNT 0 /**< Number of rte_pause() iterations after
+                                    *   which a waiting thread yields; no yield
+                                    *   if RTE_RING_PAUSE_REP_COUNT is 0. */
+#endif
+
+struct rte_memzone; /* forward declaration, so as not to require memzone.h */
+
+/**
+ * An RTE ring structure.
+ *
+ * The producer and the consumer each have a head and a tail index. The
+ * particularity of these indexes is that they are not between 0 and
+ * size(ring): they range over the full 32-bit space (0 to 2^32 - 1), and
+ * their value is masked when the ring[] field is accessed. Thanks to this,
+ * two index values can be subtracted modulo 2^32, which is why the overflow
+ * of the indexes is not a problem.
+ */
+struct rte_ring {
+	char name[RTE_RING_NAMESIZE];    /**< Name of the ring. */
+	int flags;                       /**< Flags supplied at creation. */
+	const struct rte_memzone *memzone;
+			/**< Memzone, if any, containing the rte_ring */
+
+	/** Ring producer status. */
+	struct prod {
+		uint32_t watermark;      /**< Maximum items before EDQUOT. */
+		uint32_t sp_enqueue;     /**< True, if single producer. */
+		uint32_t size;           /**< Size of ring. */
+		uint32_t mask;           /**< Mask (size-1) of ring. */
+		volatile uint32_t head;  /**< Producer head. */
+		volatile uint32_t tail;  /**< Producer tail. */
+	} prod __rte_cache_aligned;
+
+	/** Ring consumer status. */
+	struct cons {
+		uint32_t sc_dequeue;     /**< True, if single consumer. */
+		uint32_t size;           /**< Size of the ring. */
+		uint32_t mask;           /**< Mask (size-1) of ring. */
+		volatile uint32_t head;  /**< Consumer head. */
+		volatile uint32_t tail;  /**< Consumer tail. */
+#ifdef RTE_RING_SPLIT_PROD_CONS
+	} cons __rte_cache_aligned;
+#else
+	} cons;
+#endif
+
+#ifdef RTE_LIBRTE_RING_DEBUG
+	struct rte_ring_debug_stats stats[RTE_MAX_LCORE];
+#endif
+
+	void * ring[0] __rte_cache_aligned; /**< Memory space of ring starts here.
+	                                     * not volatile so need to be careful
+	                                     * about compiler re-ordering */
+};
+
+#define RING_F_SP_ENQ 0x0001 /**< The default enqueue is "single-producer". */
+#define RING_F_SC_DEQ 0x0002 /**< The default dequeue is "single-consumer". */
+/* Unsigned constant: shifting a signed 1 into bit 31 is undefined behavior. */
+#define RTE_RING_QUOT_EXCEED (1u << 31)  /**< Quota exceed for burst ops */
+#define RTE_RING_SZ_MASK  (unsigned)(0x0fffffff) /**< Ring size mask */
+
+/**
+ * @internal When debug is enabled, store ring statistics.
+ * @param r
+ *   A pointer to the ring.
+ * @param name
+ *   The name of the statistics field to increment in the ring.
+ * @param n
+ *   The number to add to the object-oriented statistics.
+ */
+#ifdef RTE_LIBRTE_RING_DEBUG
+/* Arguments are parenthesized so any ring/count expression expands safely. */
+#define __RING_STAT_ADD(r, name, n) do {                        \
+		unsigned __lcore_id = rte_lcore_id();           \
+		if (__lcore_id < RTE_MAX_LCORE) {               \
+			(r)->stats[__lcore_id].name##_objs += (n);  \
+			(r)->stats[__lcore_id].name##_bulk += 1;  \
+		}                                               \
+	} while(0)
+#else
+#define __RING_STAT_ADD(r, name, n) do {} while(0)
+#endif
+
+/**
+ * Calculate the memory size needed for a ring
+ *
+ * This function returns the number of bytes needed for a ring, given
+ * the number of elements in it. This value is the sum of the size of
+ * the structure rte_ring and the size of the memory needed by the
+ * object pointers. The value is aligned to a cache line size.
+ *
+ * @param count
+ *   The number of elements in the ring (must be a power of 2).
+ * @return
+ *   - The memory size needed for the ring on success.
+ *   - -EINVAL if count is not a power of 2.
+ */
+ssize_t rte_ring_get_memsize(unsigned count);
+
+/**
+ * Initialize a ring structure.
+ *
+ * Initialize a ring structure in memory pointed by "r". The size of the
+ * memory area must be large enough to store the ring structure and the
+ * object table. It is advised to use rte_ring_get_memsize() to get the
+ * appropriate size.
+ *
+ * The ring size is set to *count*, which must be a power of two. Water
+ * marking is disabled by default. The real usable ring size is
+ * *count-1* instead of *count* to differentiate a full ring from an
+ * empty ring.
+ *
+ * The ring is not added in RTE_TAILQ_RING global list. Indeed, the
+ * memory given by the caller may not be shareable among dpdk
+ * processes.
+ *
+ * @param r
+ *   The pointer to the ring structure followed by the objects table.
+ * @param name
+ *   The name of the ring.
+ * @param count
+ *   The number of elements in the ring (must be a power of 2).
+ * @param flags
+ *   An OR of the following:
+ *    - RING_F_SP_ENQ: If this flag is set, the default behavior when
+ *      using ``rte_ring_enqueue()`` or ``rte_ring_enqueue_bulk()``
+ *      is "single-producer". Otherwise, it is "multi-producers".
+ *    - RING_F_SC_DEQ: If this flag is set, the default behavior when
+ *      using ``rte_ring_dequeue()`` or ``rte_ring_dequeue_bulk()``
+ *      is "single-consumer". Otherwise, it is "multi-consumers".
+ * @return
+ *   0 on success, or a negative value on error.
+ */
+int rte_ring_init(struct rte_ring *r, const char *name, unsigned count,
+	unsigned flags);
+
+/**
+ * Create a new ring named *name* in memory.
+ *
+ * This function uses ``memzone_reserve()`` to allocate memory. Then it
+ * calls rte_ring_init() to initialize an empty ring.
+ *
+ * The new ring size is set to *count*, which must be a power of
+ * two. Water marking is disabled by default. The real usable ring size
+ * is *count-1* instead of *count* to differentiate a full ring from an
+ * empty ring.
+ *
+ * The ring is added in RTE_TAILQ_RING list.
+ *
+ * @param name
+ *   The name of the ring.
+ * @param count
+ *   The size of the ring (must be a power of 2).
+ * @param socket_id
+ *   The *socket_id* argument is the socket identifier in case of
+ *   NUMA. The value can be *SOCKET_ID_ANY* if there is no NUMA
+ *   constraint for the reserved zone.
+ * @param flags
+ *   An OR of the following:
+ *    - RING_F_SP_ENQ: If this flag is set, the default behavior when
+ *      using ``rte_ring_enqueue()`` or ``rte_ring_enqueue_bulk()``
+ *      is "single-producer". Otherwise, it is "multi-producers".
+ *    - RING_F_SC_DEQ: If this flag is set, the default behavior when
+ *      using ``rte_ring_dequeue()`` or ``rte_ring_dequeue_bulk()``
+ *      is "single-consumer". Otherwise, it is "multi-consumers".
+ * @return
+ *   On success, the pointer to the new allocated ring. NULL on error with
+ *    rte_errno set appropriately. Possible errno values include:
+ *    - E_RTE_NO_CONFIG - function could not get pointer to rte_config structure
+ *    - E_RTE_SECONDARY - function was called from a secondary process instance
+ *    - EINVAL - count provided is not a power of 2
+ *    - ENOSPC - the maximum number of memzones has already been allocated
+ *    - EEXIST - a memzone with the same name already exists
+ *    - ENOMEM - no appropriate memory area found in which to create memzone
+ */
+struct rte_ring *rte_ring_create(const char *name, unsigned count,
+				 int socket_id, unsigned flags);
+/**
+ * De-allocate all memory used by the ring.
+ *
+ * @param r
+ *   Ring to free
+ */
+void rte_ring_free(struct rte_ring *r);
+
+/**
+ * Change the high water mark.
+ *
+ * If *count* is 0, water marking is disabled. Otherwise, it is set to the
+ * *count* value. The *count* value must be greater than 0 and less
+ * than the ring size.
+ *
+ * This function can be called at any time (not necessarily at
+ * initialization).
+ *
+ * @param r
+ *   A pointer to the ring structure.
+ * @param count
+ *   The new water mark value.
+ * @return
+ *   - 0: Success; water mark changed.
+ *   - -EINVAL: Invalid water mark value.
+ */
+int rte_ring_set_water_mark(struct rte_ring *r, unsigned count);
+
+/**
+ * Dump the status of the ring to the given output stream.
+ *
+ * @param f
+ *   A pointer to a file for output
+ * @param r
+ *   A pointer to the ring structure.
+ */
+void rte_ring_dump(FILE *f, const struct rte_ring *r);
+
+/* the actual enqueue of pointers on the ring.
+ * Placed here since identical code needed in both
+ * single and multi producer enqueue functions.
+ *
+ * Expands in the caller's scope and uses its r, obj_table, n, i, mask and
+ * prod_head. When the n slots starting at (prod_head & mask) end before the
+ * last ring slot, the copy is unrolled by four with a fallthrough switch
+ * for the remaining 1-3 pointers; otherwise it copies up to the end of the
+ * table and continues from index 0. */
+#define ENQUEUE_PTRS() do { \
+	const uint32_t size = r->prod.size; \
+	uint32_t idx = prod_head & mask; \
+	if (likely(idx + n < size)) { \
+		for (i = 0; i < (n & ((~(unsigned)0x3))); i+=4, idx+=4) { \
+			r->ring[idx] = obj_table[i]; \
+			r->ring[idx+1] = obj_table[i+1]; \
+			r->ring[idx+2] = obj_table[i+2]; \
+			r->ring[idx+3] = obj_table[i+3]; \
+		} \
+		switch (n & 0x3) { \
+			case 3: r->ring[idx++] = obj_table[i++]; /* fallthrough */ \
+			case 2: r->ring[idx++] = obj_table[i++]; /* fallthrough */ \
+			case 1: r->ring[idx++] = obj_table[i++]; \
+		} \
+	} else { \
+		for (i = 0; idx < size; i++, idx++)\
+			r->ring[idx] = obj_table[i]; \
+		for (idx = 0; i < n; i++, idx++) \
+			r->ring[idx] = obj_table[i]; \
+	} \
+} while(0)
+
+/* the actual copy of pointers on the ring to obj_table.
+ * Placed here since identical code needed in both
+ * single and multi consumer dequeue functions.
+ *
+ * Expands in the caller's scope and uses its r, obj_table, n, i, mask and
+ * cons_head. Mirrors ENQUEUE_PTRS(): unrolled copy when the range ends
+ * before the last ring slot, two-part copy that wraps to index 0 otherwise. */
+#define DEQUEUE_PTRS() do { \
+	uint32_t idx = cons_head & mask; \
+	const uint32_t size = r->cons.size; \
+	if (likely(idx + n < size)) { \
+		for (i = 0; i < (n & (~(unsigned)0x3)); i+=4, idx+=4) {\
+			obj_table[i] = r->ring[idx]; \
+			obj_table[i+1] = r->ring[idx+1]; \
+			obj_table[i+2] = r->ring[idx+2]; \
+			obj_table[i+3] = r->ring[idx+3]; \
+		} \
+		switch (n & 0x3) { \
+			case 3: obj_table[i++] = r->ring[idx++]; /* fallthrough */ \
+			case 2: obj_table[i++] = r->ring[idx++]; /* fallthrough */ \
+			case 1: obj_table[i++] = r->ring[idx++]; \
+		} \
+	} else { \
+		for (i = 0; idx < size; i++, idx++) \
+			obj_table[i] = r->ring[idx]; \
+		for (idx = 0; i < n; i++, idx++) \
+			obj_table[i] = r->ring[idx]; \
+	} \
+} while (0)
+
+/**
+ * @internal Enqueue several objects on the ring (multi-producers safe).
+ *
+ * This function uses a "compare and set" instruction to move the
+ * producer index atomically.
+ *
+ * @param r
+ *   A pointer to the ring structure.
+ * @param obj_table
+ *   A pointer to a table of void * pointers (objects).
+ * @param n
+ *   The number of objects to add in the ring from the obj_table.
+ * @param behavior
+ *   RTE_RING_QUEUE_FIXED:    Enqueue a fixed number of items to the ring
+ *   RTE_RING_QUEUE_VARIABLE: Enqueue as many items as possible to the ring
+ * @return
+ *   Depend on the behavior value
+ *   if behavior = RTE_RING_QUEUE_FIXED
+ *   - 0: Success; objects enqueued.
+ *   - -EDQUOT: Quota exceeded. The objects have been enqueued, but the
+ *     high water mark is exceeded.
+ *   - -ENOBUFS: Not enough room in the ring to enqueue, no object is enqueued.
+ *   if behavior = RTE_RING_QUEUE_VARIABLE
+ *   - n: Actual number of objects enqueued (0 when the ring has no free
+ *     entry). When the high water mark is exceeded the value returned is
+ *     n with the RTE_RING_QUOT_EXCEED bit OR-ed in.
+ */
+static inline int __attribute__((always_inline))
+__rte_ring_mp_do_enqueue(struct rte_ring *r, void * const *obj_table,
+			 unsigned n, enum rte_ring_queue_behavior behavior)
+{
+	uint32_t prod_head, prod_next;
+	uint32_t cons_tail, free_entries;
+	const unsigned max = n;
+	int success;
+	unsigned i, rep = 0;
+	uint32_t mask = r->prod.mask;
+	int ret;
+
+	/* move prod.head atomically */
+	do {
+		/* Reset n to the initial burst count */
+		n = max;
+
+		prod_head = r->prod.head;
+		cons_tail = r->cons.tail;
+		/* The subtraction is done between two unsigned 32bits value
+		 * (the result is always modulo 32 bits even if we have
+		 * prod_head > cons_tail). So 'free_entries' is always between 0
+		 * and size(ring)-1. */
+		free_entries = (mask + cons_tail - prod_head);
+
+		/* check that we have enough room in ring */
+		if (unlikely(n > free_entries)) {
+			if (behavior == RTE_RING_QUEUE_FIXED) {
+				__RING_STAT_ADD(r, enq_fail, n);
+				return -ENOBUFS;
+			}
+			else {
+				/* No free entry available */
+				if (unlikely(free_entries == 0)) {
+					__RING_STAT_ADD(r, enq_fail, n);
+					return 0;
+				}
+
+				n = free_entries;
+			}
+		}
+
+		prod_next = prod_head + n;
+		success = rte_atomic32_cmpset(&r->prod.head, prod_head,
+					      prod_next);
+	} while (unlikely(success == 0));
+
+	/* write entries in ring */
+	ENQUEUE_PTRS();
+	rte_smp_wmb(); /* barrier between the slot writes and the tail update below */
+
+	/* if we exceed the watermark */
+	if (unlikely(((mask + 1) - free_entries + n) > r->prod.watermark)) {
+		ret = (behavior == RTE_RING_QUEUE_FIXED) ? -EDQUOT :
+				(int)(n | RTE_RING_QUOT_EXCEED);
+		__RING_STAT_ADD(r, enq_quota, n);
+	}
+	else {
+		ret = (behavior == RTE_RING_QUEUE_FIXED) ? 0 : n;
+		__RING_STAT_ADD(r, enq_success, n);
+	}
+
+	/*
+	 * If there are other enqueues in progress that preceded us,
+	 * we need to wait for them to complete
+	 */
+	while (unlikely(r->prod.tail != prod_head)) {
+		rte_pause();
+
+		/* Set RTE_RING_PAUSE_REP_COUNT to avoid spinning too long
+		 * waiting for another thread to finish. It gives a pre-empted
+		 * thread a chance to proceed and finish its ring enqueue
+		 * operation. */
+		if (RTE_RING_PAUSE_REP_COUNT &&
+		    ++rep == RTE_RING_PAUSE_REP_COUNT) {
+			rep = 0;
+			sched_yield();
+		}
+	}
+	r->prod.tail = prod_next;
+	return ret;
+}
+
+/**
+ * @internal Enqueue several objects on a ring (NOT multi-producers safe).
+ *
+ * @param r
+ *   A pointer to the ring structure.
+ * @param obj_table
+ *   A pointer to a table of void * pointers (objects).
+ * @param n
+ *   The number of objects to add in the ring from the obj_table.
+ * @param behavior
+ *   RTE_RING_QUEUE_FIXED:    Enqueue a fixed number of items to the ring
+ *   RTE_RING_QUEUE_VARIABLE: Enqueue as many items as possible to the ring
+ * @return
+ *   Depend on the behavior value
+ *   if behavior = RTE_RING_QUEUE_FIXED
+ *   - 0: Success; objects enqueued.
+ *   - -EDQUOT: Quota exceeded. The objects have been enqueued, but the
+ *     high water mark is exceeded.
+ *   - -ENOBUFS: Not enough room in the ring to enqueue, no object is enqueued.
+ *   if behavior = RTE_RING_QUEUE_VARIABLE
+ *   - n: Actual number of objects enqueued (0 when the ring has no free
+ *     entry). When the high water mark is exceeded the value returned is
+ *     n with the RTE_RING_QUOT_EXCEED bit OR-ed in.
+ */
+static inline int __attribute__((always_inline))
+__rte_ring_sp_do_enqueue(struct rte_ring *r, void * const *obj_table,
+			 unsigned n, enum rte_ring_queue_behavior behavior)
+{
+	uint32_t prod_head, cons_tail;
+	uint32_t prod_next, free_entries;
+	unsigned i;
+	uint32_t mask = r->prod.mask;
+	int ret;
+
+	prod_head = r->prod.head;
+	cons_tail = r->cons.tail;
+	/* The subtraction is done between two unsigned 32bits value
+	 * (the result is always modulo 32 bits even if we have
+	 * prod_head > cons_tail). So 'free_entries' is always between 0
+	 * and size(ring)-1. */
+	free_entries = mask + cons_tail - prod_head;
+
+	/* check that we have enough room in ring */
+	if (unlikely(n > free_entries)) {
+		if (behavior == RTE_RING_QUEUE_FIXED) {
+			__RING_STAT_ADD(r, enq_fail, n);
+			return -ENOBUFS;
+		}
+		else {
+			/* No free entry available */
+			if (unlikely(free_entries == 0)) {
+				__RING_STAT_ADD(r, enq_fail, n);
+				return 0;
+			}
+
+			n = free_entries;
+		}
+	}
+
+	prod_next = prod_head + n;
+	r->prod.head = prod_next; /* plain store: no compare-and-set on this path */
+
+	/* write entries in ring */
+	ENQUEUE_PTRS();
+	rte_smp_wmb(); /* barrier between the slot writes and the tail update below */
+
+	/* if we exceed the watermark */
+	if (unlikely(((mask + 1) - free_entries + n) > r->prod.watermark)) {
+		ret = (behavior == RTE_RING_QUEUE_FIXED) ? -EDQUOT :
+			(int)(n | RTE_RING_QUOT_EXCEED);
+		__RING_STAT_ADD(r, enq_quota, n);
+	}
+	else {
+		ret = (behavior == RTE_RING_QUEUE_FIXED) ? 0 : n;
+		__RING_STAT_ADD(r, enq_success, n);
+	}
+
+	r->prod.tail = prod_next;
+	return ret;
+}
+
+/**
+ * @internal Dequeue several objects from a ring (multi-consumers safe). When
+ * more objects are requested than are available, only the available
+ * objects are dequeued (RTE_RING_QUEUE_VARIABLE behavior).
+ *
+ * This function uses a "compare and set" instruction to move the
+ * consumer index atomically.
+ *
+ * @param r
+ *   A pointer to the ring structure.
+ * @param obj_table
+ *   A pointer to a table of void * pointers (objects) that will be filled.
+ * @param n
+ *   The number of objects to dequeue from the ring to the obj_table.
+ * @param behavior
+ *   RTE_RING_QUEUE_FIXED:    Dequeue a fixed number of items from a ring
+ *   RTE_RING_QUEUE_VARIABLE: Dequeue as many items as possible from ring
+ * @return
+ *   Depend on the behavior value
+ *   if behavior = RTE_RING_QUEUE_FIXED
+ *   - 0: Success; objects dequeued.
+ *   - -ENOENT: Not enough entries in the ring to dequeue; no object is
+ *     dequeued.
+ *   if behavior = RTE_RING_QUEUE_VARIABLE
+ *   - n: Actual number of objects dequeued (0 when the ring is empty).
+ */
+
+static inline int __attribute__((always_inline))
+__rte_ring_mc_do_dequeue(struct rte_ring *r, void **obj_table,
+		 unsigned n, enum rte_ring_queue_behavior behavior)
+{
+	uint32_t cons_head, prod_tail;
+	uint32_t cons_next, entries;
+	const unsigned max = n;
+	int success;
+	unsigned i, rep = 0;
+	uint32_t mask = r->prod.mask; /* NOTE(review): reads prod.mask although cons.mask exists; presumably both hold size-1 - confirm in rte_ring_init() */
+
+	/* move cons.head atomically */
+	do {
+		/* Restore n as it may change every loop */
+		n = max;
+
+		cons_head = r->cons.head;
+		prod_tail = r->prod.tail;
+		/* The subtraction is done between two unsigned 32bits value
+		 * (the result is always modulo 32 bits even if we have
+		 * cons_head > prod_tail). So 'entries' is always between 0
+		 * and size(ring)-1. */
+		entries = (prod_tail - cons_head);
+
+		/* Set the actual entries for dequeue */
+		if (n > entries) {
+			if (behavior == RTE_RING_QUEUE_FIXED) {
+				__RING_STAT_ADD(r, deq_fail, n);
+				return -ENOENT;
+			}
+			else {
+				if (unlikely(entries == 0)){
+					__RING_STAT_ADD(r, deq_fail, n);
+					return 0;
+				}
+
+				n = entries;
+			}
+		}
+
+		cons_next = cons_head + n;
+		success = rte_atomic32_cmpset(&r->cons.head, cons_head,
+					      cons_next);
+	} while (unlikely(success == 0));
+
+	/* copy in table */
+	DEQUEUE_PTRS();
+	rte_smp_rmb(); /* barrier between the slot reads and the tail update below */
+
+	/*
+	 * If there are other dequeues in progress that preceded us,
+	 * we need to wait for them to complete
+	 */
+	while (unlikely(r->cons.tail != cons_head)) {
+		rte_pause();
+
+		/* Set RTE_RING_PAUSE_REP_COUNT to avoid spinning too long
+		 * waiting for another thread to finish. It gives a pre-empted
+		 * thread a chance to proceed and finish its ring dequeue
+		 * operation. */
+		if (RTE_RING_PAUSE_REP_COUNT &&
+		    ++rep == RTE_RING_PAUSE_REP_COUNT) {
+			rep = 0;
+			sched_yield();
+		}
+	}
+	__RING_STAT_ADD(r, deq_success, n);
+	r->cons.tail = cons_next;
+
+	return behavior == RTE_RING_QUEUE_FIXED ? 0 : n;
+}
+
+/**
+ * @internal Dequeue several objects from a ring (NOT multi-consumers safe).
+ * When more objects are requested than are available, only the available
+ * objects are dequeued (RTE_RING_QUEUE_VARIABLE behavior).
+ *
+ * @param r
+ *   A pointer to the ring structure.
+ * @param obj_table
+ *   A pointer to a table of void * pointers (objects) that will be filled.
+ * @param n
+ *   The number of objects to dequeue from the ring to the obj_table.
+ * @param behavior
+ *   RTE_RING_QUEUE_FIXED:    Dequeue a fixed number of items from a ring
+ *   RTE_RING_QUEUE_VARIABLE: Dequeue as many items as possible from ring
+ * @return
+ *   Depend on the behavior value
+ *   if behavior = RTE_RING_QUEUE_FIXED
+ *   - 0: Success; objects dequeued.
+ *   - -ENOENT: Not enough entries in the ring to dequeue; no object is
+ *     dequeued.
+ *   if behavior = RTE_RING_QUEUE_VARIABLE
+ *   - n: Actual number of objects dequeued.
+ */
+static inline int __attribute__((always_inline))
+__rte_ring_sc_do_dequeue(struct rte_ring *r, void **obj_table,
+		 unsigned n, enum rte_ring_queue_behavior behavior)
+{
+	uint32_t cons_head, prod_tail;
+	uint32_t cons_next, entries;
+	unsigned i;
+	uint32_t mask = r->prod.mask; /* NOTE(review): reads prod.mask although cons.mask exists; presumably both hold size-1 - confirm in rte_ring_init() */
+
+	cons_head = r->cons.head;
+	prod_tail = r->prod.tail;
+	/* The subtraction is done between two unsigned 32bits value
+	 * (the result is always modulo 32 bits even if we have
+	 * cons_head > prod_tail). So 'entries' is always between 0
+	 * and size(ring)-1. */
+	entries = prod_tail - cons_head;
+
+	if (n > entries) {
+		if (behavior == RTE_RING_QUEUE_FIXED) {
+			__RING_STAT_ADD(r, deq_fail, n);
+			return -ENOENT;
+		}
+		else {
+			if (unlikely(entries == 0)){
+				__RING_STAT_ADD(r, deq_fail, n);
+				return 0;
+			}
+
+			n = entries; /* variable mode: take what is available */
+		}
+	}
+
+	cons_next = cons_head + n;
+	r->cons.head = cons_next; /* plain store: no compare-and-set on this path */
+
+	/* copy in table */
+	DEQUEUE_PTRS();
+	rte_smp_rmb(); /* barrier between the slot reads and the tail update below */
+
+	__RING_STAT_ADD(r, deq_success, n);
+	r->cons.tail = cons_next;
+	return behavior == RTE_RING_QUEUE_FIXED ? 0 : n;
+}
+
+/**
+ * Enqueue several objects on the ring (multi-producers safe).
+ *
+ * This function uses a "compare and set" instruction to move the
+ * producer index atomically. It forwards to __rte_ring_mp_do_enqueue()
+ * with RTE_RING_QUEUE_FIXED, so either all n objects are enqueued or none.
+ *
+ * @param r
+ *   A pointer to the ring structure.
+ * @param obj_table
+ *   A pointer to a table of void * pointers (objects).
+ * @param n
+ *   The number of objects to add in the ring from the obj_table.
+ * @return
+ *   - 0: Success; objects enqueued.
+ *   - -EDQUOT: Quota exceeded. The objects have been enqueued, but the
+ *     high water mark is exceeded.
+ *   - -ENOBUFS: Not enough room in the ring to enqueue, no object is enqueued.
+ */
+static inline int __attribute__((always_inline))
+rte_ring_mp_enqueue_bulk(struct rte_ring *r, void * const *obj_table,
+			 unsigned n)
+{
+	return __rte_ring_mp_do_enqueue(r, obj_table, n, RTE_RING_QUEUE_FIXED);
+}
+
+/**
+ * Enqueue several objects on a ring (NOT multi-producers safe).
+ *
+ * All-or-nothing: the call forwards to the single-producer enqueue
+ * primitive in RTE_RING_QUEUE_FIXED mode.
+ *
+ * @param r
+ *   The ring to enqueue on.
+ * @param obj_table
+ *   Table of void * pointers (the objects to enqueue).
+ * @param n
+ *   How many objects from obj_table to add to the ring.
+ * @return
+ *   - 0: Success; objects enqueued.
+ *   - -EDQUOT: Quota exceeded. The objects have been enqueued, but the
+ *     high water mark is exceeded.
+ *   - -ENOBUFS: Not enough room in the ring to enqueue; no object is enqueued.
+ */
+static inline int __attribute__((always_inline))
+rte_ring_sp_enqueue_bulk(struct rte_ring *r, void * const *obj_table,
+			 unsigned n)
+{
+	const enum rte_ring_queue_behavior mode = RTE_RING_QUEUE_FIXED;
+
+	return __rte_ring_sp_do_enqueue(r, obj_table, n, mode);
+}
+
+/**
+ * Enqueue several objects on a ring, using the ring's default producer mode.
+ *
+ * Dispatches to the single-producer variant when the ring was created
+ * with single-producer enqueue as its default (see flags), and to the
+ * multi-producer variant otherwise.
+ *
+ * @param r
+ *   The ring to enqueue on.
+ * @param obj_table
+ *   Table of void * pointers (the objects to enqueue).
+ * @param n
+ *   How many objects from obj_table to add to the ring.
+ * @return
+ *   - 0: Success; objects enqueued.
+ *   - -EDQUOT: Quota exceeded. The objects have been enqueued, but the
+ *     high water mark is exceeded.
+ *   - -ENOBUFS: Not enough room in the ring to enqueue; no object is enqueued.
+ */
+static inline int __attribute__((always_inline))
+rte_ring_enqueue_bulk(struct rte_ring *r, void * const *obj_table,
+		      unsigned n)
+{
+	return r->prod.sp_enqueue ?
+		rte_ring_sp_enqueue_bulk(r, obj_table, n) :
+		rte_ring_mp_enqueue_bulk(r, obj_table, n);
+}
+
+/**
+ * Enqueue one object on a ring (multi-producers safe).
+ *
+ * The producer index is moved atomically with a "compare and set"
+ * instruction.
+ *
+ * @param r
+ *   The ring to enqueue on.
+ * @param obj
+ *   The object pointer to add.
+ * @return
+ *   - 0: Success; objects enqueued.
+ *   - -EDQUOT: Quota exceeded. The objects have been enqueued, but the
+ *     high water mark is exceeded.
+ *   - -ENOBUFS: Not enough room in the ring to enqueue; no object is enqueued.
+ */
+static inline int __attribute__((always_inline))
+rte_ring_mp_enqueue(struct rte_ring *r, void *obj)
+{
+	/* a bulk enqueue of exactly one pointer */
+	return __rte_ring_mp_do_enqueue(r, &obj, 1, RTE_RING_QUEUE_FIXED);
+}
+
+/**
+ * Enqueue one object on a ring (NOT multi-producers safe).
+ *
+ * @param r
+ *   The ring to enqueue on.
+ * @param obj
+ *   The object pointer to add.
+ * @return
+ *   - 0: Success; objects enqueued.
+ *   - -EDQUOT: Quota exceeded. The objects have been enqueued, but the
+ *     high water mark is exceeded.
+ *   - -ENOBUFS: Not enough room in the ring to enqueue; no object is enqueued.
+ */
+static inline int __attribute__((always_inline))
+rte_ring_sp_enqueue(struct rte_ring *r, void *obj)
+{
+	/* a bulk enqueue of exactly one pointer */
+	return __rte_ring_sp_do_enqueue(r, &obj, 1, RTE_RING_QUEUE_FIXED);
+}
+
+/**
+ * Enqueue one object on a ring, using the ring's default producer mode.
+ *
+ * Dispatches to the single-producer or the multi-producer variant
+ * according to the default chosen at ring creation time (see flags).
+ *
+ * @param r
+ *   The ring to enqueue on.
+ * @param obj
+ *   The object pointer to add.
+ * @return
+ *   - 0: Success; objects enqueued.
+ *   - -EDQUOT: Quota exceeded. The objects have been enqueued, but the
+ *     high water mark is exceeded.
+ *   - -ENOBUFS: Not enough room in the ring to enqueue; no object is enqueued.
+ */
+static inline int __attribute__((always_inline))
+rte_ring_enqueue(struct rte_ring *r, void *obj)
+{
+	if (!r->prod.sp_enqueue)
+		return rte_ring_mp_enqueue(r, obj);
+
+	return rte_ring_sp_enqueue(r, obj);
+}
+
+/**
+ * Dequeue several objects from a ring (multi-consumers safe).
+ *
+ * The consumer index is moved atomically with a "compare and set"
+ * instruction.
+ *
+ * @param r
+ *   The ring to dequeue from.
+ * @param obj_table
+ *   A pointer to a table of void * pointers (objects) that will be filled.
+ * @param n
+ *   How many objects to move from the ring into obj_table.
+ * @return
+ *   - 0: Success; objects dequeued.
+ *   - -ENOENT: Not enough entries in the ring to dequeue; no object is
+ *     dequeued.
+ */
+static inline int __attribute__((always_inline))
+rte_ring_mc_dequeue_bulk(struct rte_ring *r, void **obj_table, unsigned n)
+{
+	const enum rte_ring_queue_behavior mode = RTE_RING_QUEUE_FIXED;
+
+	return __rte_ring_mc_do_dequeue(r, obj_table, n, mode);
+}
+
+/**
+ * Dequeue several objects from a ring (NOT multi-consumers safe).
+ *
+ * @param r
+ *   The ring to dequeue from.
+ * @param obj_table
+ *   Table of void * pointers that receives the dequeued objects.
+ * @param n
+ *   How many objects to move from the ring into obj_table,
+ *   must be strictly positive.
+ * @return
+ *   - 0: Success; objects dequeued.
+ *   - -ENOENT: Not enough entries in the ring to dequeue; no object is
+ *     dequeued.
+ */
+static inline int __attribute__((always_inline))
+rte_ring_sc_dequeue_bulk(struct rte_ring *r, void **obj_table, unsigned n)
+{
+	const enum rte_ring_queue_behavior mode = RTE_RING_QUEUE_FIXED;
+
+	return __rte_ring_sc_do_dequeue(r, obj_table, n, mode);
+}
+
+/**
+ * Dequeue several objects from a ring, using the ring's default consumer
+ * mode.
+ *
+ * Dispatches to the single-consumer or the multi-consumers variant
+ * according to the default chosen at ring creation time (see flags).
+ *
+ * @param r
+ *   The ring to dequeue from.
+ * @param obj_table
+ *   Table of void * pointers that receives the dequeued objects.
+ * @param n
+ *   How many objects to move from the ring into obj_table.
+ * @return
+ *   - 0: Success; objects dequeued.
+ *   - -ENOENT: Not enough entries in the ring to dequeue, no object is
+ *     dequeued.
+ */
+static inline int __attribute__((always_inline))
+rte_ring_dequeue_bulk(struct rte_ring *r, void **obj_table, unsigned n)
+{
+	return r->cons.sc_dequeue ?
+		rte_ring_sc_dequeue_bulk(r, obj_table, n) :
+		rte_ring_mc_dequeue_bulk(r, obj_table, n);
+}
+
+/**
+ * Dequeue one object from a ring (multi-consumers safe).
+ *
+ * The consumer index is moved atomically with a "compare and set"
+ * instruction.
+ *
+ * @param r
+ *   The ring to dequeue from.
+ * @param obj_p
+ *   Location of the void * pointer that receives the dequeued object.
+ * @return
+ *   - 0: Success; objects dequeued.
+ *   - -ENOENT: Not enough entries in the ring to dequeue; no object is
+ *     dequeued.
+ */
+static inline int __attribute__((always_inline))
+rte_ring_mc_dequeue(struct rte_ring *r, void **obj_p)
+{
+	/* a bulk dequeue of exactly one pointer */
+	return __rte_ring_mc_do_dequeue(r, obj_p, 1, RTE_RING_QUEUE_FIXED);
+}
+
+/**
+ * Dequeue one object from a ring (NOT multi-consumers safe).
+ *
+ * @param r
+ *   The ring to dequeue from.
+ * @param obj_p
+ *   Location of the void * pointer that receives the dequeued object.
+ * @return
+ *   - 0: Success; objects dequeued.
+ *   - -ENOENT: Not enough entries in the ring to dequeue, no object is
+ *     dequeued.
+ */
+static inline int __attribute__((always_inline))
+rte_ring_sc_dequeue(struct rte_ring *r, void **obj_p)
+{
+	/* a bulk dequeue of exactly one pointer */
+	return __rte_ring_sc_do_dequeue(r, obj_p, 1, RTE_RING_QUEUE_FIXED);
+}
+
+/**
+ * Dequeue one object from a ring, using the ring's default consumer mode.
+ *
+ * Dispatches to the single-consumer or the multi-consumers variant
+ * according to the default chosen at ring creation time (see flags).
+ *
+ * @param r
+ *   The ring to dequeue from.
+ * @param obj_p
+ *   Location of the void * pointer that receives the dequeued object.
+ * @return
+ *   - 0: Success, objects dequeued.
+ *   - -ENOENT: Not enough entries in the ring to dequeue, no object is
+ *     dequeued.
+ */
+static inline int __attribute__((always_inline))
+rte_ring_dequeue(struct rte_ring *r, void **obj_p)
+{
+	if (!r->cons.sc_dequeue)
+		return rte_ring_mc_dequeue(r, obj_p);
+
+	return rte_ring_sc_dequeue(r, obj_p);
+}
+
+/**
+ * Tell whether a ring is full.
+ *
+ * @param r
+ *   The ring to inspect.
+ * @return
+ *   - 1: The ring is full.
+ *   - 0: The ring is not full.
+ */
+static inline int
+rte_ring_full(const struct rte_ring *r)
+{
+	const uint32_t produced = r->prod.tail;
+	const uint32_t consumed = r->cons.tail;
+	const uint32_t room = (consumed - produced - 1) & r->prod.mask;
+
+	return room == 0;
+}
+
+/**
+ * Tell whether a ring is empty.
+ *
+ * @param r
+ *   The ring to inspect.
+ * @return
+ *   - 1: The ring is empty.
+ *   - 0: The ring is not empty.
+ */
+static inline int
+rte_ring_empty(const struct rte_ring *r)
+{
+	const uint32_t produced = r->prod.tail;
+	const uint32_t consumed = r->cons.tail;
+
+	/* equal tails: everything produced has been consumed */
+	return consumed == produced;
+}
+
+/**
+ * Number of entries currently stored in a ring.
+ *
+ * @param r
+ *   The ring to inspect.
+ * @return
+ *   The number of entries in the ring.
+ */
+static inline unsigned
+rte_ring_count(const struct rte_ring *r)
+{
+	const uint32_t produced = r->prod.tail;
+	const uint32_t consumed = r->cons.tail;
+	const uint32_t used = (produced - consumed) & r->prod.mask;
+
+	return used;
+}
+
+/**
+ * Number of entries that can still be enqueued on a ring.
+ *
+ * @param r
+ *   The ring to inspect.
+ * @return
+ *   The number of free entries in the ring.
+ */
+static inline unsigned
+rte_ring_free_count(const struct rte_ring *r)
+{
+	const uint32_t produced = r->prod.tail;
+	const uint32_t consumed = r->cons.tail;
+	const uint32_t room = (consumed - produced - 1) & r->prod.mask;
+
+	return room;
+}
+
+/**
+ * Write the status of all rings to an output stream.
+ *
+ * @param f
+ *   A pointer to a file for output
+ */
+void rte_ring_list_dump(FILE *f);
+
+/**
+ * Look up a ring by its name.
+ *
+ * @param name
+ *   The name of the ring.
+ * @return
+ *   The pointer to the ring matching the name, or NULL if not found,
+ *   with rte_errno set appropriately. Possible rte_errno values include:
+ *    - ENOENT - required entry not available to return.
+ */
+struct rte_ring *rte_ring_lookup(const char *name);
+
+/**
+ * Enqueue several objects on the ring (multi-producers safe).
+ *
+ * This function uses a "compare and set" instruction to move the
+ * producer index atomically. It forwards to __rte_ring_mp_do_enqueue()
+ * with the RTE_RING_QUEUE_VARIABLE behaviour.
+ *
+ * @param r
+ *   A pointer to the ring structure.
+ * @param obj_table
+ *   A pointer to a table of void * pointers (objects).
+ * @param n
+ *   The number of objects to add in the ring from the obj_table.
+ * @return
+ *   - n: Actual number of objects enqueued.
+ */
+static inline unsigned __attribute__((always_inline))
+rte_ring_mp_enqueue_burst(struct rte_ring *r, void * const *obj_table,
+			 unsigned n)
+{
+	return __rte_ring_mp_do_enqueue(r, obj_table, n, RTE_RING_QUEUE_VARIABLE);
+}
+
+/**
+ * Enqueue several objects on a ring (NOT multi-producers safe).
+ *
+ * Forwards to __rte_ring_sp_do_enqueue() with the
+ * RTE_RING_QUEUE_VARIABLE behaviour.
+ *
+ * @param r
+ *   A pointer to the ring structure.
+ * @param obj_table
+ *   A pointer to a table of void * pointers (objects).
+ * @param n
+ *   The number of objects to add in the ring from the obj_table.
+ * @return
+ *   - n: Actual number of objects enqueued.
+ */
+static inline unsigned __attribute__((always_inline))
+rte_ring_sp_enqueue_burst(struct rte_ring *r, void * const *obj_table,
+			 unsigned n)
+{
+	return __rte_ring_sp_do_enqueue(r, obj_table, n, RTE_RING_QUEUE_VARIABLE);
+}
+
+/**
+ * Enqueue several objects on a ring.
+ *
+ * This function calls the multi-producer or the single-producer
+ * version depending on the default behavior that was specified at
+ * ring creation time (see flags).
+ *
+ * @param r
+ *   A pointer to the ring structure.
+ * @param obj_table
+ *   A pointer to a table of void * pointers (objects).
+ * @param n
+ *   The number of objects to add in the ring from the obj_table.
+ * @return
+ *   - n: Actual number of objects enqueued.
+ */ +static inline unsigned __attribute__((always_inline)) +rte_ring_enqueue_burst(struct rte_ring *r, void * const *obj_table, + unsigned n) +{ + if (r->prod.sp_enqueue) + return rte_ring_sp_enqueue_burst(r, obj_table, n); + else + return rte_ring_mp_enqueue_burst(r, obj_table, n); +} + +/** + * Dequeue several objects from a ring (multi-consumers safe). When the request + * objects are more than the available objects, only dequeue the actual number + * of objects + * + * This function uses a "compare and set" instruction to move the + * consumer index atomically. + * + * @param r + * A pointer to the ring structure. + * @param obj_table + * A pointer to a table of void * pointers (objects) that will be filled. + * @param n + * The number of objects to dequeue from the ring to the obj_table. + * @return + * - n: Actual number of objects dequeued, 0 if ring is empty + */ +static inline unsigned __attribute__((always_inline)) +rte_ring_mc_dequeue_burst(struct rte_ring *r, void **obj_table, unsigned n) +{ + return __rte_ring_mc_do_dequeue(r, obj_table, n, RTE_RING_QUEUE_VARIABLE); +} + +/** + * Dequeue several objects from a ring (NOT multi-consumers safe).When the + * request objects are more than the available objects, only dequeue the + * actual number of objects + * + * @param r + * A pointer to the ring structure. + * @param obj_table + * A pointer to a table of void * pointers (objects) that will be filled. + * @param n + * The number of objects to dequeue from the ring to the obj_table. + * @return + * - n: Actual number of objects dequeued, 0 if ring is empty + */ +static inline unsigned __attribute__((always_inline)) +rte_ring_sc_dequeue_burst(struct rte_ring *r, void **obj_table, unsigned n) +{ + return __rte_ring_sc_do_dequeue(r, obj_table, n, RTE_RING_QUEUE_VARIABLE); +} + +/** + * Dequeue multiple objects from a ring up to a maximum number. 
+ * + * This function calls the multi-consumers or the single-consumer + * version, depending on the default behaviour that was specified at + * ring creation time (see flags). + * + * @param r + * A pointer to the ring structure. + * @param obj_table + * A pointer to a table of void * pointers (objects) that will be filled. + * @param n + * The number of objects to dequeue from the ring to the obj_table. + * @return + * - Number of objects dequeued + */ +static inline unsigned __attribute__((always_inline)) +rte_ring_dequeue_burst(struct rte_ring *r, void **obj_table, unsigned n) +{ + if (r->cons.sc_dequeue) + return rte_ring_sc_dequeue_burst(r, obj_table, n); + else + return rte_ring_mc_dequeue_burst(r, obj_table, n); +} + +#ifdef __cplusplus +} +#endif + +#endif /* _RTE_RING_H_ */ diff --git a/src/dpdk22/lib/librte_table/rte_lru.h b/src/dpdk22/lib/librte_table/rte_lru.h new file mode 100644 index 00000000..e87e062d --- /dev/null +++ b/src/dpdk22/lib/librte_table/rte_lru.h @@ -0,0 +1,213 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef __INCLUDE_RTE_LRU_H__ +#define __INCLUDE_RTE_LRU_H__ + +#ifdef __cplusplus +extern "C" { +#endif + +#include + +#ifdef __INTEL_COMPILER +#define GCC_VERSION (0) +#else +#define GCC_VERSION (__GNUC__ * 10000+__GNUC_MINOR__*100 + __GNUC_PATCHLEVEL__) +#endif + +#ifndef RTE_TABLE_HASH_LRU_STRATEGY +#ifdef __SSE4_2__ +#define RTE_TABLE_HASH_LRU_STRATEGY 2 +#else /* if no SSE, use simple scalar version */ +#define RTE_TABLE_HASH_LRU_STRATEGY 1 +#endif +#endif + +#ifndef RTE_ARCH_X86_64 +#undef RTE_TABLE_HASH_LRU_STRATEGY +#define RTE_TABLE_HASH_LRU_STRATEGY 1 +#endif + +#if (RTE_TABLE_HASH_LRU_STRATEGY < 0) || (RTE_TABLE_HASH_LRU_STRATEGY > 3) +#error Invalid value for RTE_TABLE_HASH_LRU_STRATEGY +#endif + +#if RTE_TABLE_HASH_LRU_STRATEGY == 0 + +#define lru_init(bucket) \ +do \ + bucket = bucket; \ +while (0) + +#define lru_pos(bucket) (bucket->lru_list & 0xFFFFLLU) + +#define lru_update(bucket, mru_val) \ +do { \ + bucket = bucket; \ + mru_val = mru_val; \ +} while (0) + +#elif RTE_TABLE_HASH_LRU_STRATEGY == 1 + +#define lru_init(bucket) \ +do \ + bucket->lru_list = 0x0000000100020003LLU; \ +while (0) + +#define lru_pos(bucket) (bucket->lru_list & 0xFFFFLLU) + 
+#define lru_update(bucket, mru_val) \ +do { \ + uint64_t x, pos, x0, x1, x2, mask; \ + \ + x = bucket->lru_list; \ + \ + pos = 4; \ + if ((x >> 48) == ((uint64_t) mru_val)) \ + pos = 3; \ + \ + if (((x >> 32) & 0xFFFFLLU) == ((uint64_t) mru_val)) \ + pos = 2; \ + \ + if (((x >> 16) & 0xFFFFLLU) == ((uint64_t) mru_val)) \ + pos = 1; \ + \ + if ((x & 0xFFFFLLU) == ((uint64_t) mru_val)) \ + pos = 0; \ + \ + \ + pos <<= 4; \ + mask = (~0LLU) << pos; \ + x0 = x & (~mask); \ + x1 = (x >> 16) & mask; \ + x2 = (x << (48 - pos)) & (0xFFFFLLU << 48); \ + x = x0 | x1 | x2; \ + \ + if (pos != 64) \ + bucket->lru_list = x; \ +} while (0) + +#elif RTE_TABLE_HASH_LRU_STRATEGY == 2 + +#if GCC_VERSION > 40306 +#include +#else +#include +#include +#include +#endif + +#define lru_init(bucket) \ +do \ + bucket->lru_list = 0x0000000100020003LLU; \ +while (0) + +#define lru_pos(bucket) (bucket->lru_list & 0xFFFFLLU) + +#define lru_update(bucket, mru_val) \ +do { \ + /* set up the masks for all possible shuffles, depends on pos */\ + static uint64_t masks[10] = { \ + /* Shuffle order; Make Zero (see _mm_shuffle_epi8 manual) */\ + 0x0100070605040302, 0x8080808080808080, \ + 0x0302070605040100, 0x8080808080808080, \ + 0x0504070603020100, 0x8080808080808080, \ + 0x0706050403020100, 0x8080808080808080, \ + 0x0706050403020100, 0x8080808080808080}; \ + /* load up one register with repeats of mru-val */ \ + uint64_t mru2 = mru_val; \ + uint64_t mru3 = mru2 | (mru2 << 16); \ + uint64_t lru = bucket->lru_list; \ + /* XOR to cause the word we're looking for to go to zero */ \ + uint64_t mru = lru ^ ((mru3 << 32) | mru3); \ + __m128i c = _mm_cvtsi64_si128(mru); \ + __m128i b = _mm_cvtsi64_si128(lru); \ + /* Find the minimum value (first zero word, if it's in there) */\ + __m128i d = _mm_minpos_epu16(c); \ + /* Second word is the index to found word (first word is the value) */\ + unsigned pos = _mm_extract_epi16(d, 1); \ + /* move the recently used location to top of list */ \ + __m128i k = 
_mm_shuffle_epi8(b, *((__m128i *) &masks[2 * pos]));\ + /* Finally, update the original list with the reordered data */ \ + bucket->lru_list = _mm_extract_epi64(k, 0); \ + /* Phwew! */ \ +} while (0) + +#elif RTE_TABLE_HASH_LRU_STRATEGY == 3 + +#if GCC_VERSION > 40306 +#include +#else +#include +#include +#include +#endif + +#define lru_init(bucket) \ +do \ + bucket->lru_list = ~0LLU; \ +while (0) + + +static inline int +f_lru_pos(uint64_t lru_list) +{ + __m128i lst = _mm_set_epi64x((uint64_t)-1, lru_list); + __m128i min = _mm_minpos_epu16(lst); + return _mm_extract_epi16(min, 1); +} +#define lru_pos(bucket) f_lru_pos(bucket->lru_list) + +#define lru_update(bucket, mru_val) \ +do { \ + const uint64_t orvals[] = {0xFFFFLLU, 0xFFFFLLU << 16, \ + 0xFFFFLLU << 32, 0xFFFFLLU << 48, 0LLU}; \ + const uint64_t decs[] = {0x1000100010001LLU, 0}; \ + __m128i lru = _mm_cvtsi64_si128(bucket->lru_list); \ + __m128i vdec = _mm_cvtsi64_si128(decs[mru_val>>2]); \ + lru = _mm_subs_epu16(lru, vdec); \ + bucket->lru_list = _mm_extract_epi64(lru, 0) | orvals[mru_val]; \ +} while (0) + +#else + +#error "Incorrect value for RTE_TABLE_HASH_LRU_STRATEGY" + +#endif + +#ifdef __cplusplus +} +#endif + +#endif diff --git a/src/dpdk22/lib/librte_table/rte_table.h b/src/dpdk22/lib/librte_table/rte_table.h new file mode 100644 index 00000000..720514ea --- /dev/null +++ b/src/dpdk22/lib/librte_table/rte_table.h @@ -0,0 +1,301 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. 
+ * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef __INCLUDE_RTE_TABLE_H__ +#define __INCLUDE_RTE_TABLE_H__ + +#ifdef __cplusplus +extern "C" { +#endif + +/** + * @file + * RTE Table + * + * This tool is part of the Intel DPDK Packet Framework tool suite and provides + * a standard interface to implement different types of lookup tables for data + * plane processing. + * + * Virtually any search algorithm that can uniquely associate data to a lookup + * key can be fitted under this lookup table abstraction. For the flow table + * use-case, the lookup key is an n-tuple of packet fields that uniquely + * identifies a traffic flow, while data represents actions and action + * meta-data associated with the same traffic flow. 
+ * + ***/ + +#include +#include + +struct rte_mbuf; + +/** Lookup table statistics */ +struct rte_table_stats { + uint64_t n_pkts_in; + uint64_t n_pkts_lookup_miss; +}; + +/** + * Lookup table create + * + * @param params + * Parameters for lookup table creation. The underlying data structure is + * different for each lookup table type. + * @param socket_id + * CPU socket ID (e.g. for memory allocation purpose) + * @param entry_size + * Data size of each lookup table entry (measured in bytes) + * @return + * Handle to lookup table instance + */ +typedef void* (*rte_table_op_create)(void *params, int socket_id, + uint32_t entry_size); + +/** + * Lookup table free + * + * @param table + * Handle to lookup table instance + * @return + * 0 on success, error code otherwise + */ +typedef int (*rte_table_op_free)(void *table); + +/** + * Lookup table entry add + * + * @param table + * Handle to lookup table instance + * @param key + * Lookup key + * @param entry + * Data to be associated with the current key. This parameter has to point to + * a valid memory buffer where the first entry_size bytes (table create + * parameter) are populated with the data. + * @param key_found + * After successful invocation, *key_found is set to a value different than 0 + * if the current key is already present in the table and to 0 if not. This + * pointer has to be set to a valid memory location before the table entry add + * function is called. + * @param entry_ptr + * After successful invocation, *entry_ptr stores the handle to the table + * entry containing the data associated with the current key. This handle can + * be used to perform further read-write accesses to this entry. This handle + * is valid until the key is deleted from the table or the same key is + * re-added to the table, typically to associate it with different data. This + * pointer has to be set to a valid memory location before the function is + * called. 
+ * @return
+ *   0 on success, error code otherwise
+ */
+typedef int (*rte_table_op_entry_add)(
+	void *table,
+	void *key,
+	void *entry,
+	int *key_found,
+	void **entry_ptr);
+
+/**
+ * Lookup table entry delete
+ *
+ * @param table
+ *   Handle to lookup table instance
+ * @param key
+ *   Lookup key
+ * @param key_found
+ *   After successful invocation, *key_found is set to a value different than 0
+ *   if the current key was present in the table before the delete operation
+ *   was performed and to 0 if not. This pointer has to be set to a valid
+ *   memory location before the table entry delete function is called.
+ * @param entry
+ *   After successful invocation, if the key is found in the table (*key_found
+ *   is different than 0 after function call is completed) and entry points to
+ *   a valid buffer (entry is set to a value different than NULL before the
+ *   function is called), then the first entry_size bytes (table create
+ *   parameter) in *entry store a copy of table entry that contained the data
+ *   associated with the current key before the key was deleted.
+ * @return
+ *   0 on success, error code otherwise
+ */
+typedef int (*rte_table_op_entry_delete)(
+	void *table,
+	void *key,
+	int *key_found,
+	void *entry);
+
+/**
+ * Lookup table entry add bulk
+ *
+ * @param table
+ *   Handle to lookup table instance
+ * @param keys
+ *   Array containing lookup keys
+ * @param entries
+ *   Array containing data to be associated with each key. Every item in the
+ *   array has to point to a valid memory buffer where the first entry_size
+ *   bytes (table create parameter) are populated with the data.
+ * @param n_keys
+ *   Number of keys to add
+ * @param key_found
+ *   After successful invocation, key_found for every item in the array is set
+ *   to a value different than 0 if the current key is already present in the
+ *   table and to 0 if not. This pointer has to be set to a valid memory
+ *   location before the table entry add function is called.
+ * @param entries_ptr
+ *   After successful invocation, array *entries_ptr stores the handle to the
+ *   table entry containing the data associated with every key. This handle can
+ *   be used to perform further read-write accesses to this entry. This handle
+ *   is valid until the key is deleted from the table or the same key is
+ *   re-added to the table, typically to associate it with different data. This
+ *   pointer has to be set to a valid memory location before the function is
+ *   called.
+ * @return
+ *   0 on success, error code otherwise
+ */
+typedef int (*rte_table_op_entry_add_bulk)(
+	void *table,
+	void **keys,
+	void **entries,
+	uint32_t n_keys,
+	int *key_found,
+	void **entries_ptr);
+
+/**
+ * Lookup table entry delete bulk
+ *
+ * @param table
+ *   Handle to lookup table instance
+ * @param keys
+ *   Array containing lookup keys
+ * @param n_keys
+ *   Number of keys to delete
+ * @param key_found
+ *   After successful invocation, key_found for every item in the array is set
+ *   to a value different than 0 if the current key was present in the table
+ *   before the delete operation was performed and to 0 if not. This pointer
+ *   has to be set to a valid memory location before the table entry delete
+ *   function is called.
+ * @param entries
+ *   If entries pointer is NULL, this pointer is ignored for every entry found.
+ *   Else, after successful invocation, if specific key is found in the table
+ *   (key_found is different than 0 for this item after function call is
+ *   completed) and item of entries array points to a valid buffer (item is set
+ *   to a value different than NULL before the function is called), then the
+ *   first entry_size bytes (table create parameter) in that buffer store a
+ *   copy of table entry that contained the data associated with the current
+ *   key before the key was deleted.
+ * @return
+ *   0 on success, error code otherwise
+ */
+typedef int (*rte_table_op_entry_delete_bulk)(
+	void *table,
+	void **keys,
+	uint32_t n_keys,
+	int *key_found,
+	void **entries);
+
+/**
+ * Lookup table lookup
+ *
+ * @param table
+ *   Handle to lookup table instance
+ * @param pkts
+ *   Burst of input packets specified as array of up to 64 pointers to struct
+ *   rte_mbuf
+ * @param pkts_mask
+ *   64-bit bitmask specifying which packets in the input burst are valid. When
+ *   pkts_mask bit n is set, then element n of pkts array is pointing to a
+ *   valid packet. Otherwise, element n of pkts array does not point to a valid
+ *   packet, therefore it will not be accessed.
+ * @param lookup_hit_mask
+ *   Once the table lookup operation is completed, this 64-bit bitmask
+ *   specifies which of the valid packets in the input burst resulted in lookup
+ *   hit. For each valid input packet (pkts_mask bit n is set), the following
+ *   are true on lookup hit: lookup_hit_mask bit n is set, element n of entries
+ *   array is valid and it points to the lookup table entry that was hit. For
+ *   each valid input packet (pkts_mask bit n is set), the following are true
+ *   on lookup miss: lookup_hit_mask bit n is not set and element n of entries
+ *   array is not valid.
+ * @param entries
+ *   Once the table lookup operation is completed, this array provides the
+ *   lookup table entries that were hit, as described above. It is required
+ *   that this array is always pre-allocated by the caller of this function
+ *   with exactly 64 elements. The implementation is allowed to speculatively
+ *   modify the elements of this array, so elements marked as invalid in
+ *   lookup_hit_mask once the table lookup operation is completed might have
+ *   been modified by this function.
+ * @return
+ *   0 on success, error code otherwise
+ */
+typedef int (*rte_table_op_lookup)(
+	void *table,
+	struct rte_mbuf **pkts,
+	uint64_t pkts_mask,
+	uint64_t *lookup_hit_mask,
+	void **entries);
+
+/**
+ * Lookup table stats read
+ *
+ * @param table
+ *   Handle to lookup table instance
+ * @param stats
+ *   Pointer to the table stats struct that receives a copy of the counters
+ * @param clear
+ *   Flag indicating that stats should be cleared after read
+ *
+ * @return
+ *   Error code or 0 on success.
+ */
+typedef int (*rte_table_op_stats_read)(
+	void *table,
+	struct rte_table_stats *stats,
+	int clear);
+
+/**
+ * Lookup table interface: one function pointer for each of the lookup table
+ * operation types declared above.
+ */
+struct rte_table_ops {
+	rte_table_op_create f_create;                 /**< Create */
+	rte_table_op_free f_free;                     /**< Free */
+	rte_table_op_entry_add f_add;                 /**< Entry add */
+	rte_table_op_entry_delete f_delete;           /**< Entry delete */
+	rte_table_op_entry_add_bulk f_add_bulk;       /**< Add entry bulk */
+	rte_table_op_entry_delete_bulk f_delete_bulk; /**< Delete entry bulk */
+	rte_table_op_lookup f_lookup;                 /**< Lookup */
+	rte_table_op_stats_read f_stats;              /**< Stats */
+};
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
diff --git a/src/dpdk22/lib/librte_table/rte_table_acl.h b/src/dpdk22/lib/librte_table/rte_table_acl.h
new file mode 100644
index 00000000..a9cc0328
--- /dev/null
+++ b/src/dpdk22/lib/librte_table/rte_table_acl.h
@@ -0,0 +1,95 @@
+/*-
+ *   BSD LICENSE
+ *
+ *   Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+ *   All rights reserved.
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef __INCLUDE_RTE_TABLE_ACL_H__ +#define __INCLUDE_RTE_TABLE_ACL_H__ + +#ifdef __cplusplus +extern "C" { +#endif + +/** + * @file + * RTE Table ACL + * + * This table uses the Access Control List (ACL) algorithm to uniquely + * associate data to lookup keys. + * + * Use-cases: Firewall rule database, etc. 
+ * + ***/ + +#include + +#include "rte_acl.h" + +#include "rte_table.h" + +/** ACL table parameters */ +struct rte_table_acl_params { + /** Name */ + const char *name; + + /** Maximum number of ACL rules in the table */ + uint32_t n_rules; + + /** Number of fields in the ACL rule specification */ + uint32_t n_rule_fields; + + /** Format specification of the fields of the ACL rule */ + struct rte_acl_field_def field_format[RTE_ACL_MAX_FIELDS]; +}; + +/** ACL rule specification for entry add operation */ +struct rte_table_acl_rule_add_params { + /** ACL rule priority, with 0 as the highest priority */ + int32_t priority; + + /** Values for the fields of the ACL rule to be added to the table */ + struct rte_acl_field field_value[RTE_ACL_MAX_FIELDS]; +}; + +/** ACL rule specification for entry delete operation */ +struct rte_table_acl_rule_delete_params { + /** Values for the fields of the ACL rule to be deleted from table */ + struct rte_acl_field field_value[RTE_ACL_MAX_FIELDS]; +}; + +/** ACL table operations */ +extern struct rte_table_ops rte_table_acl_ops; + +#ifdef __cplusplus +} +#endif + +#endif diff --git a/src/dpdk22/lib/librte_table/rte_table_array.h b/src/dpdk22/lib/librte_table/rte_table_array.h new file mode 100644 index 00000000..9521119e --- /dev/null +++ b/src/dpdk22/lib/librte_table/rte_table_array.h @@ -0,0 +1,76 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. 
+ * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef __INCLUDE_RTE_TABLE_ARRAY_H__ +#define __INCLUDE_RTE_TABLE_ARRAY_H__ + +#ifdef __cplusplus +extern "C" { +#endif + +/** + * @file + * RTE Table Array + * + * Simple array indexing. Lookup key is the array entry index. + * + ***/ + +#include + +#include "rte_table.h" + +/** Array table parameters */ +struct rte_table_array_params { + /** Number of array entries. Has to be a power of two. */ + uint32_t n_entries; + + /** Byte offset within input packet meta-data where lookup key (i.e. the + array entry index) is located. 
*/ + uint32_t offset; +}; + +/** Array table key format */ +struct rte_table_array_key { + /** Array entry index */ + uint32_t pos; +}; + +/** Array table operations */ +extern struct rte_table_ops rte_table_array_ops; + +#ifdef __cplusplus +} +#endif + +#endif diff --git a/src/dpdk22/lib/librte_table/rte_table_hash.h b/src/dpdk22/lib/librte_table/rte_table_hash.h new file mode 100644 index 00000000..9d17516a --- /dev/null +++ b/src/dpdk22/lib/librte_table/rte_table_hash.h @@ -0,0 +1,370 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef __INCLUDE_RTE_TABLE_HASH_H__ +#define __INCLUDE_RTE_TABLE_HASH_H__ + +#ifdef __cplusplus +extern "C" { +#endif + +/** + * @file + * RTE Table Hash + * + * These tables use the exact match criterion to uniquely associate data to + * lookup keys. + * + * Use-cases: Flow classification table, Address Resolution Protocol (ARP) table + * + * Hash table types: + * 1. Entry add strategy on bucket full: + * a. Least Recently Used (LRU): One of the existing keys in the bucket is + * deleted and the new key is added in its place. The number of keys in + * each bucket never grows bigger than 4. The logic to pick the key to + * be dropped from the bucket is LRU. The hash table lookup operation + * maintains the order in which the keys in the same bucket are hit, so + * every time a key is hit, it becomes the new Most Recently Used (MRU) + * key, i.e. the most unlikely candidate for drop. When a key is added + * to the bucket, it also becomes the new MRU key. When a key needs to + * be picked and dropped, the most likely candidate for drop, i.e. the + * current LRU key, is always picked. The LRU logic requires maintaining + * specific data structures per each bucket. + * b. Extendible bucket (ext): The bucket is extended with space for 4 more + * keys. 
This is done by allocating additional memory at table init time, + * which is used to create a pool of free keys (the size of this pool is + * configurable and always a multiple of 4). On key add operation, the + * allocation of a group of 4 keys only happens successfully within the + * limit of free keys, otherwise the key add operation fails. On key + * delete operation, a group of 4 keys is freed back to the pool of free + * keys when the key to be deleted is the only key that was used within + * its group of 4 keys at that time. On key lookup operation, if the + * current bucket is in extended state and a match is not found in the + * first group of 4 keys, the search continues beyond the first group of + * 4 keys, potentially until all keys in this bucket are examined. The + * extendible bucket logic requires maintaining specific data structures + * per table and per each bucket. + * 2. Key signature computation: + * a. Pre-computed key signature: The key lookup operation is split between + * two CPU cores. The first CPU core (typically the CPU core performing + * packet RX) extracts the key from the input packet, computes the key + * signature and saves both the key and the key signature in the packet + * buffer as packet meta-data. The second CPU core reads both the key and + * the key signature from the packet meta-data and performs the bucket + * search step of the key lookup operation. + * b. Key signature computed on lookup (do-sig): The same CPU core reads + * the key from the packet meta-data, uses it to compute the key + * signature and also performs the bucket search step of the key lookup + * operation. + * 3. Key size: + * a. Configurable key size + * b. 
Single key size (8-byte, 16-byte or 32-byte key size) + * + ***/ +#include <stdint.h> + +#include "rte_table.h" + +/** Hash function */ +typedef uint64_t (*rte_table_hash_op_hash)( + void *key, + uint32_t key_size, + uint64_t seed); + +/** + * Hash tables with configurable key size + * + */ +/** Extendible bucket hash table parameters */ +struct rte_table_hash_ext_params { + /** Key size (number of bytes) */ + uint32_t key_size; + + /** Maximum number of keys */ + uint32_t n_keys; + + /** Number of hash table buckets. Each bucket stores up to 4 keys. */ + uint32_t n_buckets; + + /** Number of hash table bucket extensions. Each bucket extension has + space for 4 keys and each bucket can have 0, 1 or more extensions. */ + uint32_t n_buckets_ext; + + /** Hash function */ + rte_table_hash_op_hash f_hash; + + /** Seed value for the hash function */ + uint64_t seed; + + /** Byte offset within packet meta-data where the 4-byte key signature + is located. Valid for pre-computed key signature tables, ignored for + do-sig tables. */ + uint32_t signature_offset; + + /** Byte offset within packet meta-data where the key is located */ + uint32_t key_offset; +}; + +/** Extendible bucket hash table operations for pre-computed key signature */ +extern struct rte_table_ops rte_table_hash_ext_ops; + +/** Extendible bucket hash table operations for key signature computed on + lookup ("do-sig") */ +extern struct rte_table_ops rte_table_hash_ext_dosig_ops; + +/** LRU hash table parameters */ +struct rte_table_hash_lru_params { + /** Key size (number of bytes) */ + uint32_t key_size; + + /** Maximum number of keys */ + uint32_t n_keys; + + /** Number of hash table buckets. Each bucket stores up to 4 keys. */ + uint32_t n_buckets; + + /** Hash function */ + rte_table_hash_op_hash f_hash; + + /** Seed value for the hash function */ + uint64_t seed; + + /** Byte offset within packet meta-data where the 4-byte key signature + is located.
Valid for pre-computed key signature tables, ignored for + do-sig tables. */ + uint32_t signature_offset; + + /** Byte offset within packet meta-data where the key is located */ + uint32_t key_offset; +}; + +/** LRU hash table operations for pre-computed key signature */ +extern struct rte_table_ops rte_table_hash_lru_ops; + +/** LRU hash table operations for key signature computed on lookup ("do-sig") */ +extern struct rte_table_ops rte_table_hash_lru_dosig_ops; + +/** + * 8-byte key hash tables + * + */ +/** LRU hash table parameters */ +struct rte_table_hash_key8_lru_params { + /** Maximum number of entries (and keys) in the table */ + uint32_t n_entries; + + /** Hash function */ + rte_table_hash_op_hash f_hash; + + /** Seed for the hash function */ + uint64_t seed; + + /** Byte offset within packet meta-data where the 4-byte key signature + is located. Valid for pre-computed key signature tables, ignored for + do-sig tables. */ + uint32_t signature_offset; + + /** Byte offset within packet meta-data where the key is located */ + uint32_t key_offset; + + /** Bit-mask to be AND-ed to the key on lookup */ + uint8_t *key_mask; +}; + +/** LRU hash table operations for pre-computed key signature */ +extern struct rte_table_ops rte_table_hash_key8_lru_ops; + +/** LRU hash table operations for key signature computed on lookup ("do-sig") */ +extern struct rte_table_ops rte_table_hash_key8_lru_dosig_ops; + +/** Extendible bucket hash table parameters */ +struct rte_table_hash_key8_ext_params { + /** Maximum number of entries (and keys) in the table */ + uint32_t n_entries; + + /** Number of entries (and keys) for hash table bucket extensions. Each + bucket is extended in increments of 4 keys. */ + uint32_t n_entries_ext; + + /** Hash function */ + rte_table_hash_op_hash f_hash; + + /** Seed for the hash function */ + uint64_t seed; + + /** Byte offset within packet meta-data where the 4-byte key signature + is located. 
Valid for pre-computed key signature tables, ignored for + do-sig tables. */ + uint32_t signature_offset; + + /** Byte offset within packet meta-data where the key is located */ + uint32_t key_offset; + + /** Bit-mask to be AND-ed to the key on lookup */ + uint8_t *key_mask; +}; + +/** Extendible bucket hash table operations for pre-computed key signature */ +extern struct rte_table_ops rte_table_hash_key8_ext_ops; + +/** Extendible bucket hash table operations for key signature computed on + lookup ("do-sig") */ +extern struct rte_table_ops rte_table_hash_key8_ext_dosig_ops; + +/** + * 16-byte key hash tables + * + */ +/** LRU hash table parameters */ +struct rte_table_hash_key16_lru_params { + /** Maximum number of entries (and keys) in the table */ + uint32_t n_entries; + + /** Hash function */ + rte_table_hash_op_hash f_hash; + + /** Seed for the hash function */ + uint64_t seed; + + /** Byte offset within packet meta-data where the 4-byte key signature + is located. Valid for pre-computed key signature tables, ignored for + do-sig tables. */ + uint32_t signature_offset; + + /** Byte offset within packet meta-data where the key is located */ + uint32_t key_offset; + + /** Bit-mask to be AND-ed to the key on lookup */ + uint8_t *key_mask; +}; + +/** LRU hash table operations for pre-computed key signature */ +extern struct rte_table_ops rte_table_hash_key16_lru_ops; + +/** LRU hash table operations for key signature computed on lookup + ("do-sig") */ +extern struct rte_table_ops rte_table_hash_key16_lru_dosig_ops; + +/** Extendible bucket hash table parameters */ +struct rte_table_hash_key16_ext_params { + /** Maximum number of entries (and keys) in the table */ + uint32_t n_entries; + + /** Number of entries (and keys) for hash table bucket extensions. Each + bucket is extended in increments of 4 keys. 
*/ + uint32_t n_entries_ext; + + /** Hash function */ + rte_table_hash_op_hash f_hash; + + /** Seed for the hash function */ + uint64_t seed; + + /** Byte offset within packet meta-data where the 4-byte key signature + is located. Valid for pre-computed key signature tables, ignored for + do-sig tables. */ + uint32_t signature_offset; + + /** Byte offset within packet meta-data where the key is located */ + uint32_t key_offset; + + /** Bit-mask to be AND-ed to the key on lookup */ + uint8_t *key_mask; +}; + +/** Extendible bucket operations for pre-computed key signature */ +extern struct rte_table_ops rte_table_hash_key16_ext_ops; + +/** Extendible bucket hash table operations for key signature computed on + lookup ("do-sig") */ +extern struct rte_table_ops rte_table_hash_key16_ext_dosig_ops; + +/** + * 32-byte key hash tables + * + */ +/** LRU hash table parameters */ +struct rte_table_hash_key32_lru_params { + /** Maximum number of entries (and keys) in the table */ + uint32_t n_entries; + + /** Hash function */ + rte_table_hash_op_hash f_hash; + + /** Seed for the hash function */ + uint64_t seed; + + /** Byte offset within packet meta-data where the 4-byte key signature + is located. Valid for pre-computed key signature tables, ignored for + do-sig tables. */ + uint32_t signature_offset; + + /** Byte offset within packet meta-data where the key is located */ + uint32_t key_offset; +}; + +/** LRU hash table operations for pre-computed key signature */ +extern struct rte_table_ops rte_table_hash_key32_lru_ops; + +/** Extendible bucket hash table parameters */ +struct rte_table_hash_key32_ext_params { + /** Maximum number of entries (and keys) in the table */ + uint32_t n_entries; + + /** Number of entries (and keys) for hash table bucket extensions. Each + bucket is extended in increments of 4 keys. 
*/ + uint32_t n_entries_ext; + + /** Hash function */ + rte_table_hash_op_hash f_hash; + + /** Seed for the hash function */ + uint64_t seed; + + /** Byte offset within packet meta-data where the 4-byte key signature + is located. Valid for pre-computed key signature tables, ignored for + do-sig tables. */ + uint32_t signature_offset; + + /** Byte offset within packet meta-data where the key is located */ + uint32_t key_offset; +}; + +/** Extendible bucket hash table operations */ +extern struct rte_table_ops rte_table_hash_key32_ext_ops; + +#ifdef __cplusplus +} +#endif + +#endif diff --git a/src/dpdk22/lib/librte_table/rte_table_lpm.h b/src/dpdk22/lib/librte_table/rte_table_lpm.h new file mode 100644 index 00000000..06e84102 --- /dev/null +++ b/src/dpdk22/lib/librte_table/rte_table_lpm.h @@ -0,0 +1,118 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef __INCLUDE_RTE_TABLE_LPM_H__ +#define __INCLUDE_RTE_TABLE_LPM_H__ + +#ifdef __cplusplus +extern "C" { +#endif + +/** + * @file + * RTE Table LPM for IPv4 + * + * This table uses the Longest Prefix Match (LPM) algorithm to uniquely + * associate data to lookup keys. + * + * Use-case: IP routing table. Routes that are added to the table associate a + * next hop to an IP prefix. The IP prefix is specified as IP address and depth + * and cover for a multitude of lookup keys (i.e. destination IP addresses) + * that all share the same data (i.e. next hop). The next hop information + * typically contains the output interface ID, the IP address of the next hop + * station (which is part of the same IP network the output interface is + * connected to) and other flags and counters. + * + * The LPM primitive only allows associating an 8-bit number (next hop ID) to + * an IP prefix, while a routing table can potentially contain thousands of + * routes or even more. This means that the same next hop ID (and next hop + * information) has to be shared by multiple routes, which makes sense, as + * multiple remote networks could be reached through the same next hop. + * Therefore, when a route is added or updated, the LPM table has to check + * whether the same next hop is already in use before using a new next hop ID + * for this route. 
+ * + * The comparison between different next hops is done for the first + * “entry_unique_size” bytes of the next hop information (configurable + * parameter), which have to uniquely identify the next hop, therefore the user + * has to carefully manage the format of the LPM table entry (i.e. the next + * hop information) so that any next hop data that changes value during + * run-time (e.g. counters) is placed outside of this area. + * + ***/ + +#include <stdint.h> + +#include "rte_table.h" + +/** LPM table parameters */ +struct rte_table_lpm_params { + /** Table name */ + const char *name; + + /** Maximum number of LPM rules (i.e. IP routes) */ + uint32_t n_rules; + + /** Number of bytes at the start of the table entry that uniquely + identify the entry. Cannot be bigger than table entry size. */ + uint32_t entry_unique_size; + + /** Byte offset within input packet meta-data where lookup key (i.e. + the destination IP address) is located. */ + uint32_t offset; +}; + +/** LPM table rule (i.e. route), specified as IP prefix. While the key used by +the lookup operation is the destination IP address (read from the input packet +meta-data), the entry add and entry delete operations work with LPM rules, with +each rule covering for a multitude of lookup keys (destination IP addresses) +that share the same data (next hop). */ +struct rte_table_lpm_key { + /** IP address */ + uint32_t ip; + + /** IP address depth. The most significant "depth" bits of the IP + address specify the network part of the IP address, while the rest of + the bits specify the host part of the address and are ignored for the + purpose of route specification.
*/ + uint8_t depth; +}; + +/** LPM table operations */ +extern struct rte_table_ops rte_table_lpm_ops; + +#ifdef __cplusplus +} +#endif + +#endif diff --git a/src/dpdk22/lib/librte_table/rte_table_lpm_ipv6.h b/src/dpdk22/lib/librte_table/rte_table_lpm_ipv6.h new file mode 100644 index 00000000..43aea399 --- /dev/null +++ b/src/dpdk22/lib/librte_table/rte_table_lpm_ipv6.h @@ -0,0 +1,122 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef __INCLUDE_RTE_TABLE_LPM_IPV6_H__ +#define __INCLUDE_RTE_TABLE_LPM_IPV6_H__ + +#ifdef __cplusplus +extern "C" { +#endif + +/** + * @file + * RTE Table LPM for IPv6 + * + * This table uses the Longest Prefix Match (LPM) algorithm to uniquely + * associate data to lookup keys. + * + * Use-case: IP routing table. Routes that are added to the table associate a + * next hop to an IP prefix. The IP prefix is specified as IP address and depth + * and cover for a multitude of lookup keys (i.e. destination IP addresses) + * that all share the same data (i.e. next hop). The next hop information + * typically contains the output interface ID, the IP address of the next hop + * station (which is part of the same IP network the output interface is + * connected to) and other flags and counters. + * + * The LPM primitive only allows associating an 8-bit number (next hop ID) to + * an IP prefix, while a routing table can potentially contain thousands of + * routes or even more. This means that the same next hop ID (and next hop + * information) has to be shared by multiple routes, which makes sense, as + * multiple remote networks could be reached through the same next hop. + * Therefore, when a route is added or updated, the LPM table has to check + * whether the same next hop is already in use before using a new next hop ID + * for this route. 
+ * + * The comparison between different next hops is done for the first + * “entry_unique_size” bytes of the next hop information (configurable + * parameter), which have to uniquely identify the next hop, therefore the user + * has to carefully manage the format of the LPM table entry (i.e. the next + * hop information) so that any next hop data that changes value during + * run-time (e.g. counters) is placed outside of this area. + * + ***/ + +#include <stdint.h> + +#include "rte_table.h" + +#define RTE_LPM_IPV6_ADDR_SIZE 16 + +/** LPM table parameters */ +struct rte_table_lpm_ipv6_params { + /** Table name */ + const char *name; + + /** Maximum number of LPM rules (i.e. IP routes) */ + uint32_t n_rules; + + uint32_t number_tbl8s; + + /** Number of bytes at the start of the table entry that uniquely + identify the entry. Cannot be bigger than table entry size. */ + uint32_t entry_unique_size; + + /** Byte offset within input packet meta-data where lookup key (i.e. + the destination IP address) is located. */ + uint32_t offset; +}; + +/** LPM table rule (i.e. route), specified as IP prefix. While the key used by +the lookup operation is the destination IP address (read from the input packet +meta-data), the entry add and entry delete operations work with LPM rules, with +each rule covering for a multitude of lookup keys (destination IP addresses) +that share the same data (next hop). */ +struct rte_table_lpm_ipv6_key { + /** IP address */ + uint8_t ip[RTE_LPM_IPV6_ADDR_SIZE]; + + /** IP address depth. The most significant "depth" bits of the IP + address specify the network part of the IP address, while the rest of + the bits specify the host part of the address and are ignored for the + purpose of route specification.
*/ + uint8_t depth; +}; + +/** LPM table operations */ +extern struct rte_table_ops rte_table_lpm_ipv6_ops; + +#ifdef __cplusplus +} +#endif + +#endif diff --git a/src/dpdk22/lib/librte_table/rte_table_stub.h b/src/dpdk22/lib/librte_table/rte_table_stub.h new file mode 100644 index 00000000..e75340b0 --- /dev/null +++ b/src/dpdk22/lib/librte_table/rte_table_stub.h @@ -0,0 +1,62 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef __INCLUDE_RTE_TABLE_STUB_H__ +#define __INCLUDE_RTE_TABLE_STUB_H__ + +#ifdef __cplusplus +extern "C" { +#endif + +/** + * @file + * RTE Table Stub + * + * The stub table lookup operation produces lookup miss for all input packets. + * + ***/ + +#include <stdint.h> + +#include "rte_table.h" + +/** Stub table parameters: NONE */ + +/** Stub table operations */ +extern struct rte_table_ops rte_table_stub_ops; + +#ifdef __cplusplus +} +#endif + +#endif -- cgit 1.2.3-korg From 7de16b05fa2ef6feeec8370b36037a59aeb1f8e5 Mon Sep 17 00:00:00 2001 From: Ido Barnea Date: Mon, 14 Dec 2015 05:12:45 +0200 Subject: Changes only to DPDK files: dpdk22 40G fixes for TTL and ip_protocol match and IPv6 support + TX hang issue fix --- src/dpdk22/drivers/net/i40e/i40e_ethdev.c | 50 ++++++++++++++++++++++++++++++ src/dpdk22/drivers/net/i40e/i40e_fdir.c | 22 ++++++++++--- src/dpdk22/drivers/net/ixgbe/ixgbe_fdir.c | 23 ++++++++++++-- src/dpdk22/lib/librte_ether/rte_eth_ctrl.h | 6 ++++ src/dpdk22/lib/librte_ether/rte_ethdev.h | 2 ++ src/dpdk22/lib/librte_mbuf/rte_mbuf.h | 5 ++- src/pal/linux_dpdk/dpdk22/rte_config.h | 6 ++-- 7 files changed, 104 insertions(+), 10 deletions(-) (limited to 'src/dpdk22/lib') diff --git a/src/dpdk22/drivers/net/i40e/i40e_ethdev.c b/src/dpdk22/drivers/net/i40e/i40e_ethdev.c index 57de71d5..7542ade1 100644 --- a/src/dpdk22/drivers/net/i40e/i40e_ethdev.c +++ b/src/dpdk22/drivers/net/i40e/i40e_ethdev.c @@ -690,6 +690,52 @@ static
inline void i40e_flex_payload_reg_init(struct i40e_hw *hw) #define I40E_FLOW_CONTROL_ETHERTYPE 0x8808 +#define TREX_PATCH +#ifdef TREX_PATCH +#define I40E_PRTQF_FD_INSET(_i, _j) (0x00250000 + ((_i) * 64 + (_j) * 32)) +#define I40E_GLQF_FD_MSK(_i, _j) (0x00267200 + ((_i) * 4 + (_j) * 8)) + +static void i40e_dump_filter_regs(struct i40e_hw *hw) +{ + int reg_nums[] = {31, 33, 34, 35, 41, 43}; + int i; + uint32_t reg; + + for (i =0; i < sizeof (reg_nums)/sizeof(int); i++) { + reg = I40E_READ_REG(hw,I40E_PRTQF_FD_INSET(reg_nums[i], 0)); + printf("I40E_PRTQF_FD_INSET(%d, 0): 0x%08x\n", reg_nums[i], reg); + reg = I40E_READ_REG(hw,I40E_PRTQF_FD_INSET(reg_nums[i], 1)); + printf("I40E_PRTQF_FD_INSET(%d, 1): 0x%08x\n", reg_nums[i], reg); + } +} + +static inline void i40e_filter_fields_reg_init(struct i40e_hw *hw) +{ + uint32_t reg; + + I40E_WRITE_REG(hw, I40E_GLQF_ORT(12), 0x00000062); + I40E_WRITE_REG(hw, I40E_GLQF_PIT(2), 0x000024A0); + I40E_WRITE_REG(hw, I40E_PRTQF_FD_INSET(31, 0), 0); + I40E_WRITE_REG(hw, I40E_PRTQF_FD_INSET(31, 1), 0x00040000); + I40E_WRITE_REG(hw, I40E_PRTQF_FD_INSET(33, 0), 0); + I40E_WRITE_REG(hw, I40E_PRTQF_FD_INSET(33, 1), 0x00040000); + I40E_WRITE_REG(hw, I40E_PRTQF_FD_INSET(41, 0), 0); + I40E_WRITE_REG(hw, I40E_PRTQF_FD_INSET(41, 1), 0x00080000); + I40E_WRITE_REG(hw, I40E_PRTQF_FD_INSET(43, 0), 0); + I40E_WRITE_REG(hw, I40E_PRTQF_FD_INSET(43, 1), 0x00080000); + I40E_WRITE_REG(hw, I40E_PRTQF_FD_INSET(34, 0), 0); + I40E_WRITE_REG(hw, I40E_PRTQF_FD_INSET(34, 1), 0x00040000); + // filter IP according to ttl and L4 protocol + I40E_WRITE_REG(hw, I40E_PRTQF_FD_INSET(35, 0), 0); + I40E_WRITE_REG(hw, I40E_PRTQF_FD_INSET(35, 1), 0x00040000); + I40E_WRITE_REG(hw, I40E_PRTQF_FD_INSET(44, 0), 0); + I40E_WRITE_REG(hw, I40E_PRTQF_FD_INSET(44, 1), 0x00080000); + I40E_WRITE_REG(hw, I40E_GLQF_FD_MSK(0, 34), 0x000DFF00); + I40E_WRITE_REG(hw, I40E_GLQF_FD_MSK(0,44), 0x000C00FF); + I40E_WRITE_FLUSH(hw); +} +#endif //TREX_PATCH + /* * Add a ethertype filter to drop all 
flow control frames transmitted * from VSIs. @@ -786,7 +832,11 @@ eth_i40e_dev_init(struct rte_eth_dev *dev) * for flexible payload by software. * It should be removed once issues are fixed in NVM. */ +#ifdef TREX_PATCH + i40e_filter_fields_reg_init(hw); +#else i40e_flex_payload_reg_init(hw); +#endif /* Initialize the parameters for adminq */ i40e_init_adminq_parameter(hw); diff --git a/src/dpdk22/drivers/net/i40e/i40e_fdir.c b/src/dpdk22/drivers/net/i40e/i40e_fdir.c index 9ad6981c..194f8629 100644 --- a/src/dpdk22/drivers/net/i40e/i40e_fdir.c +++ b/src/dpdk22/drivers/net/i40e/i40e_fdir.c @@ -719,7 +719,8 @@ i40e_fdir_fill_eth_ip_head(const struct rte_eth_fdir_input *fdir_input, ip->version_ihl = I40E_FDIR_IP_DEFAULT_VERSION_IHL; /* set len to by default */ ip->total_length = rte_cpu_to_be_16(I40E_FDIR_IP_DEFAULT_LEN); - ip->time_to_live = I40E_FDIR_IP_DEFAULT_TTL; + // TREX_PATCH + ip->time_to_live = fdir_input->flow.ip4_flow.ttl; /* * The source and destination fields in the transmitted packet * need to be presented in a reversed order with respect @@ -727,7 +728,13 @@ i40e_fdir_fill_eth_ip_head(const struct rte_eth_fdir_input *fdir_input, */ ip->src_addr = fdir_input->flow.ip4_flow.dst_ip; ip->dst_addr = fdir_input->flow.ip4_flow.src_ip; - ip->next_proto_id = next_proto[fdir_input->flow_type]; + // TREX_PATCH + if (fdir_input->flow_type == RTE_ETH_FLOW_FRAG_IPV4 + || fdir_input->flow_type == RTE_ETH_FLOW_NONFRAG_IPV4_OTHER) { + ip->next_proto_id = fdir_input->flow.ip4_flow.l4_protocol; + } else { + ip->next_proto_id = next_proto[fdir_input->flow_type]; + } break; case RTE_ETH_FLOW_NONFRAG_IPV6_TCP: case RTE_ETH_FLOW_NONFRAG_IPV6_UDP: @@ -741,7 +748,8 @@ i40e_fdir_fill_eth_ip_head(const struct rte_eth_fdir_input *fdir_input, rte_cpu_to_be_32(I40E_FDIR_IPv6_DEFAULT_VTC_FLOW); ip6->payload_len = rte_cpu_to_be_16(I40E_FDIR_IPv6_PAYLOAD_LEN); - ip6->hop_limits = I40E_FDIR_IPv6_DEFAULT_HOP_LIMITS; + // TREX_PATCH + ip6->hop_limits = 
fdir_input->flow.ipv6_flow.hop_limit; /* * The source and destination fields in the transmitted packet @@ -754,7 +762,13 @@ i40e_fdir_fill_eth_ip_head(const struct rte_eth_fdir_input *fdir_input, rte_memcpy(&(ip6->dst_addr), &(fdir_input->flow.ipv6_flow.src_ip), IPV6_ADDR_LEN); - ip6->proto = next_proto[fdir_input->flow_type]; + // TREX_PATCH + if (fdir_input->flow_type == RTE_ETH_FLOW_FRAG_IPV6 + || fdir_input->flow_type == RTE_ETH_FLOW_NONFRAG_IPV6_OTHER) { + ip6->proto = fdir_input->flow.ipv6_flow.l4_protocol; + } else { + ip6->proto = next_proto[fdir_input->flow_type]; + } break; default: PMD_DRV_LOG(ERR, "unknown flow type %u.", diff --git a/src/dpdk22/drivers/net/ixgbe/ixgbe_fdir.c b/src/dpdk22/drivers/net/ixgbe/ixgbe_fdir.c index e03219b1..3ebeac4a 100644 --- a/src/dpdk22/drivers/net/ixgbe/ixgbe_fdir.c +++ b/src/dpdk22/drivers/net/ixgbe/ixgbe_fdir.c @@ -49,6 +49,7 @@ #include "base/ixgbe_common.h" #include "ixgbe_ethdev.h" +#define TREX_PATCH /* To get PBALLOC (Packet Buffer Allocation) bits from FDIRCTRL value */ #define FDIRCTRL_PBALLOC_MASK 0x03 @@ -248,9 +249,13 @@ configure_fdir_flags(const struct rte_fdir_conf *conf, uint32_t *fdirctrl) PMD_INIT_LOG(ERR, "Invalid fdir_conf->status value"); return -EINVAL; }; - +#define TREX_PATCH +#ifdef TREX_PATCH + *fdirctrl |= (conf->flexbytes_offset << IXGBE_FDIRCTRL_FLEX_SHIFT); +#else *fdirctrl |= (IXGBE_DEFAULT_FLEXBYTES_OFFSET / sizeof(uint16_t)) << IXGBE_FDIRCTRL_FLEX_SHIFT; +#endif if (conf->mode >= RTE_FDIR_MODE_PERFECT && conf->mode <= RTE_FDIR_MODE_PERFECT_TUNNEL) { @@ -507,7 +512,7 @@ ixgbe_set_fdir_flex_conf(struct rte_eth_dev *dev, uint16_t i; fdirm = IXGBE_READ_REG(hw, IXGBE_FDIRM); - +#ifndef TREX_PATCH if (conf == NULL) { PMD_DRV_LOG(ERR, "NULL pointer."); return -EINVAL; @@ -548,6 +553,11 @@ ixgbe_set_fdir_flex_conf(struct rte_eth_dev *dev, return -EINVAL; } } +#else + fdirm &= ~IXGBE_FDIRM_FLEX; + flexbytes = 1; + // fdirctrl gets flex_bytes_offset in configure_fdir_flags +#endif 
IXGBE_WRITE_REG(hw, IXGBE_FDIRM, fdirm); info->mask.flex_bytes_mask = flexbytes ? UINT16_MAX : 0; info->flex_bytes_offset = (uint8_t)((*fdirctrl & @@ -577,7 +587,11 @@ ixgbe_fdir_configure(struct rte_eth_dev *dev) if (hw->mac.type != ixgbe_mac_X550 && hw->mac.type != ixgbe_mac_X550EM_x && mode != RTE_FDIR_MODE_SIGNATURE && - mode != RTE_FDIR_MODE_PERFECT) + mode != RTE_FDIR_MODE_PERFECT +#ifdef TREX_PATCH + && mode != RTE_FDIR_MODE_PERFECT_MAC_VLAN +#endif + ) return -ENOSYS; err = configure_fdir_flags(&dev->data->dev_conf.fdir_conf, &fdirctrl); @@ -1116,11 +1130,14 @@ ixgbe_add_del_fdir_filter(struct rte_eth_dev *dev, return err; if (is_perfect) { +#ifndef TREX_PATCH + // No reason not to use IPv6 in perfect filters. It is working. if (input.formatted.flow_type & IXGBE_ATR_L4TYPE_IPV6_MASK) { PMD_DRV_LOG(ERR, "IPv6 is not supported in" " perfect mode!"); return -ENOTSUP; } +#endif fdirhash = atr_compute_perfect_hash_82599(&input, dev->data->dev_conf.fdir_conf.pballoc); fdirhash |= fdir_filter->soft_id << diff --git a/src/dpdk22/lib/librte_ether/rte_eth_ctrl.h b/src/dpdk22/lib/librte_ether/rte_eth_ctrl.h index ce224adb..dc26439d 100644 --- a/src/dpdk22/lib/librte_ether/rte_eth_ctrl.h +++ b/src/dpdk22/lib/librte_ether/rte_eth_ctrl.h @@ -407,6 +407,9 @@ struct rte_eth_l2_flow { struct rte_eth_ipv4_flow { uint32_t src_ip; /**< IPv4 source address to match. */ uint32_t dst_ip; /**< IPv4 destination address to match. */ + // TREX_PATCH + uint8_t ttl; /**< IPv4 ttl to match */ + uint8_t l4_protocol; /**< IPv4 l4 protocol to match */ }; /** @@ -443,6 +446,9 @@ struct rte_eth_sctpv4_flow { struct rte_eth_ipv6_flow { uint32_t src_ip[4]; /**< IPv6 source address to match. */ uint32_t dst_ip[4]; /**< IPv6 destination address to match.
*/ + // TREX_PATCH + uint8_t hop_limit; /**< IPv6 hop limit to match */ + uint8_t l4_protocol; /**< IPv6 l4 protocol to match */ }; /** diff --git a/src/dpdk22/lib/librte_ether/rte_ethdev.h b/src/dpdk22/lib/librte_ether/rte_ethdev.h index bada8ade..f8c7c86d 100644 --- a/src/dpdk22/lib/librte_ether/rte_ethdev.h +++ b/src/dpdk22/lib/librte_ether/rte_ethdev.h @@ -734,6 +734,8 @@ struct rte_fdir_conf { struct rte_eth_fdir_masks mask; struct rte_eth_fdir_flex_conf flex_conf; /**< Flex payload configuration. */ + // TREX_PATCH + uint8_t flexbytes_offset; }; /** diff --git a/src/dpdk22/lib/librte_mbuf/rte_mbuf.h b/src/dpdk22/lib/librte_mbuf/rte_mbuf.h index f234ac9a..683b2ef3 100644 --- a/src/dpdk22/lib/librte_mbuf/rte_mbuf.h +++ b/src/dpdk22/lib/librte_mbuf/rte_mbuf.h @@ -975,6 +975,9 @@ rte_mbuf_refcnt_set(struct rte_mbuf *m, uint16_t new_value) static inline uint16_t rte_mbuf_refcnt_update(struct rte_mbuf *m, int16_t value) { + // TREX_PATCH - The code in #if 0 caused tx queue to hang when running: + // sudo ./t-rex-64-o -f avl/sfr_delay_10_1g_no_bundeling.yaml -m 35 -p -d 100 +#if 0 /* * The atomic_add is an expensive operation, so we don't want to * call it in the case where we know we are the uniq holder of @@ -986,7 +989,7 @@ rte_mbuf_refcnt_update(struct rte_mbuf *m, int16_t value) rte_mbuf_refcnt_set(m, 1 + value); return 1 + value; } - +#endif return (uint16_t)(rte_atomic16_add_return(&m->refcnt_atomic, value)); } diff --git a/src/pal/linux_dpdk/dpdk22/rte_config.h b/src/pal/linux_dpdk/dpdk22/rte_config.h index 01d9b7a1..e1f5cb23 100644 --- a/src/pal/linux_dpdk/dpdk22/rte_config.h +++ b/src/pal/linux_dpdk/dpdk22/rte_config.h @@ -219,8 +219,10 @@ #undef RTE_MBUF_REFCNT_ATOMIC #define RTE_MBUF_REFCNT_ATOMIC 1 #undef RTE_PKTMBUF_HEADROOM -//???#define RTE_PKTMBUF_HEADROOM 128 -#define RTE_PKTMBUF_HEADROOM 16 +// TREX_PATCH: DPDK original value is 128 here. This creates big overhead of memory. 
+// We would like to put 0, but it causes compilation issues with virtio driver. +// 16 caused big performance degradation because of alignment issues. So 64 is the winner. +#define RTE_PKTMBUF_HEADROOM 64 #undef RTE_LIBRTE_MBUF_OFFLOAD #define RTE_LIBRTE_MBUF_OFFLOAD 1 #undef RTE_LIBRTE_MBUF_OFFLOAD_DEBUG -- cgit 1.2.3-korg