aboutsummaryrefslogtreecommitdiffstats
path: root/lib
diff options
context:
space:
mode:
Diffstat (limited to 'lib')
-rw-r--r--lib/Makefile1
-rw-r--r--lib/librte_acl/Makefile2
-rw-r--r--lib/librte_acl/acl.h4
-rw-r--r--lib/librte_acl/acl_run.h2
-rw-r--r--lib/librte_acl/acl_run_altivec.c (renamed from lib/librte_eal/bsdapp/eal/eal_log.c)32
-rw-r--r--lib/librte_acl/acl_run_altivec.h329
-rw-r--r--lib/librte_acl/rte_acl.c13
-rw-r--r--lib/librte_acl/rte_acl.h3
-rw-r--r--lib/librte_cfgfile/rte_cfgfile.h33
-rw-r--r--lib/librte_cmdline/cmdline.h1
-rw-r--r--lib/librte_cmdline/cmdline_parse_portlist.h1
-rw-r--r--lib/librte_cmdline/cmdline_socket.h3
-rw-r--r--lib/librte_cryptodev/Makefile2
-rw-r--r--lib/librte_cryptodev/rte_crypto.h2
-rw-r--r--lib/librte_cryptodev/rte_crypto_sym.h89
-rw-r--r--lib/librte_cryptodev/rte_cryptodev.c72
-rw-r--r--lib/librte_cryptodev/rte_cryptodev.h60
-rw-r--r--lib/librte_cryptodev/rte_cryptodev_pmd.h51
-rw-r--r--lib/librte_cryptodev/rte_cryptodev_version.map9
-rw-r--r--lib/librte_eal/bsdapp/eal/Makefile6
-rw-r--r--lib/librte_eal/bsdapp/eal/eal.c9
-rw-r--r--lib/librte_eal/bsdapp/eal/eal_pci.c54
-rw-r--r--lib/librte_eal/bsdapp/eal/rte_eal_version.map12
-rw-r--r--lib/librte_eal/common/Makefile4
-rw-r--r--lib/librte_eal/common/eal_common_cpuflags.c6
-rw-r--r--lib/librte_eal/common/eal_common_dev.c95
-rw-r--r--lib/librte_eal/common/eal_common_log.c49
-rw-r--r--lib/librte_eal/common/eal_common_memzone.c12
-rw-r--r--lib/librte_eal/common/eal_common_options.c2
-rw-r--r--lib/librte_eal/common/eal_common_pci.c59
-rw-r--r--lib/librte_eal/common/eal_common_timer.c17
-rw-r--r--lib/librte_eal/common/eal_common_vdev.c116
-rw-r--r--lib/librte_eal/common/eal_filesystem.h11
-rw-r--r--lib/librte_eal/common/eal_hugepages.h3
-rw-r--r--lib/librte_eal/common/eal_private.h50
-rw-r--r--lib/librte_eal/common/include/arch/arm/rte_byteorder.h2
-rw-r--r--lib/librte_eal/common/include/arch/arm/rte_cycles_64.h33
-rw-r--r--lib/librte_eal/common/include/arch/arm/rte_memcpy_32.h3
-rw-r--r--lib/librte_eal/common/include/arch/arm/rte_prefetch_32.h1
-rw-r--r--lib/librte_eal/common/include/arch/arm/rte_prefetch_64.h1
-rw-r--r--lib/librte_eal/common/include/arch/arm/rte_vect.h1
-rw-r--r--lib/librte_eal/common/include/arch/ppc_64/rte_atomic.h1
-rw-r--r--lib/librte_eal/common/include/arch/ppc_64/rte_byteorder.h1
-rw-r--r--lib/librte_eal/common/include/arch/ppc_64/rte_cycles.h2
-rw-r--r--lib/librte_eal/common/include/arch/ppc_64/rte_memcpy.h3
-rw-r--r--lib/librte_eal/common/include/arch/ppc_64/rte_prefetch.h1
-rw-r--r--lib/librte_eal/common/include/arch/ppc_64/rte_vect.h (renamed from lib/librte_vhost/vhost_cuse/virtio-net-cdev.h)48
-rw-r--r--lib/librte_eal/common/include/arch/x86/rte_atomic.h2
-rw-r--r--lib/librte_eal/common/include/arch/x86/rte_atomic_32.h9
-rw-r--r--lib/librte_eal/common/include/arch/x86/rte_atomic_64.h8
-rw-r--r--lib/librte_eal/common/include/arch/x86/rte_byteorder.h2
-rw-r--r--lib/librte_eal/common/include/arch/x86/rte_byteorder_32.h7
-rw-r--r--lib/librte_eal/common/include/arch/x86/rte_byteorder_64.h7
-rw-r--r--lib/librte_eal/common/include/arch/x86/rte_cycles.h2
-rw-r--r--lib/librte_eal/common/include/arch/x86/rte_memcpy.h4
-rw-r--r--lib/librte_eal/common/include/arch/x86/rte_prefetch.h1
-rw-r--r--lib/librte_eal/common/include/arch/x86/rte_rtm.h1
-rw-r--r--lib/librte_eal/common/include/arch/x86/rte_vect.h8
-rw-r--r--lib/librte_eal/common/include/generic/rte_atomic.h1
-rw-r--r--lib/librte_eal/common/include/generic/rte_byteorder.h2
-rw-r--r--lib/librte_eal/common/include/generic/rte_cpuflags.h3
-rw-r--r--lib/librte_eal/common/include/generic/rte_cycles.h24
-rw-r--r--lib/librte_eal/common/include/generic/rte_memcpy.h4
-rw-r--r--lib/librte_eal/common/include/rte_common.h22
-rw-r--r--lib/librte_eal/common/include/rte_dev.h88
-rw-r--r--lib/librte_eal/common/include/rte_devargs.h9
-rw-r--r--lib/librte_eal/common/include/rte_eal.h4
-rw-r--r--lib/librte_eal/common/include/rte_interrupts.h2
-rw-r--r--lib/librte_eal/common/include/rte_log.h46
-rw-r--r--lib/librte_eal/common/include/rte_malloc.h2
-rw-r--r--lib/librte_eal/common/include/rte_memory.h9
-rw-r--r--lib/librte_eal/common/include/rte_memzone.h11
-rw-r--r--lib/librte_eal/common/include/rte_pci.h62
-rw-r--r--lib/librte_eal/common/include/rte_pci_dev_ids.h326
-rw-r--r--lib/librte_eal/common/include/rte_tailq.h6
-rw-r--r--lib/librte_eal/common/include/rte_time.h8
-rw-r--r--lib/librte_eal/common/include/rte_vdev.h102
-rw-r--r--lib/librte_eal/common/include/rte_version.h5
-rw-r--r--lib/librte_eal/common/malloc_heap.c8
-rw-r--r--lib/librte_eal/linuxapp/eal/Makefile12
-rw-r--r--lib/librte_eal/linuxapp/eal/eal.c38
-rw-r--r--lib/librte_eal/linuxapp/eal/eal_ivshmem.c954
-rw-r--r--lib/librte_eal/linuxapp/eal/eal_log.c40
-rw-r--r--lib/librte_eal/linuxapp/eal/eal_memory.c327
-rw-r--r--lib/librte_eal/linuxapp/eal/eal_pci.c23
-rw-r--r--lib/librte_eal/linuxapp/eal/include/exec-env/rte_interrupts.h1
-rw-r--r--lib/librte_eal/linuxapp/eal/include/exec-env/rte_kni_common.h10
-rw-r--r--lib/librte_eal/linuxapp/eal/rte_eal_version.map12
-rw-r--r--lib/librte_eal/linuxapp/kni/Makefile5
-rw-r--r--lib/librte_eal/linuxapp/kni/compat.h31
-rw-r--r--lib/librte_eal/linuxapp/kni/ethtool/igb/COPYING339
-rw-r--r--lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_82575.c2
-rw-r--r--lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_82575.h2
-rw-r--r--lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_api.c2
-rw-r--r--lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_api.h2
-rw-r--r--lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_defines.h2
-rw-r--r--lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_hw.h2
-rw-r--r--lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_i210.c2
-rw-r--r--lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_i210.h2
-rw-r--r--lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_mac.c2
-rw-r--r--lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_mac.h2
-rw-r--r--lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_manage.c2
-rw-r--r--lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_manage.h2
-rw-r--r--lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_mbx.c2
-rw-r--r--lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_mbx.h2
-rw-r--r--lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_nvm.c2
-rw-r--r--lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_nvm.h2
-rw-r--r--lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_osdep.h2
-rw-r--r--lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_phy.c2
-rw-r--r--lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_phy.h2
-rw-r--r--lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_regs.h2
-rw-r--r--lib/librte_eal/linuxapp/kni/ethtool/igb/igb.h2
-rw-r--r--lib/librte_eal/linuxapp/kni/ethtool/igb/igb_debugfs.c28
-rw-r--r--lib/librte_eal/linuxapp/kni/ethtool/igb/igb_ethtool.c2
-rw-r--r--lib/librte_eal/linuxapp/kni/ethtool/igb/igb_hwmon.c260
-rw-r--r--lib/librte_eal/linuxapp/kni/ethtool/igb/igb_main.c3
-rw-r--r--lib/librte_eal/linuxapp/kni/ethtool/igb/igb_param.c2
-rw-r--r--lib/librte_eal/linuxapp/kni/ethtool/igb/igb_procfs.c363
-rw-r--r--lib/librte_eal/linuxapp/kni/ethtool/igb/igb_ptp.c944
-rw-r--r--lib/librte_eal/linuxapp/kni/ethtool/igb/igb_regtest.h2
-rw-r--r--lib/librte_eal/linuxapp/kni/ethtool/igb/igb_vmdq.c2
-rw-r--r--lib/librte_eal/linuxapp/kni/ethtool/igb/igb_vmdq.h2
-rw-r--r--lib/librte_eal/linuxapp/kni/ethtool/igb/kcompat.c1482
-rw-r--r--lib/librte_eal/linuxapp/kni/ethtool/igb/kcompat.h21
-rw-r--r--lib/librte_eal/linuxapp/kni/ethtool/igb/kcompat_ethtool.c1171
-rw-r--r--lib/librte_eal/linuxapp/kni/ethtool/ixgbe/COPYING339
-rw-r--r--lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe.h2
-rw-r--r--lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_82598.c2
-rw-r--r--lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_82598.h2
-rw-r--r--lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_82599.c2
-rw-r--r--lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_82599.h2
-rw-r--r--lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_api.c2
-rw-r--r--lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_api.h2
-rw-r--r--lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_common.c2
-rw-r--r--lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_common.h2
-rw-r--r--lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_dcb.h2
-rw-r--r--lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_ethtool.c2
-rw-r--r--lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_fcoe.h2
-rw-r--r--lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_main.c2
-rw-r--r--lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_mbx.h2
-rw-r--r--lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_osdep.h2
-rw-r--r--lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_phy.c2
-rw-r--r--lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_phy.h2
-rw-r--r--lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_sriov.h73
-rw-r--r--lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_type.h2
-rw-r--r--lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_x540.c2
-rw-r--r--lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_x540.h2
-rw-r--r--lib/librte_eal/linuxapp/kni/ethtool/ixgbe/kcompat.c2
-rw-r--r--lib/librte_eal/linuxapp/kni/ethtool/ixgbe/kcompat.h14
-rw-r--r--lib/librte_eal/linuxapp/kni/kni_dev.h59
-rw-r--r--lib/librte_eal/linuxapp/kni/kni_ethtool.c39
-rw-r--r--lib/librte_eal/linuxapp/kni/kni_fifo.h30
-rw-r--r--lib/librte_eal/linuxapp/kni/kni_misc.c515
-rw-r--r--lib/librte_eal/linuxapp/kni/kni_net.c471
-rw-r--r--lib/librte_eal/linuxapp/kni/kni_vhost.c199
-rw-r--r--lib/librte_ether/Makefile7
-rw-r--r--lib/librte_ether/rte_dev_info.h2
-rw-r--r--lib/librte_ether/rte_eth_ctrl.h4
-rw-r--r--lib/librte_ether/rte_ethdev.c285
-rw-r--r--lib/librte_ether/rte_ethdev.h71
-rw-r--r--lib/librte_ether/rte_ether_version.map10
-rw-r--r--lib/librte_hash/rte_cuckoo_hash.c472
-rw-r--r--lib/librte_hash/rte_cuckoo_hash.h70
-rw-r--r--lib/librte_hash/rte_cuckoo_hash_x86.h25
-rw-r--r--lib/librte_hash/rte_fbk_hash.h2
-rw-r--r--lib/librte_hash/rte_thash.h3
-rw-r--r--lib/librte_ip_frag/Makefile1
-rw-r--r--lib/librte_ip_frag/rte_ip_frag.h2
-rw-r--r--lib/librte_ivshmem/Makefile54
-rw-r--r--lib/librte_ivshmem/rte_ivshmem.c919
-rw-r--r--lib/librte_ivshmem/rte_ivshmem.h165
-rw-r--r--lib/librte_ivshmem/rte_ivshmem_version.map12
-rw-r--r--lib/librte_kni/rte_kni.c74
-rw-r--r--lib/librte_kni/rte_kni.h3
-rw-r--r--lib/librte_lpm/Makefile2
-rw-r--r--lib/librte_lpm/rte_lpm.c22
-rw-r--r--lib/librte_lpm/rte_lpm.h9
-rw-r--r--lib/librte_lpm/rte_lpm_altivec.h154
-rw-r--r--lib/librte_lpm/rte_lpm_neon.h1
-rw-r--r--lib/librte_lpm/rte_lpm_sse.h1
-rw-r--r--lib/librte_mbuf/Makefile4
-rw-r--r--lib/librte_mbuf/rte_mbuf.c155
-rw-r--r--lib/librte_mbuf/rte_mbuf.h671
-rw-r--r--lib/librte_mbuf/rte_mbuf_ptype.c227
-rw-r--r--lib/librte_mbuf/rte_mbuf_ptype.h668
-rw-r--r--lib/librte_mbuf/rte_mbuf_version.map17
-rw-r--r--lib/librte_mempool/rte_mempool.c6
-rw-r--r--lib/librte_mempool/rte_mempool.h14
-rw-r--r--lib/librte_meter/rte_meter.h24
-rw-r--r--lib/librte_net/Makefile14
-rw-r--r--lib/librte_net/rte_ether.h (renamed from lib/librte_ether/rte_ether.h)7
-rw-r--r--lib/librte_net/rte_gre.h (renamed from lib/librte_vhost/vhost_user/virtio-net-user.h)61
-rw-r--r--lib/librte_net/rte_ip.h71
-rw-r--r--lib/librte_net/rte_net.c517
-rw-r--r--lib/librte_net/rte_net.h (renamed from lib/librte_vhost/vhost_cuse/eventfd_copy.h)67
-rw-r--r--lib/librte_net/rte_net_version.map6
-rw-r--r--lib/librte_pdump/rte_pdump.c41
-rw-r--r--lib/librte_pdump/rte_pdump.h4
-rw-r--r--lib/librte_pipeline/rte_pipeline.h4
-rw-r--r--lib/librte_port/Makefile2
-rw-r--r--lib/librte_port/rte_port_fd.c552
-rw-r--r--lib/librte_port/rte_port_fd.h (renamed from lib/librte_vhost/vhost_cuse/eventfd_copy.c)125
-rw-r--r--lib/librte_port/rte_port_source_sink.h4
-rw-r--r--lib/librte_port/rte_port_version.map9
-rw-r--r--lib/librte_reorder/rte_reorder.h2
-rw-r--r--lib/librte_ring/rte_ring.h4
-rw-r--r--lib/librte_sched/rte_bitmap.h3
-rw-r--r--lib/librte_sched/rte_reciprocal.h2
-rw-r--r--lib/librte_sched/rte_sched_common.h1
-rw-r--r--lib/librte_table/Makefile4
-rw-r--r--lib/librte_table/rte_table_hash.h31
-rw-r--r--lib/librte_table/rte_table_hash_cuckoo.c382
-rw-r--r--lib/librte_table/rte_table_hash_key16.c4
-rw-r--r--lib/librte_table/rte_table_hash_key32.c4
-rw-r--r--lib/librte_table/rte_table_hash_key8.c4
-rw-r--r--lib/librte_table/rte_table_version.map7
-rw-r--r--lib/librte_timer/rte_timer.h2
-rw-r--r--lib/librte_vhost/Makefile13
-rw-r--r--lib/librte_vhost/eventfd_link/Makefile41
-rw-r--r--lib/librte_vhost/eventfd_link/eventfd_link.c277
-rw-r--r--lib/librte_vhost/eventfd_link/eventfd_link.h94
-rw-r--r--lib/librte_vhost/fd_man.c (renamed from lib/librte_vhost/vhost_user/fd_man.c)0
-rw-r--r--lib/librte_vhost/fd_man.h (renamed from lib/librte_vhost/vhost_user/fd_man.h)0
-rwxr-xr-xlib/librte_vhost/libvirt/qemu-wrap.py387
-rw-r--r--lib/librte_vhost/rte_virtio_net.h10
-rw-r--r--lib/librte_vhost/socket.c (renamed from lib/librte_vhost/vhost_user/vhost-net-user.c)214
-rw-r--r--lib/librte_vhost/vhost.c430
-rw-r--r--lib/librte_vhost/vhost.h (renamed from lib/librte_vhost/vhost-net.h)135
-rw-r--r--lib/librte_vhost/vhost_cuse/vhost-net-cdev.c431
-rw-r--r--lib/librte_vhost/vhost_cuse/virtio-net-cdev.c433
-rw-r--r--lib/librte_vhost/vhost_user.c1033
-rw-r--r--lib/librte_vhost/vhost_user.h (renamed from lib/librte_vhost/vhost_user/vhost-net-user.h)17
-rw-r--r--lib/librte_vhost/vhost_user/virtio-net-user.c470
-rw-r--r--lib/librte_vhost/virtio-net.c847
-rw-r--r--lib/librte_vhost/virtio_net.c (renamed from lib/librte_vhost/vhost_rxtx.c)531
235 files changed, 7717 insertions, 13739 deletions
diff --git a/lib/Makefile b/lib/Makefile
index ca7c02fd..990f23a4 100644
--- a/lib/Makefile
+++ b/lib/Makefile
@@ -61,7 +61,6 @@ DIRS-$(CONFIG_RTE_LIBRTE_PDUMP) += librte_pdump
ifeq ($(CONFIG_RTE_EXEC_ENV_LINUXAPP),y)
DIRS-$(CONFIG_RTE_LIBRTE_KNI) += librte_kni
-DIRS-$(CONFIG_RTE_LIBRTE_IVSHMEM) += librte_ivshmem
endif
include $(RTE_SDK)/mk/rte.subdir.mk
diff --git a/lib/librte_acl/Makefile b/lib/librte_acl/Makefile
index 9803e9dd..d05be665 100644
--- a/lib/librte_acl/Makefile
+++ b/lib/librte_acl/Makefile
@@ -52,6 +52,8 @@ SRCS-$(CONFIG_RTE_LIBRTE_ACL) += acl_run_scalar.c
ifneq ($(filter y,$(CONFIG_RTE_ARCH_ARM) $(CONFIG_RTE_ARCH_ARM64)),)
SRCS-$(CONFIG_RTE_LIBRTE_ACL) += acl_run_neon.c
CFLAGS_acl_run_neon.o += -flax-vector-conversions -Wno-maybe-uninitialized
+else ifeq ($(CONFIG_RTE_ARCH_PPC_64),y)
+SRCS-$(CONFIG_RTE_LIBRTE_ACL) += acl_run_altivec.c
else
SRCS-$(CONFIG_RTE_LIBRTE_ACL) += acl_run_sse.c
#check if flag for SSE4.1 is already on, if not set it up manually
diff --git a/lib/librte_acl/acl.h b/lib/librte_acl/acl.h
index 09d67841..6664a55e 100644
--- a/lib/librte_acl/acl.h
+++ b/lib/librte_acl/acl.h
@@ -234,6 +234,10 @@ int
rte_acl_classify_neon(const struct rte_acl_ctx *ctx, const uint8_t **data,
uint32_t *results, uint32_t num, uint32_t categories);
+int
+rte_acl_classify_altivec(const struct rte_acl_ctx *ctx, const uint8_t **data,
+ uint32_t *results, uint32_t num, uint32_t categories);
+
#ifdef __cplusplus
}
#endif /* __cplusplus */
diff --git a/lib/librte_acl/acl_run.h b/lib/librte_acl/acl_run.h
index b2fc42c6..024f3931 100644
--- a/lib/librte_acl/acl_run.h
+++ b/lib/librte_acl/acl_run.h
@@ -39,7 +39,9 @@
#define MAX_SEARCHES_AVX16 16
#define MAX_SEARCHES_SSE8 8
+#define MAX_SEARCHES_ALTIVEC8 8
#define MAX_SEARCHES_SSE4 4
+#define MAX_SEARCHES_ALTIVEC4 4
#define MAX_SEARCHES_SCALAR 2
#define GET_NEXT_4BYTES(prm, idx) \
diff --git a/lib/librte_eal/bsdapp/eal/eal_log.c b/lib/librte_acl/acl_run_altivec.c
index a425f7a8..35235260 100644
--- a/lib/librte_eal/bsdapp/eal/eal_log.c
+++ b/lib/librte_acl/acl_run_altivec.c
@@ -1,7 +1,7 @@
/*-
* BSD LICENSE
*
- * Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+ * Copyright (C) IBM Corporation 2016.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -31,27 +31,17 @@
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
-#include <stdio.h>
-#include <rte_common.h>
-#include <rte_log.h>
-
-#include <eal_private.h>
-
-/*
- * set the log to default function, called during eal init process,
- * once memzones are available.
- */
-int
-rte_eal_log_init(const char *id __rte_unused, int facility __rte_unused)
-{
- if (rte_eal_common_log_init(stderr) < 0)
- return -1;
- return 0;
-}
+#include "acl_run_altivec.h"
int
-rte_eal_log_early_init(void)
+rte_acl_classify_altivec(const struct rte_acl_ctx *ctx, const uint8_t **data,
+ uint32_t *results, uint32_t num, uint32_t categories)
{
- rte_openlog_stream(stderr);
- return 0;
+ if (likely(num >= MAX_SEARCHES_ALTIVEC8))
+ return search_altivec_8(ctx, data, results, num, categories);
+ else if (num >= MAX_SEARCHES_ALTIVEC4)
+ return search_altivec_4(ctx, data, results, num, categories);
+ else
+ return rte_acl_classify_scalar(ctx, data, results, num,
+ categories);
}
diff --git a/lib/librte_acl/acl_run_altivec.h b/lib/librte_acl/acl_run_altivec.h
new file mode 100644
index 00000000..7d329bcf
--- /dev/null
+++ b/lib/librte_acl/acl_run_altivec.h
@@ -0,0 +1,329 @@
+/*
+ * BSD LICENSE
+ *
+ * Copyright (C) IBM Corporation 2016.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of IBM Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#include "acl_run.h"
+#include "acl_vect.h"
+
+struct _altivec_acl_const {
+ rte_xmm_t xmm_shuffle_input;
+ rte_xmm_t xmm_index_mask;
+ rte_xmm_t xmm_ones_16;
+ rte_xmm_t range_base;
+} altivec_acl_const __attribute__((aligned(RTE_CACHE_LINE_SIZE))) = {
+ {
+ .u32 = {0x00000000, 0x04040404, 0x08080808, 0x0c0c0c0c}
+ },
+ {
+ .u32 = {RTE_ACL_NODE_INDEX, RTE_ACL_NODE_INDEX,
+ RTE_ACL_NODE_INDEX, RTE_ACL_NODE_INDEX}
+ },
+ {
+ .u16 = {1, 1, 1, 1, 1, 1, 1, 1}
+ },
+ {
+ .u32 = {0xffffff00, 0xffffff04, 0xffffff08, 0xffffff0c}
+ },
+};
+
+/*
+ * Resolve priority for multiple results (altivec version).
+ * This consists of comparing the priority of the current traversal with the
+ * running set of results for the packet.
+ * For each result, keep a running array of the result (rule number) and
+ * its priority for each category.
+ */
+static inline void
+resolve_priority_altivec(uint64_t transition, int n,
+ const struct rte_acl_ctx *ctx, struct parms *parms,
+ const struct rte_acl_match_results *p, uint32_t categories)
+{
+ uint32_t x;
+ xmm_t results, priority, results1, priority1;
+ vector bool int selector;
+ xmm_t *saved_results, *saved_priority;
+
+ for (x = 0; x < categories; x += RTE_ACL_RESULTS_MULTIPLIER) {
+
+ saved_results = (xmm_t *)(&parms[n].cmplt->results[x]);
+ saved_priority =
+ (xmm_t *)(&parms[n].cmplt->priority[x]);
+
+ /* get results and priorities for completed trie */
+ results = *(const xmm_t *)&p[transition].results[x];
+ priority = *(const xmm_t *)&p[transition].priority[x];
+
+ /* if this is not the first completed trie */
+ if (parms[n].cmplt->count != ctx->num_tries) {
+
+ /* get running best results and their priorities */
+ results1 = *saved_results;
+ priority1 = *saved_priority;
+
+ /* select results that are highest priority */
+ selector = vec_cmpgt(priority1, priority);
+ results = vec_sel(results, results1, selector);
+ priority = vec_sel(priority, priority1,
+ selector);
+ }
+
+ /* save running best results and their priorities */
+ *saved_results = results;
+ *saved_priority = priority;
+ }
+}
+
+/*
+ * Check for any match in 4 transitions
+ */
+static inline __attribute__((always_inline)) uint32_t
+check_any_match_x4(uint64_t val[])
+{
+ return (val[0] | val[1] | val[2] | val[3]) & RTE_ACL_NODE_MATCH;
+}
+
+static inline __attribute__((always_inline)) void
+acl_match_check_x4(int slot, const struct rte_acl_ctx *ctx, struct parms *parms,
+ struct acl_flow_data *flows, uint64_t transitions[])
+{
+ while (check_any_match_x4(transitions)) {
+ transitions[0] = acl_match_check(transitions[0], slot, ctx,
+ parms, flows, resolve_priority_altivec);
+ transitions[1] = acl_match_check(transitions[1], slot + 1, ctx,
+ parms, flows, resolve_priority_altivec);
+ transitions[2] = acl_match_check(transitions[2], slot + 2, ctx,
+ parms, flows, resolve_priority_altivec);
+ transitions[3] = acl_match_check(transitions[3], slot + 3, ctx,
+ parms, flows, resolve_priority_altivec);
+ }
+}
+
+/*
+ * Process 4 transitions (in 2 XMM registers) in parallel
+ */
+static inline __attribute__((optimize("O2"))) xmm_t
+transition4(xmm_t next_input, const uint64_t *trans,
+ xmm_t *indices1, xmm_t *indices2)
+{
+ xmm_t addr, tr_lo, tr_hi;
+ xmm_t in, node_type, r, t;
+ xmm_t dfa_ofs, quad_ofs;
+ xmm_t *index_mask, *tp;
+ vector bool int dfa_msk;
+ vector signed char zeroes = {};
+ union {
+ uint64_t d64[2];
+ uint32_t d32[4];
+ } v;
+
+ /* Move low 32 into tr_lo and high 32 into tr_hi */
+ tr_lo = (xmm_t){(*indices1)[0], (*indices1)[2],
+ (*indices2)[0], (*indices2)[2]};
+ tr_hi = (xmm_t){(*indices1)[1], (*indices1)[3],
+ (*indices2)[1], (*indices2)[3]};
+
+ /* Calculate the address (array index) for all 4 transitions. */
+ index_mask = (xmm_t *)&altivec_acl_const.xmm_index_mask.u32;
+ t = vec_xor(*index_mask, *index_mask);
+ in = vec_perm(next_input, (xmm_t){},
+ *(vector unsigned char *)&altivec_acl_const.xmm_shuffle_input);
+
+ /* Calc node type and node addr */
+ node_type = vec_and(vec_nor(*index_mask, *index_mask), tr_lo);
+ addr = vec_and(tr_lo, *index_mask);
+
+ /* mask for DFA type(0) nodes */
+ dfa_msk = vec_cmpeq(node_type, t);
+
+ /* DFA calculations. */
+ r = vec_sr(in, (vector unsigned int){30, 30, 30, 30});
+ tp = (xmm_t *)&altivec_acl_const.range_base.u32;
+ r = vec_add(r, *tp);
+ t = vec_sr(in, (vector unsigned int){24, 24, 24, 24});
+ r = vec_perm(tr_hi, (xmm_t){(uint16_t)0 << 16},
+ (vector unsigned char)r);
+
+ dfa_ofs = vec_sub(t, r);
+
+ /* QUAD/SINGLE calculations. */
+ t = (xmm_t)vec_cmpgt((vector signed char)in, (vector signed char)tr_hi);
+ t = (xmm_t)vec_sel(
+ vec_sel(
+ (vector signed char)vec_sub(
+ zeroes, (vector signed char)t),
+ (vector signed char)t,
+ vec_cmpgt((vector signed char)t, zeroes)),
+ zeroes,
+ vec_cmpeq((vector signed char)t, zeroes));
+
+ t = (xmm_t)vec_msum((vector signed char)t,
+ (vector unsigned char)t, (xmm_t){});
+ quad_ofs = (xmm_t)vec_msum((vector signed short)t,
+ *(vector signed short *)&altivec_acl_const.xmm_ones_16.u16,
+ (xmm_t){});
+
+ /* blend DFA and QUAD/SINGLE. */
+ t = vec_sel(quad_ofs, dfa_ofs, dfa_msk);
+
+ /* calculate address for next transitions. */
+ addr = vec_add(addr, t);
+
+ v.d64[0] = (uint64_t)trans[addr[0]];
+ v.d64[1] = (uint64_t)trans[addr[1]];
+ *indices1 = (xmm_t){v.d32[0], v.d32[1], v.d32[2], v.d32[3]};
+ v.d64[0] = (uint64_t)trans[addr[2]];
+ v.d64[1] = (uint64_t)trans[addr[3]];
+ *indices2 = (xmm_t){v.d32[0], v.d32[1], v.d32[2], v.d32[3]};
+
+ return vec_sr(next_input,
+ (vector unsigned int){CHAR_BIT, CHAR_BIT, CHAR_BIT, CHAR_BIT});
+}
+
+/*
+ * Execute trie traversal with 8 traversals in parallel
+ */
+static inline int
+search_altivec_8(const struct rte_acl_ctx *ctx, const uint8_t **data,
+ uint32_t *results, uint32_t total_packets, uint32_t categories)
+{
+ int n;
+ struct acl_flow_data flows;
+ uint64_t index_array[MAX_SEARCHES_ALTIVEC8];
+ struct completion cmplt[MAX_SEARCHES_ALTIVEC8];
+ struct parms parms[MAX_SEARCHES_ALTIVEC8];
+ xmm_t input0, input1;
+
+ acl_set_flow(&flows, cmplt, RTE_DIM(cmplt), data, results,
+ total_packets, categories, ctx->trans_table);
+
+ for (n = 0; n < MAX_SEARCHES_ALTIVEC8; n++) {
+ cmplt[n].count = 0;
+ index_array[n] = acl_start_next_trie(&flows, parms, n, ctx);
+ }
+
+ /* Check for any matches. */
+ acl_match_check_x4(0, ctx, parms, &flows, (uint64_t *)&index_array[0]);
+ acl_match_check_x4(4, ctx, parms, &flows, (uint64_t *)&index_array[4]);
+
+ while (flows.started > 0) {
+
+ /* Gather 4 bytes of input data for each stream. */
+ input0 = (xmm_t){GET_NEXT_4BYTES(parms, 0),
+ GET_NEXT_4BYTES(parms, 1),
+ GET_NEXT_4BYTES(parms, 2),
+ GET_NEXT_4BYTES(parms, 3)};
+
+ input1 = (xmm_t){GET_NEXT_4BYTES(parms, 4),
+ GET_NEXT_4BYTES(parms, 5),
+ GET_NEXT_4BYTES(parms, 6),
+ GET_NEXT_4BYTES(parms, 7)};
+
+ /* Process the 4 bytes of input on each stream. */
+
+ input0 = transition4(input0, flows.trans,
+ (xmm_t *)&index_array[0], (xmm_t *)&index_array[2]);
+ input1 = transition4(input1, flows.trans,
+ (xmm_t *)&index_array[4], (xmm_t *)&index_array[6]);
+
+ input0 = transition4(input0, flows.trans,
+ (xmm_t *)&index_array[0], (xmm_t *)&index_array[2]);
+ input1 = transition4(input1, flows.trans,
+ (xmm_t *)&index_array[4], (xmm_t *)&index_array[6]);
+
+ input0 = transition4(input0, flows.trans,
+ (xmm_t *)&index_array[0], (xmm_t *)&index_array[2]);
+ input1 = transition4(input1, flows.trans,
+ (xmm_t *)&index_array[4], (xmm_t *)&index_array[6]);
+
+ input0 = transition4(input0, flows.trans,
+ (xmm_t *)&index_array[0], (xmm_t *)&index_array[2]);
+ input1 = transition4(input1, flows.trans,
+ (xmm_t *)&index_array[4], (xmm_t *)&index_array[6]);
+
+ /* Check for any matches. */
+ acl_match_check_x4(0, ctx, parms, &flows,
+ (uint64_t *)&index_array[0]);
+ acl_match_check_x4(4, ctx, parms, &flows,
+ (uint64_t *)&index_array[4]);
+ }
+
+ return 0;
+}
+
+/*
+ * Execute trie traversal with 4 traversals in parallel
+ */
+static inline int
+search_altivec_4(const struct rte_acl_ctx *ctx, const uint8_t **data,
+ uint32_t *results, int total_packets, uint32_t categories)
+{
+ int n;
+ struct acl_flow_data flows;
+ uint64_t index_array[MAX_SEARCHES_ALTIVEC4];
+ struct completion cmplt[MAX_SEARCHES_ALTIVEC4];
+ struct parms parms[MAX_SEARCHES_ALTIVEC4];
+ xmm_t input;
+
+ acl_set_flow(&flows, cmplt, RTE_DIM(cmplt), data, results,
+ total_packets, categories, ctx->trans_table);
+
+ for (n = 0; n < MAX_SEARCHES_ALTIVEC4; n++) {
+ cmplt[n].count = 0;
+ index_array[n] = acl_start_next_trie(&flows, parms, n, ctx);
+ }
+
+ /* Check for any matches. */
+ acl_match_check_x4(0, ctx, parms, &flows, index_array);
+
+ while (flows.started > 0) {
+
+ /* Gather 4 bytes of input data for each stream. */
+ input = (xmm_t){GET_NEXT_4BYTES(parms, 0),
+ GET_NEXT_4BYTES(parms, 1),
+ GET_NEXT_4BYTES(parms, 2),
+ GET_NEXT_4BYTES(parms, 3)};
+
+ /* Process the 4 bytes of input on each stream. */
+ input = transition4(input, flows.trans,
+ (xmm_t *)&index_array[0], (xmm_t *)&index_array[2]);
+ input = transition4(input, flows.trans,
+ (xmm_t *)&index_array[0], (xmm_t *)&index_array[2]);
+ input = transition4(input, flows.trans,
+ (xmm_t *)&index_array[0], (xmm_t *)&index_array[2]);
+ input = transition4(input, flows.trans,
+ (xmm_t *)&index_array[0], (xmm_t *)&index_array[2]);
+
+ /* Check for any matches. */
+ acl_match_check_x4(0, ctx, parms, &flows, index_array);
+ }
+
+ return 0;
+}
diff --git a/lib/librte_acl/rte_acl.c b/lib/librte_acl/rte_acl.c
index 4ba9786b..8b7e92ce 100644
--- a/lib/librte_acl/rte_acl.c
+++ b/lib/librte_acl/rte_acl.c
@@ -75,12 +75,23 @@ rte_acl_classify_neon(__rte_unused const struct rte_acl_ctx *ctx,
return -ENOTSUP;
}
+int __attribute__ ((weak))
+rte_acl_classify_altivec(__rte_unused const struct rte_acl_ctx *ctx,
+ __rte_unused const uint8_t **data,
+ __rte_unused uint32_t *results,
+ __rte_unused uint32_t num,
+ __rte_unused uint32_t categories)
+{
+ return -ENOTSUP;
+}
+
static const rte_acl_classify_t classify_fns[] = {
[RTE_ACL_CLASSIFY_DEFAULT] = rte_acl_classify_scalar,
[RTE_ACL_CLASSIFY_SCALAR] = rte_acl_classify_scalar,
[RTE_ACL_CLASSIFY_SSE] = rte_acl_classify_sse,
[RTE_ACL_CLASSIFY_AVX2] = rte_acl_classify_avx2,
[RTE_ACL_CLASSIFY_NEON] = rte_acl_classify_neon,
+ [RTE_ACL_CLASSIFY_ALTIVEC] = rte_acl_classify_altivec,
};
/* by default, use always available scalar code path. */
@@ -119,6 +130,8 @@ rte_acl_init(void)
#elif defined(RTE_ARCH_ARM)
if (rte_cpu_get_flag_enabled(RTE_CPUFLAG_NEON))
alg = RTE_ACL_CLASSIFY_NEON;
+#elif defined(RTE_ARCH_PPC_64)
+ alg = RTE_ACL_CLASSIFY_ALTIVEC;
#else
#ifdef CC_AVX2_SUPPORT
if (rte_cpu_get_flag_enabled(RTE_CPUFLAG_AVX2))
diff --git a/lib/librte_acl/rte_acl.h b/lib/librte_acl/rte_acl.h
index 0979a098..caa91f7e 100644
--- a/lib/librte_acl/rte_acl.h
+++ b/lib/librte_acl/rte_acl.h
@@ -144,7 +144,7 @@ struct rte_acl_rule_data {
struct rte_acl_field field[fld_num]; \
}
-RTE_ACL_RULE_DEF(rte_acl_rule, 0);
+RTE_ACL_RULE_DEF(rte_acl_rule,);
#define RTE_ACL_RULE_SZ(fld_num) \
(sizeof(struct rte_acl_rule) + sizeof(struct rte_acl_field) * (fld_num))
@@ -271,6 +271,7 @@ enum rte_acl_classify_alg {
RTE_ACL_CLASSIFY_SSE = 2, /**< requires SSE4.1 support. */
RTE_ACL_CLASSIFY_AVX2 = 3, /**< requires AVX2 support. */
RTE_ACL_CLASSIFY_NEON = 4, /**< requires NEON support. */
+ RTE_ACL_CLASSIFY_ALTIVEC = 5, /**< requires ALTIVEC support. */
RTE_ACL_CLASSIFY_NUM /* should always be the last one. */
};
diff --git a/lib/librte_cfgfile/rte_cfgfile.h b/lib/librte_cfgfile/rte_cfgfile.h
index f649836c..b40e6a13 100644
--- a/lib/librte_cfgfile/rte_cfgfile.h
+++ b/lib/librte_cfgfile/rte_cfgfile.h
@@ -34,6 +34,8 @@
#ifndef __INCLUDE_RTE_CFGFILE_H__
#define __INCLUDE_RTE_CFGFILE_H__
+#include <stddef.h>
+
#ifdef __cplusplus
extern "C" {
#endif
@@ -86,7 +88,7 @@ struct rte_cfgfile *rte_cfgfile_load(const char *filename, int flags);
* @param length
* Maximum section name length
* @return
-* 0 on success, error code otherwise
+* Number of sections
*/
int rte_cfgfile_num_sections(struct rte_cfgfile *cfg, const char *sec_name,
size_t length);
@@ -100,13 +102,13 @@ int rte_cfgfile_num_sections(struct rte_cfgfile *cfg, const char *sec_name,
* @param cfg
* Config file
* @param sections
-* Array containing section names after successful invocation. Each elemen
+* Array containing section names after successful invocation. Each element
* of this array should be preallocated by the user with at least
* CFG_NAME_LEN characters.
* @param max_sections
* Maximum number of section names to be stored in sections array
* @return
-* 0 on success, error code otherwise
+* Number of populated section names
*/
int rte_cfgfile_sections(struct rte_cfgfile *cfg, char *sections[],
int max_sections);
@@ -134,12 +136,13 @@ int rte_cfgfile_has_section(struct rte_cfgfile *cfg, const char *sectionname);
* @param sectionname
* Section name
* @return
-* Number of entries in section
+* Number of entries in section on success, -1 otherwise
*/
int rte_cfgfile_section_num_entries(struct rte_cfgfile *cfg,
const char *sectionname);
-/** Get section entries as key-value pairs
+/**
+* Get section entries as key-value pairs
*
* If multiple sections have the given name this function operates on the
* first one.
@@ -154,14 +157,15 @@ int rte_cfgfile_section_num_entries(struct rte_cfgfile *cfg,
* @param max_entries
* Maximum number of section entries to be stored in entries array
* @return
-* 0 on success, error code otherwise
+* Number of entries populated on success, -1 otherwise
*/
int rte_cfgfile_section_entries(struct rte_cfgfile *cfg,
const char *sectionname,
struct rte_cfgfile_entry *entries,
int max_entries);
-/** Get section entries as key-value pairs
+/**
+* Get section entries as key-value pairs
*
* The index of a section is the same as the index of its name in the
* result of rte_cfgfile_sections. This API can be used when there are
@@ -180,7 +184,7 @@ int rte_cfgfile_section_entries(struct rte_cfgfile *cfg,
* @param max_entries
* Maximum number of section entries to be stored in entries array
* @return
-* Number of entries populated on success, negative error code otherwise
+* Number of entries populated on success, -1 otherwise
*/
int rte_cfgfile_section_entries_by_index(struct rte_cfgfile *cfg,
int index,
@@ -188,7 +192,8 @@ int rte_cfgfile_section_entries_by_index(struct rte_cfgfile *cfg,
struct rte_cfgfile_entry *entries,
int max_entries);
-/** Get value of the named entry in named config file section
+/**
+* Get value of the named entry in named config file section
*
* If multiple sections have the given name this function operates on the
* first one.
@@ -200,13 +205,14 @@ int rte_cfgfile_section_entries_by_index(struct rte_cfgfile *cfg,
* @param entryname
* Entry name
* @return
-* Entry value
+* Entry value on success, NULL otherwise
*/
const char *rte_cfgfile_get_entry(struct rte_cfgfile *cfg,
const char *sectionname,
const char *entryname);
-/** Check if given entry exists in named config file section
+/**
+* Check if given entry exists in named config file section
*
* If multiple sections have the given name this function operates on the
* first one.
@@ -223,12 +229,13 @@ const char *rte_cfgfile_get_entry(struct rte_cfgfile *cfg,
int rte_cfgfile_has_entry(struct rte_cfgfile *cfg, const char *sectionname,
const char *entryname);
-/** Close config file
+/**
+* Close config file
*
* @param cfg
* Config file
* @return
-* 0 on success, error code otherwise
+* 0 on success, -1 otherwise
*/
int rte_cfgfile_close(struct rte_cfgfile *cfg);
diff --git a/lib/librte_cmdline/cmdline.h b/lib/librte_cmdline/cmdline.h
index 2578ca81..65d73b01 100644
--- a/lib/librte_cmdline/cmdline.h
+++ b/lib/librte_cmdline/cmdline.h
@@ -63,6 +63,7 @@
#include <termios.h>
#include <cmdline_rdline.h>
+#include <cmdline_parse.h>
/**
* @file
diff --git a/lib/librte_cmdline/cmdline_parse_portlist.h b/lib/librte_cmdline/cmdline_parse_portlist.h
index 73d70e05..058df3ee 100644
--- a/lib/librte_cmdline/cmdline_parse_portlist.h
+++ b/lib/librte_cmdline/cmdline_parse_portlist.h
@@ -61,6 +61,7 @@
#ifndef _PARSE_PORTLIST_H_
#define _PARSE_PORTLIST_H_
+#include <stdint.h>
#include <cmdline_parse.h>
#ifdef __cplusplus
diff --git a/lib/librte_cmdline/cmdline_socket.h b/lib/librte_cmdline/cmdline_socket.h
index 8cc2dfbc..aa6068e7 100644
--- a/lib/librte_cmdline/cmdline_socket.h
+++ b/lib/librte_cmdline/cmdline_socket.h
@@ -61,6 +61,9 @@
#ifndef _CMDLINE_SOCKET_H_
#define _CMDLINE_SOCKET_H_
+#include <cmdline_parse.h>
+#include <cmdline.h>
+
#ifdef __cplusplus
extern "C" {
#endif
diff --git a/lib/librte_cryptodev/Makefile b/lib/librte_cryptodev/Makefile
index 314a0466..aebf5d9f 100644
--- a/lib/librte_cryptodev/Makefile
+++ b/lib/librte_cryptodev/Makefile
@@ -34,7 +34,7 @@ include $(RTE_SDK)/mk/rte.vars.mk
LIB = librte_cryptodev.a
# library version
-LIBABIVER := 1
+LIBABIVER := 2
# build flags
CFLAGS += -O3
diff --git a/lib/librte_cryptodev/rte_crypto.h b/lib/librte_cryptodev/rte_crypto.h
index 5bc3eaa7..90195188 100644
--- a/lib/librte_cryptodev/rte_crypto.h
+++ b/lib/librte_cryptodev/rte_crypto.h
@@ -48,6 +48,7 @@ extern "C" {
#include <rte_mbuf.h>
#include <rte_memory.h>
#include <rte_mempool.h>
+#include <rte_common.h>
#include "rte_crypto_sym.h"
@@ -111,6 +112,7 @@ struct rte_crypto_op {
void *opaque_data;
/**< Opaque pointer for user data */
+ RTE_STD_C11
union {
struct rte_crypto_sym_op *sym;
/**< Symmetric operation parameters */
diff --git a/lib/librte_cryptodev/rte_crypto_sym.h b/lib/librte_cryptodev/rte_crypto_sym.h
index d9bd8210..d3d38e4f 100644
--- a/lib/librte_cryptodev/rte_crypto_sym.h
+++ b/lib/librte_cryptodev/rte_crypto_sym.h
@@ -51,6 +51,7 @@ extern "C" {
#include <rte_mbuf.h>
#include <rte_memory.h>
#include <rte_mempool.h>
+#include <rte_common.h>
/** Symmetric Cipher Algorithms */
@@ -83,11 +84,11 @@ enum rte_crypto_cipher_algorithm {
/**< AES algorithm in F8 mode */
RTE_CRYPTO_CIPHER_AES_GCM,
/**< AES algorithm in GCM mode. When this cipher algorithm is used the
- * *RTE_CRYPTO_AUTH_AES_GCM* element of the
- * *rte_crypto_auth_algorithm* enum MUST be used to set up the related
- * *rte_crypto_auth_setup_data* structure in the session context or in
- * the op_params of the crypto operation structure in the case of a
- * session-less crypto operation.
+ * *RTE_CRYPTO_AUTH_AES_GCM* or *RTE_CRYPTO_AUTH_AES_GMAC* element
+ * of the *rte_crypto_auth_algorithm* enum MUST be used to set up
+ * the related *rte_crypto_auth_setup_data* structure in the session
+ * context or in the op_params of the crypto operation structure
+ * in the case of a session-less crypto operation.
*/
RTE_CRYPTO_CIPHER_AES_XTS,
/**< AES algorithm in XTS mode */
@@ -96,10 +97,10 @@ enum rte_crypto_cipher_algorithm {
/**< (A)RC4 cipher algorithm */
RTE_CRYPTO_CIPHER_KASUMI_F8,
- /**< Kasumi algorithm in F8 mode */
+ /**< KASUMI algorithm in F8 mode */
RTE_CRYPTO_CIPHER_SNOW3G_UEA2,
- /**< SNOW3G algorithm in UEA2 mode */
+ /**< SNOW 3G algorithm in UEA2 mode */
RTE_CRYPTO_CIPHER_ZUC_EEA3,
/**< ZUC algorithm in EEA3 mode */
@@ -203,7 +204,7 @@ enum rte_crypto_auth_algorithm {
/**< AES XCBC algorithm. */
RTE_CRYPTO_AUTH_KASUMI_F9,
- /**< Kasumi algorithm in F9 mode. */
+ /**< KASUMI algorithm in F9 mode. */
RTE_CRYPTO_AUTH_MD5,
/**< MD5 algorithm */
@@ -232,7 +233,7 @@ enum rte_crypto_auth_algorithm {
/**< HMAC using 512 bit SHA algorithm. */
RTE_CRYPTO_AUTH_SNOW3G_UIA2,
- /**< SNOW3G algorithm in UIA2 mode. */
+ /**< SNOW 3G algorithm in UIA2 mode. */
RTE_CRYPTO_AUTH_ZUC_EIA3,
/**< ZUC algorithm in EIA3 mode */
@@ -290,7 +291,7 @@ struct rte_crypto_auth_xform {
* This field must be specified when the hash algorithm is one of the
* following:
*
- * - For SNOW3G (@ref RTE_CRYPTO_AUTH_SNOW3G_UIA2), this is the
+ * - For SNOW 3G (@ref RTE_CRYPTO_AUTH_SNOW3G_UIA2), this is the
* length of the IV (which should be 16).
*
* - For GCM (@ref RTE_CRYPTO_AUTH_AES_GCM). In this case, this is
@@ -307,8 +308,8 @@ struct rte_crypto_auth_xform {
* @note
* For AES-GMAC (@ref RTE_CRYPTO_AUTH_AES_GMAC) mode of operation
* this field is not used and should be set to 0. Instead the length
- * of the AAD data is specified in the message length to hash field of
- * the rte_crypto_sym_op_data structure.
+ * of the AAD data is specified in the additional authentication data
+ * length field of the rte_crypto_sym_op_data structure.
*/
};
@@ -333,6 +334,7 @@ struct rte_crypto_sym_xform {
/**< next xform in chain */
enum rte_crypto_sym_xform_type type
; /**< xform type */
+ RTE_STD_C11
union {
struct rte_crypto_auth_xform auth;
/**< Authentication / hash xform */
@@ -364,6 +366,25 @@ struct rte_cryptodev_sym_session;
* it must have a valid *rte_mbuf* structure attached, via m_src parameter,
* which contains the source data which the crypto operation is to be performed
* on.
+ * While the mbuf is in use by a crypto operation no part of the mbuf should be
+ * changed by the application as the device may read or write to any part of the
+ * mbuf. In the case of hardware crypto devices some or all of the mbuf
+ * may be DMAed in and out of the device, so writing over the original data,
+ * though only the part specified by the rte_crypto_sym_op for transformation
+ * will be changed.
+ * Out-of-place (OOP) operation, where the source mbuf is different to the
+ * destination mbuf, is a special case. Data will be copied from m_src to m_dst.
+ * The part copied includes all the parts of the source mbuf that will be
+ * operated on, based on the cipher.data.offset+cipher.data.length and
+ * auth.data.offset+auth.data.length values in the rte_crypto_sym_op. The part
+ * indicated by the cipher parameters will be transformed, any extra data around
+ * this indicated by the auth parameters will be copied unchanged from source to
+ * destination mbuf.
+ * Also in OOP operation the cipher.data.offset and auth.data.offset apply to
+ * both source and destination mbufs. As these offsets are relative to the
+ * data_off parameter in each mbuf this can result in the data written to the
+ * destination buffer being at a different alignment, relative to buffer start,
+ * to the data in the source buffer.
*/
struct rte_crypto_sym_op {
struct rte_mbuf *m_src; /**< source mbuf */
@@ -371,6 +392,7 @@ struct rte_crypto_sym_op {
enum rte_crypto_sym_op_sess_type sess_type;
+ RTE_STD_C11
union {
struct rte_cryptodev_sym_session *session;
/**< Handle for the initialised session context */
@@ -388,8 +410,9 @@ struct rte_crypto_sym_op {
* this location.
*
* @note
- * For Snow3G @ RTE_CRYPTO_CIPHER_SNOW3G_UEA2
- * and KASUMI @ RTE_CRYPTO_CIPHER_KASUMI_F8,
+ * For SNOW 3G @ RTE_CRYPTO_CIPHER_SNOW3G_UEA2,
+ * KASUMI @ RTE_CRYPTO_CIPHER_KASUMI_F8
+ * and ZUC @ RTE_CRYPTO_CIPHER_ZUC_EEA3,
* this field should be in bits.
*/
@@ -413,8 +436,9 @@ struct rte_crypto_sym_op {
* field should be set to 0.
*
* @note
- * For Snow3G @ RTE_CRYPTO_AUTH_SNOW3G_UEA2
- * and KASUMI @ RTE_CRYPTO_CIPHER_KASUMI_F8,
+ * For SNOW 3G @ RTE_CRYPTO_CIPHER_SNOW3G_UEA2,
+ * KASUMI @ RTE_CRYPTO_CIPHER_KASUMI_F8
+ * and ZUC @ RTE_CRYPTO_CIPHER_ZUC_EEA3,
* this field should be in bits.
*/
} data; /**< Data offsets and length for ciphering */
@@ -423,8 +447,8 @@ struct rte_crypto_sym_op {
uint8_t *data;
/**< Initialisation Vector or Counter.
*
- * - For block ciphers in CBC or F8 mode, or for Kasumi
- * in F8 mode, or for SNOW3G in UEA2 mode, this is the
+ * - For block ciphers in CBC or F8 mode, or for KASUMI
+ * in F8 mode, or for SNOW 3G in UEA2 mode, this is the
* Initialisation Vector (IV) value.
*
* - For block ciphers in CTR mode, this is the counter.
@@ -451,8 +475,8 @@ struct rte_crypto_sym_op {
uint16_t length;
/**< Length of valid IV data.
*
- * - For block ciphers in CBC or F8 mode, or for Kasumi
- * in F8 mode, or for SNOW3G in UEA2 mode, this is the
+ * - For block ciphers in CBC or F8 mode, or for KASUMI
+ * in F8 mode, or for SNOW 3G in UEA2 mode, this is the
* length of the IV (which must be the same as the
* block length of the cipher).
*
@@ -482,12 +506,14 @@ struct rte_crypto_sym_op {
* should be set instead.
*
* @note For AES-GMAC (@ref RTE_CRYPTO_AUTH_AES_GMAC)
- * mode of operation, this field specifies the start
- * of the AAD data in the source buffer.
+ * mode of operation, this field is set to 0. The AAD data
+ * pointer of the rte_crypto_sym_op_data structure is
+ * used instead.
*
* @note
- * For Snow3G @ RTE_CRYPTO_AUTH_SNOW3G_UIA2
- * and KASUMI @ RTE_CRYPTO_AUTH_KASUMI_F9,
+ * For SNOW 3G @ RTE_CRYPTO_AUTH_SNOW3G_UIA2,
+ * KASUMI @ RTE_CRYPTO_AUTH_KASUMI_F9
+ * and ZUC @ RTE_CRYPTO_AUTH_ZUC_EIA3,
* this field should be in bits.
*/
@@ -502,12 +528,13 @@ struct rte_crypto_sym_op {
*
* @note
* For AES-GMAC @ref RTE_CRYPTO_AUTH_AES_GMAC mode
- * of operation, this field specifies the length of
- * the AAD data in the source buffer.
+ * of operation, this field is set to 0.
+ * Auth.aad.length is used instead.
*
* @note
- * For Snow3G @ RTE_CRYPTO_AUTH_SNOW3G_UIA2
- * and KASUMI @ RTE_CRYPTO_AUTH_KASUMI_F9,
+ * For SNOW 3G @ RTE_CRYPTO_AUTH_SNOW3G_UIA2,
+ * KASUMI @ RTE_CRYPTO_AUTH_KASUMI_F9
+ * and ZUC @ RTE_CRYPTO_AUTH_ZUC_EIA3,
* this field should be in bits.
*/
} data; /**< Data offsets and length for authentication */
@@ -551,7 +578,7 @@ struct rte_crypto_sym_op {
uint8_t *data;
/**< Pointer to Additional Authenticated Data (AAD)
* needed for authenticated cipher mechanisms (CCM and
- * GCM), and to the IV for SNOW3G authentication
+ * GCM), and to the IV for SNOW 3G authentication
* (@ref RTE_CRYPTO_AUTH_SNOW3G_UIA2). For other
* authentication mechanisms this pointer is ignored.
*
@@ -589,9 +616,7 @@ struct rte_crypto_sym_op {
*
* @note
* For AES-GMAC (@ref RTE_CRYPTO_AUTH_AES_GMAC) mode of
- * operation, this field is not used and should be set
- * to 0. Instead the AAD data should be placed in the
- * source buffer.
+ * operation, this field is used to pass plaintext.
*/
phys_addr_t phys_addr; /**< physical address */
uint16_t length; /**< Length of digest */
diff --git a/lib/librte_cryptodev/rte_cryptodev.c b/lib/librte_cryptodev/rte_cryptodev.c
index fc4123b6..127e8d0d 100644
--- a/lib/librte_cryptodev/rte_cryptodev.c
+++ b/lib/librte_cryptodev/rte_cryptodev.c
@@ -59,7 +59,6 @@
#include <rte_atomic.h>
#include <rte_branch_prediction.h>
#include <rte_common.h>
-#include <rte_ring.h>
#include <rte_mempool.h>
#include <rte_malloc.h>
#include <rte_mbuf.h>
@@ -319,7 +318,7 @@ rte_cryptodev_find_free_device_index(void)
}
struct rte_cryptodev *
-rte_cryptodev_pmd_allocate(const char *name, enum pmd_type type, int socket_id)
+rte_cryptodev_pmd_allocate(const char *name, int socket_id)
{
struct rte_cryptodev *cryptodev;
uint8_t dev_id;
@@ -358,7 +357,6 @@ rte_cryptodev_pmd_allocate(const char *name, enum pmd_type type, int socket_id)
cryptodev->data->dev_started = 0;
cryptodev->attached = RTE_CRYPTODEV_ATTACHED;
- cryptodev->pmd_type = type;
cryptodev_globals.nb_devs++;
}
@@ -366,23 +364,6 @@ rte_cryptodev_pmd_allocate(const char *name, enum pmd_type type, int socket_id)
return cryptodev;
}
-static inline int
-rte_cryptodev_create_unique_device_name(char *name, size_t size,
- struct rte_pci_device *pci_dev)
-{
- int ret;
-
- if ((name == NULL) || (pci_dev == NULL))
- return -EINVAL;
-
- ret = snprintf(name, size, "%d:%d.%d",
- pci_dev->addr.bus, pci_dev->addr.devid,
- pci_dev->addr.function);
- if (ret < 0)
- return ret;
- return 0;
-}
-
int
rte_cryptodev_pmd_release_device(struct rte_cryptodev *cryptodev)
{
@@ -407,7 +388,7 @@ rte_cryptodev_pmd_virtual_dev_init(const char *name, size_t dev_private_size,
struct rte_cryptodev *cryptodev;
/* allocate device structure */
- cryptodev = rte_cryptodev_pmd_allocate(name, PMD_VDEV, socket_id);
+ cryptodev = rte_cryptodev_pmd_allocate(name, socket_id);
if (cryptodev == NULL)
return NULL;
@@ -430,9 +411,9 @@ rte_cryptodev_pmd_virtual_dev_init(const char *name, size_t dev_private_size,
return cryptodev;
}
-static int
-rte_cryptodev_init(struct rte_pci_driver *pci_drv,
- struct rte_pci_device *pci_dev)
+int
+rte_cryptodev_pci_probe(struct rte_pci_driver *pci_drv,
+ struct rte_pci_device *pci_dev)
{
struct rte_cryptodev_driver *cryptodrv;
struct rte_cryptodev *cryptodev;
@@ -445,12 +426,10 @@ rte_cryptodev_init(struct rte_pci_driver *pci_drv,
if (cryptodrv == NULL)
return -ENODEV;
- /* Create unique Crypto device name using PCI address */
- rte_cryptodev_create_unique_device_name(cryptodev_name,
- sizeof(cryptodev_name), pci_dev);
+ rte_eal_pci_device_name(&pci_dev->addr, cryptodev_name,
+ sizeof(cryptodev_name));
- cryptodev = rte_cryptodev_pmd_allocate(cryptodev_name, PMD_PDEV,
- rte_socket_id());
+ cryptodev = rte_cryptodev_pmd_allocate(cryptodev_name, rte_socket_id());
if (cryptodev == NULL)
return -ENOMEM;
@@ -479,7 +458,7 @@ rte_cryptodev_init(struct rte_pci_driver *pci_drv,
return 0;
CDEV_LOG_ERR("driver %s: crypto_dev_init(vendor_id=0x%x device_id=0x%x)"
- " failed", pci_drv->name,
+ " failed", pci_drv->driver.name,
(unsigned) pci_dev->id.vendor_id,
(unsigned) pci_dev->id.device_id);
@@ -492,8 +471,8 @@ rte_cryptodev_init(struct rte_pci_driver *pci_drv,
return -ENXIO;
}
-static int
-rte_cryptodev_uninit(struct rte_pci_device *pci_dev)
+int
+rte_cryptodev_pci_remove(struct rte_pci_device *pci_dev)
{
const struct rte_cryptodev_driver *cryptodrv;
struct rte_cryptodev *cryptodev;
@@ -503,9 +482,8 @@ rte_cryptodev_uninit(struct rte_pci_device *pci_dev)
if (pci_dev == NULL)
return -EINVAL;
- /* Create unique device name using PCI address */
- rte_cryptodev_create_unique_device_name(cryptodev_name,
- sizeof(cryptodev_name), pci_dev);
+ rte_eal_pci_device_name(&pci_dev->addr, cryptodev_name,
+ sizeof(cryptodev_name));
cryptodev = rte_cryptodev_pmd_get_named_dev(cryptodev_name);
if (cryptodev == NULL)
@@ -535,28 +513,6 @@ rte_cryptodev_uninit(struct rte_pci_device *pci_dev)
return 0;
}
-int
-rte_cryptodev_pmd_driver_register(struct rte_cryptodev_driver *cryptodrv,
- enum pmd_type type)
-{
- /* Call crypto device initialization directly if device is virtual */
- if (type == PMD_VDEV)
- return rte_cryptodev_init((struct rte_pci_driver *)cryptodrv,
- NULL);
-
- /*
- * Register PCI driver for physical device intialisation during
- * PCI probing
- */
- cryptodrv->pci_drv.devinit = rte_cryptodev_init;
- cryptodrv->pci_drv.devuninit = rte_cryptodev_uninit;
-
- rte_eal_pci_register(&cryptodrv->pci_drv);
-
- return 0;
-}
-
-
uint16_t
rte_cryptodev_queue_pair_count(uint8_t dev_id)
{
@@ -913,7 +869,7 @@ rte_cryptodev_info_get(uint8_t dev_id, struct rte_cryptodev_info *dev_info)
dev_info->pci_dev = dev->pci_dev;
if (dev->driver)
- dev_info->driver_name = dev->driver->pci_drv.name;
+ dev_info->driver_name = dev->driver->pci_drv.driver.name;
}
diff --git a/lib/librte_cryptodev/rte_cryptodev.h b/lib/librte_cryptodev/rte_cryptodev.h
index affbdecc..8f63e8f6 100644
--- a/lib/librte_cryptodev/rte_cryptodev.h
+++ b/lib/librte_cryptodev/rte_cryptodev.h
@@ -48,18 +48,23 @@ extern "C" {
#include "rte_kvargs.h"
#include "rte_crypto.h"
#include "rte_dev.h"
+#include <rte_common.h>
-#define CRYPTODEV_NAME_NULL_PMD cryptodev_null_pmd
+#define CRYPTODEV_NAME_NULL_PMD crypto_null
/**< Null crypto PMD device name */
-#define CRYPTODEV_NAME_AESNI_MB_PMD cryptodev_aesni_mb_pmd
+#define CRYPTODEV_NAME_AESNI_MB_PMD crypto_aesni_mb
/**< AES-NI Multi buffer PMD device name */
-#define CRYPTODEV_NAME_AESNI_GCM_PMD cryptodev_aesni_gcm_pmd
+#define CRYPTODEV_NAME_AESNI_GCM_PMD crypto_aesni_gcm
/**< AES-NI GCM PMD device name */
-#define CRYPTODEV_NAME_QAT_SYM_PMD cryptodev_qat_sym_pmd
+#define CRYPTODEV_NAME_OPENSSL_PMD crypto_openssl
+/**< OpenSSL Crypto PMD device name */
+#define CRYPTODEV_NAME_QAT_SYM_PMD crypto_qat
/**< Intel QAT Symmetric Crypto PMD device name */
-#define CRYPTODEV_NAME_SNOW3G_PMD cryptodev_snow3g_pmd
+#define CRYPTODEV_NAME_SNOW3G_PMD crypto_snow3g
/**< SNOW 3G PMD device name */
-#define CRYPTODEV_NAME_KASUMI_PMD cryptodev_kasumi_pmd
+#define CRYPTODEV_NAME_KASUMI_PMD crypto_kasumi
+/**< KASUMI PMD device name */
+#define CRYPTODEV_NAME_ZUC_PMD crypto_zuc
/**< KASUMI PMD device name */
/** Crypto device type */
@@ -70,32 +75,38 @@ enum rte_cryptodev_type {
RTE_CRYPTODEV_QAT_SYM_PMD, /**< QAT PMD Symmetric Crypto */
RTE_CRYPTODEV_SNOW3G_PMD, /**< SNOW 3G PMD */
RTE_CRYPTODEV_KASUMI_PMD, /**< KASUMI PMD */
+ RTE_CRYPTODEV_ZUC_PMD, /**< ZUC PMD */
+ RTE_CRYPTODEV_OPENSSL_PMD, /**< OpenSSL PMD */
};
extern const char **rte_cyptodev_names;
/* Logging Macros */
-#define CDEV_LOG_ERR(fmt, args...) \
- RTE_LOG(ERR, CRYPTODEV, "%s() line %u: " fmt "\n", \
- __func__, __LINE__, ## args)
+#define CDEV_LOG_ERR(...) \
+ RTE_LOG(ERR, CRYPTODEV, \
+ RTE_FMT("%s() line %u: " RTE_FMT_HEAD(__VA_ARGS__,) "\n", \
+ __func__, __LINE__, RTE_FMT_TAIL(__VA_ARGS__,)))
-#define CDEV_PMD_LOG_ERR(dev, fmt, args...) \
- RTE_LOG(ERR, CRYPTODEV, "[%s] %s() line %u: " fmt "\n", \
- dev, __func__, __LINE__, ## args)
+#define CDEV_PMD_LOG_ERR(dev, ...) \
+ RTE_LOG(ERR, CRYPTODEV, \
+ RTE_FMT("[%s] %s() line %u: " RTE_FMT_HEAD(__VA_ARGS__,) "\n", \
+ dev, __func__, __LINE__, RTE_FMT_TAIL(__VA_ARGS__,)))
#ifdef RTE_LIBRTE_CRYPTODEV_DEBUG
-#define CDEV_LOG_DEBUG(fmt, args...) \
- RTE_LOG(DEBUG, CRYPTODEV, "%s() line %u: " fmt "\n", \
- __func__, __LINE__, ## args) \
+#define CDEV_LOG_DEBUG(...) \
+ RTE_LOG(DEBUG, CRYPTODEV, \
+ RTE_FMT("%s() line %u: " RTE_FMT_HEAD(__VA_ARGS__,) "\n", \
+ __func__, __LINE__, RTE_FMT_TAIL(__VA_ARGS__,)))
-#define CDEV_PMD_TRACE(fmt, args...) \
- RTE_LOG(DEBUG, CRYPTODEV, "[%s] %s: " fmt "\n", \
- dev, __func__, ## args)
+#define CDEV_PMD_TRACE(...) \
+ RTE_LOG(DEBUG, CRYPTODEV, \
+ RTE_FMT("[%s] %s: " RTE_FMT_HEAD(__VA_ARGS__,) "\n", \
+ dev, __func__, RTE_FMT_TAIL(__VA_ARGS__,)))
#else
-#define CDEV_LOG_DEBUG(fmt, args...)
-#define CDEV_PMD_TRACE(fmt, args...)
+#define CDEV_LOG_DEBUG(...) (void)0
+#define CDEV_PMD_TRACE(...) (void)0
#endif
/**
@@ -104,6 +115,7 @@ extern const char **rte_cyptodev_names;
struct rte_cryptodev_symmetric_capability {
enum rte_crypto_sym_xform_type xform_type;
/**< Transform type : Authentication / Cipher */
+ RTE_STD_C11
union {
struct {
enum rte_crypto_auth_algorithm algo;
@@ -177,6 +189,7 @@ struct rte_cryptodev_capabilities {
enum rte_crypto_op_type op;
/**< Operation type */
+ RTE_STD_C11
union {
struct rte_cryptodev_symmetric_capability sym;
/**< Symmetric operation capability parameters */
@@ -613,12 +626,11 @@ struct rte_cryptodev {
enum rte_cryptodev_type dev_type;
/**< Crypto device type */
- enum pmd_type pmd_type;
- /**< PMD type - PDEV / VDEV */
struct rte_cryptodev_cb_list link_intr_cbs;
/**< User application callback for interrupts if present */
+ __extension__
uint8_t attached : 1;
/**< Flag indicating the device is attached */
} __rte_cache_aligned;
@@ -642,6 +654,7 @@ struct rte_cryptodev_data {
char name[RTE_CRYPTODEV_NAME_MAX_LEN];
/**< Unique identifier name */
+ __extension__
uint8_t dev_started : 1;
/**< Device state: STARTED(1)/STOPPED(0) */
@@ -749,6 +762,7 @@ rte_cryptodev_enqueue_burst(uint8_t dev_id, uint16_t qp_id,
/** Cryptodev symmetric crypto session */
struct rte_cryptodev_sym_session {
+ RTE_STD_C11
struct {
uint8_t dev_id;
/**< Device Id */
@@ -759,7 +773,7 @@ struct rte_cryptodev_sym_session {
} __rte_aligned(8);
/**< Public symmetric session details */
- char _private[0];
+ __extension__ char _private[0];
/**< Private session material */
};
diff --git a/lib/librte_cryptodev/rte_cryptodev_pmd.h b/lib/librte_cryptodev/rte_cryptodev_pmd.h
index 7d049ea3..abfe2dc1 100644
--- a/lib/librte_cryptodev/rte_cryptodev_pmd.h
+++ b/lib/librte_cryptodev/rte_cryptodev_pmd.h
@@ -52,6 +52,7 @@ extern "C" {
#include <rte_mbuf.h>
#include <rte_mempool.h>
#include <rte_log.h>
+#include <rte_common.h>
#include "rte_crypto.h"
#include "rte_cryptodev.h"
@@ -61,17 +62,18 @@ extern "C" {
#define RTE_PMD_DEBUG_TRACE(...) \
rte_pmd_debug_trace(__func__, __VA_ARGS__)
#else
-#define RTE_PMD_DEBUG_TRACE(fmt, args...)
+#define RTE_PMD_DEBUG_TRACE(...)
#endif
struct rte_cryptodev_session {
+ RTE_STD_C11
struct {
uint8_t dev_id;
enum rte_cryptodev_type type;
struct rte_mempool *mp;
} __rte_aligned(8);
- char _private[0];
+ __extension__ char _private[0];
};
struct rte_cryptodev_driver;
@@ -454,13 +456,12 @@ struct rte_cryptodev_ops {
* to that slot for the driver to use.
*
* @param name Unique identifier name for each device
- * @param type Device type of this Crypto device
* @param socket_id Socket to allocate resources on.
* @return
* - Slot in the rte_dev_devices array for a new device;
*/
struct rte_cryptodev *
-rte_cryptodev_pmd_allocate(const char *name, enum pmd_type type, int socket_id);
+rte_cryptodev_pmd_allocate(const char *name, int socket_id);
/**
* Creates a new virtual crypto device and returns the pointer
@@ -492,36 +493,6 @@ rte_cryptodev_pmd_virtual_dev_init(const char *name, size_t dev_private_size,
extern int
rte_cryptodev_pmd_release_device(struct rte_cryptodev *cryptodev);
-
-/**
- * Register a Crypto [Poll Mode] driver.
- *
- * Function invoked by the initialization function of a Crypto driver
- * to simultaneously register itself as Crypto Poll Mode Driver and to either:
- *
- * a - register itself as PCI driver if the crypto device is a physical
- * device, by invoking the rte_eal_pci_register() function to
- * register the *pci_drv* structure embedded in the *crypto_drv*
- * structure, after having stored the address of the
- * rte_cryptodev_init() function in the *devinit* field of the
- * *pci_drv* structure.
- *
- * During the PCI probing phase, the rte_cryptodev_init()
- * function is invoked for each PCI [device] matching the
- * embedded PCI identifiers provided by the driver.
- *
- * b, complete the initialization sequence if the device is a virtual
- * device by calling the rte_cryptodev_init() directly passing a
- * NULL parameter for the rte_pci_device structure.
- *
- * @param crypto_drv crypto_driver structure associated with the crypto
- * driver.
- * @param type pmd type
- */
-extern int
-rte_cryptodev_pmd_driver_register(struct rte_cryptodev_driver *crypto_drv,
- enum pmd_type type);
-
/**
* Executes all the user application registered callbacks for the specific
* device.
@@ -535,6 +506,18 @@ rte_cryptodev_pmd_driver_register(struct rte_cryptodev_driver *crypto_drv,
void rte_cryptodev_pmd_callback_process(struct rte_cryptodev *dev,
enum rte_cryptodev_event_type event);
+/**
+ * Wrapper for use by pci drivers as a .probe function to attach to a crypto
+ * interface.
+ */
+int rte_cryptodev_pci_probe(struct rte_pci_driver *pci_drv,
+ struct rte_pci_device *pci_dev);
+
+/**
+ * Wrapper for use by pci drivers as a .remove function to detach a crypto
+ * interface.
+ */
+int rte_cryptodev_pci_remove(struct rte_pci_device *pci_dev);
#ifdef __cplusplus
}
diff --git a/lib/librte_cryptodev/rte_cryptodev_version.map b/lib/librte_cryptodev/rte_cryptodev_version.map
index a08fd202..9dde0e72 100644
--- a/lib/librte_cryptodev/rte_cryptodev_version.map
+++ b/lib/librte_cryptodev/rte_cryptodev_version.map
@@ -14,7 +14,6 @@ DPDK_16.04 {
rte_cryptodev_info_get;
rte_cryptodev_pmd_allocate;
rte_cryptodev_pmd_callback_process;
- rte_cryptodev_pmd_driver_register;
rte_cryptodev_pmd_release_device;
rte_cryptodev_pmd_virtual_dev_init;
rte_cryptodev_sym_session_create;
@@ -39,3 +38,11 @@ DPDK_16.07 {
rte_cryptodev_parse_vdev_init_params;
} DPDK_16.04;
+
+DPDK_16.11 {
+ global:
+
+ rte_cryptodev_pci_probe;
+ rte_cryptodev_pci_remove;
+
+} DPDK_16.07;
diff --git a/lib/librte_eal/bsdapp/eal/Makefile b/lib/librte_eal/bsdapp/eal/Makefile
index 988cbbce..a15b762b 100644
--- a/lib/librte_eal/bsdapp/eal/Makefile
+++ b/lib/librte_eal/bsdapp/eal/Makefile
@@ -48,14 +48,13 @@ LDLIBS += -lgcc_s
EXPORT_MAP := rte_eal_version.map
-LIBABIVER := 2
+LIBABIVER := 3
-# specific to linuxapp exec-env
+# specific to bsdapp exec-env
SRCS-$(CONFIG_RTE_EXEC_ENV_BSDAPP) := eal.c
SRCS-$(CONFIG_RTE_EXEC_ENV_BSDAPP) += eal_memory.c
SRCS-$(CONFIG_RTE_EXEC_ENV_BSDAPP) += eal_hugepage_info.c
SRCS-$(CONFIG_RTE_EXEC_ENV_BSDAPP) += eal_thread.c
-SRCS-$(CONFIG_RTE_EXEC_ENV_BSDAPP) += eal_log.c
SRCS-$(CONFIG_RTE_EXEC_ENV_BSDAPP) += eal_pci.c
SRCS-$(CONFIG_RTE_EXEC_ENV_BSDAPP) += eal_debug.c
SRCS-$(CONFIG_RTE_EXEC_ENV_BSDAPP) += eal_lcore.c
@@ -69,6 +68,7 @@ SRCS-$(CONFIG_RTE_EXEC_ENV_BSDAPP) += eal_common_timer.c
SRCS-$(CONFIG_RTE_EXEC_ENV_BSDAPP) += eal_common_memzone.c
SRCS-$(CONFIG_RTE_EXEC_ENV_BSDAPP) += eal_common_log.c
SRCS-$(CONFIG_RTE_EXEC_ENV_BSDAPP) += eal_common_launch.c
+SRCS-$(CONFIG_RTE_EXEC_ENV_BSDAPP) += eal_common_vdev.c
SRCS-$(CONFIG_RTE_EXEC_ENV_BSDAPP) += eal_common_pci.c
SRCS-$(CONFIG_RTE_EXEC_ENV_BSDAPP) += eal_common_pci_uio.c
SRCS-$(CONFIG_RTE_EXEC_ENV_BSDAPP) += eal_common_memory.c
diff --git a/lib/librte_eal/bsdapp/eal/eal.c b/lib/librte_eal/bsdapp/eal/eal.c
index a0c8f8c8..35e3117a 100644
--- a/lib/librte_eal/bsdapp/eal/eal.c
+++ b/lib/librte_eal/bsdapp/eal/eal.c
@@ -496,14 +496,14 @@ rte_eal_init(int argc, char **argv)
char cpuset[RTE_CPU_AFFINITY_STR_LEN];
char thread_name[RTE_MAX_THREAD_NAME_LEN];
+ /* checks if the machine is adequate */
+ rte_cpu_check_supported();
+
if (!rte_atomic32_test_and_set(&run_once))
return -1;
thread_id = pthread_self();
- if (rte_eal_log_early_init() < 0)
- rte_panic("Cannot init early logs\n");
-
eal_log_level_parse(argc, argv);
/* set log level as early as possible */
@@ -552,9 +552,6 @@ rte_eal_init(int argc, char **argv)
if (rte_eal_tailqs_init() < 0)
rte_panic("Cannot init tail queues for objects\n");
-/* if (rte_eal_log_init(argv[0], internal_config.syslog_facility) < 0)
- rte_panic("Cannot init logs\n");*/
-
if (rte_eal_alarm_init() < 0)
rte_panic("Cannot init interrupt-handling thread\n");
diff --git a/lib/librte_eal/bsdapp/eal/eal_pci.c b/lib/librte_eal/bsdapp/eal/eal_pci.c
index 374b68f2..8b3ed881 100644
--- a/lib/librte_eal/bsdapp/eal/eal_pci.c
+++ b/lib/librte_eal/bsdapp/eal/eal_pci.c
@@ -287,7 +287,7 @@ pci_scan_one(int dev_pci_fd, struct pci_conf *conf)
dev->max_vfs = 0;
/* FreeBSD has no NUMA support (yet) */
- dev->numa_node = 0;
+ dev->device.numa_node = 0;
/* FreeBSD has only one pass through driver */
dev->kdrv = RTE_KDRV_NIC_UIO;
@@ -406,6 +406,55 @@ error:
return -1;
}
+int
+pci_update_device(const struct rte_pci_addr *addr)
+{
+ int fd;
+ struct pci_conf matches[2];
+ struct pci_match_conf match = {
+ .pc_sel = {
+ .pc_domain = addr->domain,
+ .pc_bus = addr->bus,
+ .pc_dev = addr->devid,
+ .pc_func = addr->function,
+ },
+ };
+ struct pci_conf_io conf_io = {
+ .pat_buf_len = 0,
+ .num_patterns = 1,
+ .patterns = &match,
+ .match_buf_len = sizeof(matches),
+ .matches = &matches[0],
+ };
+
+ fd = open("/dev/pci", O_RDONLY);
+ if (fd < 0) {
+ RTE_LOG(ERR, EAL, "%s(): error opening /dev/pci\n", __func__);
+ goto error;
+ }
+
+ if (ioctl(fd, PCIOCGETCONF, &conf_io) < 0) {
+ RTE_LOG(ERR, EAL, "%s(): error with ioctl on /dev/pci: %s\n",
+ __func__, strerror(errno));
+ goto error;
+ }
+
+ if (conf_io.num_matches != 1)
+ goto error;
+
+ if (pci_scan_one(fd, &matches[0]) < 0)
+ goto error;
+
+ close(fd);
+
+ return 0;
+
+error:
+ if (fd >= 0)
+ close(fd);
+ return -1;
+}
+
/* Read PCI config space. */
int rte_eal_pci_read_config(const struct rte_pci_device *dev,
void *buf, size_t len, off_t offset)
@@ -623,9 +672,6 @@ rte_eal_pci_ioport_unmap(struct rte_pci_ioport *p)
int
rte_eal_pci_init(void)
{
- TAILQ_INIT(&pci_driver_list);
- TAILQ_INIT(&pci_device_list);
-
/* for debug purposes, PCI can be disabled */
if (internal_config.no_pci)
return 0;
diff --git a/lib/librte_eal/bsdapp/eal/rte_eal_version.map b/lib/librte_eal/bsdapp/eal/rte_eal_version.map
index a335e04b..2f81f7c0 100644
--- a/lib/librte_eal/bsdapp/eal/rte_eal_version.map
+++ b/lib/librte_eal/bsdapp/eal/rte_eal_version.map
@@ -162,3 +162,15 @@ DPDK_16.07 {
rte_thread_setname;
} DPDK_16.04;
+
+DPDK_16.11 {
+ global:
+
+ rte_delay_us_block;
+ rte_delay_us_callback_register;
+ rte_eal_dev_attach;
+ rte_eal_dev_detach;
+ rte_eal_vdrv_register;
+ rte_eal_vdrv_unregister;
+
+} DPDK_16.07;
diff --git a/lib/librte_eal/common/Makefile b/lib/librte_eal/common/Makefile
index f5ea0ee8..dfd64aa5 100644
--- a/lib/librte_eal/common/Makefile
+++ b/lib/librte_eal/common/Makefile
@@ -34,11 +34,11 @@ include $(RTE_SDK)/mk/rte.vars.mk
INC := rte_branch_prediction.h rte_common.h
INC += rte_debug.h rte_eal.h rte_errno.h rte_launch.h rte_lcore.h
INC += rte_log.h rte_memory.h rte_memzone.h rte_pci.h
-INC += rte_pci_dev_ids.h rte_per_lcore.h rte_random.h
+INC += rte_per_lcore.h rte_random.h
INC += rte_tailq.h rte_interrupts.h rte_alarm.h
INC += rte_string_fns.h rte_version.h
INC += rte_eal_memconfig.h rte_malloc_heap.h
-INC += rte_hexdump.h rte_devargs.h rte_dev.h
+INC += rte_hexdump.h rte_devargs.h rte_dev.h rte_vdev.h
INC += rte_pci_dev_feature_defs.h rte_pci_dev_features.h
INC += rte_malloc.h rte_keepalive.h rte_time.h
diff --git a/lib/librte_eal/common/eal_common_cpuflags.c b/lib/librte_eal/common/eal_common_cpuflags.c
index ecb12409..b5f76f7f 100644
--- a/lib/librte_eal/common/eal_common_cpuflags.c
+++ b/lib/librte_eal/common/eal_common_cpuflags.c
@@ -39,14 +39,8 @@
/**
* Checks if the machine is adequate for running the binary. If it is not, the
* program exits with status 1.
- * The function attribute forces this function to be called before main(). But
- * with ICC, the check is generated by the compiler.
*/
-#ifndef __INTEL_COMPILER
-void __attribute__ ((__constructor__))
-#else
void
-#endif
rte_cpu_check_supported(void)
{
/* This is generated at compile-time by the build system */
diff --git a/lib/librte_eal/common/eal_common_dev.c b/lib/librte_eal/common/eal_common_dev.c
index a8a4146c..4f3b4934 100644
--- a/lib/librte_eal/common/eal_common_dev.c
+++ b/lib/librte_eal/common/eal_common_dev.c
@@ -48,6 +48,9 @@
/** Global list of device drivers. */
static struct rte_driver_list dev_driver_list =
TAILQ_HEAD_INITIALIZER(dev_driver_list);
+/** Global list of devices. */
+static struct rte_device_list dev_device_list =
+ TAILQ_HEAD_INITIALIZER(dev_device_list);
/* register a driver */
void
@@ -63,42 +66,25 @@ rte_eal_driver_unregister(struct rte_driver *driver)
TAILQ_REMOVE(&dev_driver_list, driver, next);
}
-int
-rte_eal_vdev_init(const char *name, const char *args)
+void rte_eal_device_insert(struct rte_device *dev)
{
- struct rte_driver *driver;
-
- if (name == NULL)
- return -EINVAL;
-
- TAILQ_FOREACH(driver, &dev_driver_list, next) {
- if (driver->type != PMD_VDEV)
- continue;
-
- /*
- * search a driver prefix in virtual device name.
- * For example, if the driver is pcap PMD, driver->name
- * will be "eth_pcap", but "name" will be "eth_pcapN".
- * So use strncmp to compare.
- */
- if (!strncmp(driver->name, name, strlen(driver->name)))
- return driver->init(name, args);
- }
+ TAILQ_INSERT_TAIL(&dev_device_list, dev, next);
+}
- RTE_LOG(ERR, EAL, "no driver found for %s\n", name);
- return -EINVAL;
+void rte_eal_device_remove(struct rte_device *dev)
+{
+ TAILQ_REMOVE(&dev_device_list, dev, next);
}
int
rte_eal_dev_init(void)
{
struct rte_devargs *devargs;
- struct rte_driver *driver;
/*
* Note that the dev_driver_list is populated here
* from calls made to rte_eal_driver_register from constructor functions
- * embedded into PMD modules via the PMD_REGISTER_DRIVER macro
+ * embedded into PMD modules via the RTE_PMD_REGISTER_VDEV macro
*/
/* call the init function for each virtual device */
@@ -115,38 +101,53 @@ rte_eal_dev_init(void)
}
}
- /* Once the vdevs are initalized, start calling all the pdev drivers */
- TAILQ_FOREACH(driver, &dev_driver_list, next) {
- if (driver->type != PMD_PDEV)
- continue;
- /* PDEV drivers don't get passed any parameters */
- driver->init(NULL, NULL);
- }
return 0;
}
-int
-rte_eal_vdev_uninit(const char *name)
+int rte_eal_dev_attach(const char *name, const char *devargs)
{
- struct rte_driver *driver;
+ struct rte_pci_addr addr;
- if (name == NULL)
+ if (name == NULL || devargs == NULL) {
+ RTE_LOG(ERR, EAL, "Invalid device or arguments provided\n");
return -EINVAL;
+ }
- TAILQ_FOREACH(driver, &dev_driver_list, next) {
- if (driver->type != PMD_VDEV)
- continue;
+ if (eal_parse_pci_DomBDF(name, &addr) == 0) {
+ if (rte_eal_pci_probe_one(&addr) < 0)
+ goto err;
+
+ } else {
+ if (rte_eal_vdev_init(name, devargs))
+ goto err;
+ }
+
+ return 0;
+
+err:
+ RTE_LOG(ERR, EAL, "Driver cannot attach the device (%s)\n", name);
+ return -EINVAL;
+}
+
+int rte_eal_dev_detach(const char *name)
+{
+ struct rte_pci_addr addr;
- /*
- * search a driver prefix in virtual device name.
- * For example, if the driver is pcap PMD, driver->name
- * will be "eth_pcap", but "name" will be "eth_pcapN".
- * So use strncmp to compare.
- */
- if (!strncmp(driver->name, name, strlen(driver->name)))
- return driver->uninit(name);
+ if (name == NULL) {
+ RTE_LOG(ERR, EAL, "Invalid device provided.\n");
+ return -EINVAL;
}
- RTE_LOG(ERR, EAL, "no driver found for %s\n", name);
+ if (eal_parse_pci_DomBDF(name, &addr) == 0) {
+ if (rte_eal_pci_detach(&addr) < 0)
+ goto err;
+ } else {
+ if (rte_eal_vdev_uninit(name))
+ goto err;
+ }
+ return 0;
+
+err:
+ RTE_LOG(ERR, EAL, "Driver cannot detach the device (%s)\n", name);
return -EINVAL;
}
diff --git a/lib/librte_eal/common/eal_common_log.c b/lib/librte_eal/common/eal_common_log.c
index 7916c781..e45d3269 100644
--- a/lib/librte_eal/common/eal_common_log.c
+++ b/lib/librte_eal/common/eal_common_log.c
@@ -48,11 +48,12 @@ struct rte_logs rte_logs = {
.file = NULL,
};
+/* Stream to use for logging if rte_logs.file is NULL */
static FILE *default_log_stream;
/**
* This global structure stores some informations about the message
- * that is currently beeing processed by one lcore
+ * that is currently being processed by one lcore
*/
struct log_cur_msg {
uint32_t loglevel; /**< log level - see rte_log.h */
@@ -64,27 +65,11 @@ static RTE_DEFINE_PER_LCORE(struct log_cur_msg, log_cur_msg);
/* default logs */
-int
-rte_log_add_in_history(const char *buf __rte_unused, size_t size __rte_unused)
-{
- return 0;
-}
-
-void
-rte_log_set_history(int enable)
-{
- if (enable)
- RTE_LOG(WARNING, EAL, "The log history is deprecated.\n");
-}
-
/* Change the stream that will be used by logging system */
int
rte_openlog_stream(FILE *f)
{
- if (f == NULL)
- rte_logs.file = default_log_stream;
- else
- rte_logs.file = f;
+ rte_logs.file = f;
return 0;
}
@@ -131,12 +116,6 @@ int rte_log_cur_msg_logtype(void)
return RTE_PER_LCORE(log_cur_msg).logtype;
}
-/* Dump log history to file */
-void
-rte_log_dump_history(FILE *out __rte_unused)
-{
-}
-
/*
* Generates a log message The message will be sent in the stream
* defined by the previous call to rte_openlog_stream().
@@ -146,6 +125,19 @@ rte_vlog(uint32_t level, uint32_t logtype, const char *format, va_list ap)
{
int ret;
FILE *f = rte_logs.file;
+ if (f == NULL) {
+ f = default_log_stream;
+ if (f == NULL) {
+ /*
+ * Grab the current value of stderr here, rather than
+ * just initializing default_log_stream to stderr. This
+ * ensures that we will always use the current value
+ * of stderr, even if the application closes and
+ * reopens it.
+ */
+ f = stderr;
+ }
+ }
if ((level > rte_logs.level) || !(logtype & rte_logs.type))
return 0;
@@ -177,17 +169,14 @@ rte_log(uint32_t level, uint32_t logtype, const char *format, ...)
}
/*
- * called by environment-specific log init function
+ * Called by environment-specific initialization functions.
*/
-int
-rte_eal_common_log_init(FILE *default_log)
+void
+eal_log_set_default(FILE *default_log)
{
default_log_stream = default_log;
- rte_openlog_stream(default_log);
#if RTE_LOG_LEVEL >= RTE_LOG_DEBUG
RTE_LOG(NOTICE, EAL, "Debug logs available - lower performance\n");
#endif
-
- return 0;
}
diff --git a/lib/librte_eal/common/eal_common_memzone.c b/lib/librte_eal/common/eal_common_memzone.c
index 1bd0a33d..64f4e0ad 100644
--- a/lib/librte_eal/common/eal_common_memzone.c
+++ b/lib/librte_eal/common/eal_common_memzone.c
@@ -337,19 +337,7 @@ rte_memzone_free(const struct rte_memzone *mz)
idx = ((uintptr_t)mz - (uintptr_t)mcfg->memzone);
idx = idx / sizeof(struct rte_memzone);
-#ifdef RTE_LIBRTE_IVSHMEM
- /*
- * If ioremap_addr is set, it's an IVSHMEM memzone and we cannot
- * free it.
- */
- if (mcfg->memzone[idx].ioremap_addr != 0) {
- rte_rwlock_write_unlock(&mcfg->mlock);
- return -EINVAL;
- }
-#endif
-
addr = mcfg->memzone[idx].addr;
-
if (addr == NULL)
ret = -EINVAL;
else if (mcfg->memzone_cnt == 0) {
diff --git a/lib/librte_eal/common/eal_common_options.c b/lib/librte_eal/common/eal_common_options.c
index 1a1bab36..6ca8af17 100644
--- a/lib/librte_eal/common/eal_common_options.c
+++ b/lib/librte_eal/common/eal_common_options.c
@@ -1021,7 +1021,7 @@ eal_common_usage(void)
" [NOTE: PCI whitelist cannot be used with -b option]\n"
" --"OPT_VDEV" Add a virtual device.\n"
" The argument format is <driver><id>[,key=val,...]\n"
- " (ex: --vdev=eth_pcap0,iface=eth2).\n"
+ " (ex: --vdev=net_pcap0,iface=eth2).\n"
" -d LIB.so|DIR Add a driver or driver directory\n"
" (can be used multiple times)\n"
" --"OPT_VMWARE_TSC_MAP" Use VMware TSC map instead of native RDTSC\n"
diff --git a/lib/librte_eal/common/eal_common_pci.c b/lib/librte_eal/common/eal_common_pci.c
index 096c65e4..6bff6752 100644
--- a/lib/librte_eal/common/eal_common_pci.c
+++ b/lib/librte_eal/common/eal_common_pci.c
@@ -82,8 +82,10 @@
#include "eal_private.h"
-struct pci_driver_list pci_driver_list;
-struct pci_device_list pci_device_list;
+struct pci_driver_list pci_driver_list =
+ TAILQ_HEAD_INITIALIZER(pci_driver_list);
+struct pci_device_list pci_device_list =
+ TAILQ_HEAD_INITIALIZER(pci_device_list);
#define SYSFS_PCI_DEVICES "/sys/bus/pci/devices"
@@ -151,7 +153,7 @@ pci_unmap_resource(void *requested_addr, size_t size)
}
/*
- * If vendor/device ID match, call the devinit() function of the
+ * If vendor/device ID match, call the probe() function of the
* driver.
*/
static int
@@ -183,17 +185,18 @@ rte_eal_pci_probe_one_driver(struct rte_pci_driver *dr, struct rte_pci_device *d
RTE_LOG(INFO, EAL, "PCI device "PCI_PRI_FMT" on NUMA socket %i\n",
loc->domain, loc->bus, loc->devid, loc->function,
- dev->numa_node);
+ dev->device.numa_node);
/* no initialization when blacklisted, return without error */
- if (dev->devargs != NULL &&
- dev->devargs->type == RTE_DEVTYPE_BLACKLISTED_PCI) {
+ if (dev->device.devargs != NULL &&
+ dev->device.devargs->type ==
+ RTE_DEVTYPE_BLACKLISTED_PCI) {
RTE_LOG(INFO, EAL, " Device is blacklisted, not initializing\n");
return 1;
}
RTE_LOG(INFO, EAL, " probe driver: %x:%x %s\n", dev->id.vendor_id,
- dev->id.device_id, dr->name);
+ dev->id.device_id, dr->driver.name);
if (dr->drv_flags & RTE_PCI_DRV_NEED_MAPPING) {
/* map resources for devices that use igb_uio */
@@ -210,15 +213,19 @@ rte_eal_pci_probe_one_driver(struct rte_pci_driver *dr, struct rte_pci_device *d
/* reference driver structure */
dev->driver = dr;
- /* call the driver devinit() function */
- return dr->devinit(dr, dev);
+ /* call the driver probe() function */
+ ret = dr->probe(dr, dev);
+ if (ret)
+ dev->driver = NULL;
+
+ return ret;
}
/* return positive value if driver doesn't support this device */
return 1;
}
/*
- * If vendor/device ID match, call the devuninit() function of the
+ * If vendor/device ID match, call the remove() function of the
* driver.
*/
static int
@@ -250,12 +257,12 @@ rte_eal_pci_detach_dev(struct rte_pci_driver *dr,
RTE_LOG(DEBUG, EAL, "PCI device "PCI_PRI_FMT" on NUMA socket %i\n",
loc->domain, loc->bus, loc->devid,
- loc->function, dev->numa_node);
+ loc->function, dev->device.numa_node);
RTE_LOG(DEBUG, EAL, " remove driver: %x:%x %s\n", dev->id.vendor_id,
- dev->id.device_id, dr->name);
+ dev->id.device_id, dr->driver.name);
- if (dr->devuninit && (dr->devuninit(dev) < 0))
+ if (dr->remove && (dr->remove(dev) < 0))
return -1; /* negative value is an error */
/* clear driver structure */
@@ -273,7 +280,7 @@ rte_eal_pci_detach_dev(struct rte_pci_driver *dr,
}
/*
- * If vendor/device ID match, call the devinit() function of all
+ * If vendor/device ID match, call the probe() function of all
* registered driver for the given device. Return -1 if initialization
* failed, return 1 if no driver is found for this device.
*/
@@ -286,6 +293,10 @@ pci_probe_all_drivers(struct rte_pci_device *dev)
if (dev == NULL)
return -1;
+ /* Check if a driver is already loaded */
+ if (dev->driver != NULL)
+ return 0;
+
TAILQ_FOREACH(dr, &pci_driver_list, next) {
rc = rte_eal_pci_probe_one_driver(dr, dev);
if (rc < 0)
@@ -300,7 +311,7 @@ pci_probe_all_drivers(struct rte_pci_device *dev)
}
/*
- * If vendor/device ID match, call the devuninit() function of all
+ * If vendor/device ID match, call the remove() function of all
* registered driver for the given device. Return -1 if initialization
* failed, return 1 if no driver is found for this device.
*/
@@ -339,6 +350,12 @@ rte_eal_pci_probe_one(const struct rte_pci_addr *addr)
if (addr == NULL)
return -1;
+ /* update current pci device in global list, kernel bindings might have
+ * changed since last time we looked at it.
+ */
+ if (pci_update_device(addr) < 0)
+ goto err_return;
+
TAILQ_FOREACH(dev, &pci_device_list, next) {
if (rte_eal_compare_pci_addr(&dev->addr, addr))
continue;
@@ -351,9 +368,9 @@ rte_eal_pci_probe_one(const struct rte_pci_addr *addr)
return -1;
err_return:
- RTE_LOG(WARNING, EAL, "Requested device " PCI_PRI_FMT
- " cannot be used\n", dev->addr.domain, dev->addr.bus,
- dev->addr.devid, dev->addr.function);
+ RTE_LOG(WARNING, EAL,
+ "Requested device " PCI_PRI_FMT " cannot be used\n",
+ addr->domain, addr->bus, addr->devid, addr->function);
return -1;
}
@@ -391,7 +408,7 @@ err_return:
}
/*
- * Scan the content of the PCI bus, and call the devinit() function for
+ * Scan the content of the PCI bus, and call the probe() function for
* all registered drivers that have a matching entry in its id_table
* for discovered devices.
*/
@@ -411,7 +428,7 @@ rte_eal_pci_probe(void)
/* set devargs in PCI structure */
devargs = pci_devargs_lookup(dev);
if (devargs != NULL)
- dev->devargs = devargs;
+ dev->device.devargs = devargs;
/* probe all or only whitelisted devices */
if (probe_all)
@@ -464,11 +481,13 @@ void
rte_eal_pci_register(struct rte_pci_driver *driver)
{
TAILQ_INSERT_TAIL(&pci_driver_list, driver, next);
+ rte_eal_driver_register(&driver->driver);
}
/* unregister a driver */
void
rte_eal_pci_unregister(struct rte_pci_driver *driver)
{
+ rte_eal_driver_unregister(&driver->driver);
TAILQ_REMOVE(&pci_driver_list, driver, next);
}
diff --git a/lib/librte_eal/common/eal_common_timer.c b/lib/librte_eal/common/eal_common_timer.c
index c4227cd8..72656176 100644
--- a/lib/librte_eal/common/eal_common_timer.c
+++ b/lib/librte_eal/common/eal_common_timer.c
@@ -47,8 +47,11 @@
/* The frequency of the RDTSC timer resolution */
static uint64_t eal_tsc_resolution_hz;
+/* Active delay function, set by rte_delay_us_callback_register() */
+void (*rte_delay_us)(unsigned int) = NULL;
+
void
-rte_delay_us(unsigned us)
+rte_delay_us_block(unsigned int us)
{
const uint64_t start = rte_get_timer_cycles();
const uint64_t ticks = (uint64_t)us * rte_get_timer_hz() / 1E6;
@@ -84,3 +87,15 @@ set_tsc_freq(void)
RTE_LOG(DEBUG, EAL, "TSC frequency is ~%" PRIu64 " KHz\n", freq / 1000);
eal_tsc_resolution_hz = freq;
}
+
+void rte_delay_us_callback_register(void (*userfunc)(unsigned int))
+{
+ rte_delay_us = userfunc;
+}
+
+static void __attribute__((constructor))
+rte_timer_init(void)
+{
+ /* install rte_delay_us_block as the initial delay function */
+ rte_delay_us_callback_register(rte_delay_us_block);
+}
diff --git a/lib/librte_eal/common/eal_common_vdev.c b/lib/librte_eal/common/eal_common_vdev.c
new file mode 100644
index 00000000..0ff2377d
--- /dev/null
+++ b/lib/librte_eal/common/eal_common_vdev.c
@@ -0,0 +1,116 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright(c) 2016 RehiveTech. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of RehiveTech nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <string.h>
+#include <inttypes.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <stdint.h>
+#include <sys/queue.h>
+
+#include <rte_vdev.h>
+#include <rte_common.h>
+
+struct vdev_driver_list vdev_driver_list =
+ TAILQ_HEAD_INITIALIZER(vdev_driver_list);
+
+/* register a driver */
+void
+rte_eal_vdrv_register(struct rte_vdev_driver *driver)
+{
+ TAILQ_INSERT_TAIL(&vdev_driver_list, driver, next);
+ rte_eal_driver_register(&driver->driver);
+}
+
+/* unregister a driver */
+void
+rte_eal_vdrv_unregister(struct rte_vdev_driver *driver)
+{
+ rte_eal_driver_unregister(&driver->driver);
+ TAILQ_REMOVE(&vdev_driver_list, driver, next);
+}
+
+int
+rte_eal_vdev_init(const char *name, const char *args)
+{
+ struct rte_vdev_driver *driver;
+
+ if (name == NULL)
+ return -EINVAL;
+
+ TAILQ_FOREACH(driver, &vdev_driver_list, next) {
+ /*
+ * Search for a driver name that is a prefix of the virtual
+ * device name. For example, the pcap PMD has driver->driver.name
+ * "net_pcap" while "name" is "net_pcapN", so compare with
+ * strncmp over the length of the driver name.
+ */
+ if (!strncmp(driver->driver.name, name,
+ strlen(driver->driver.name)))
+ return driver->probe(name, args);
+ }
+
+ /* No name matched: try aliases last so new names take precedence. */
+ TAILQ_FOREACH(driver, &vdev_driver_list, next) {
+ if (driver->driver.alias &&
+ !strncmp(driver->driver.alias, name,
+ strlen(driver->driver.alias)))
+ return driver->probe(name, args);
+ }
+
+ RTE_LOG(ERR, EAL, "no driver found for %s\n", name);
+ return -EINVAL;
+}
+
+int
+rte_eal_vdev_uninit(const char *name)
+{
+ struct rte_vdev_driver *driver;
+
+ if (name == NULL)
+ return -EINVAL;
+
+ TAILQ_FOREACH(driver, &vdev_driver_list, next) {
+ /*
+ * Search for a driver name that is a prefix of the virtual
+ * device name, e.g. driver->driver.name "net_pcap" matches
+ * the device "net_pcapN" (strncmp over the driver name length).
+ * NOTE(review): driver.alias is not tried, unlike rte_eal_vdev_init().
+ */
+ if (!strncmp(driver->driver.name, name,
+ strlen(driver->driver.name)))
+ return driver->remove(name);
+ }
+
+ RTE_LOG(ERR, EAL, "no driver found for %s\n", name);
+ return -EINVAL;
+}
diff --git a/lib/librte_eal/common/eal_filesystem.h b/lib/librte_eal/common/eal_filesystem.h
index fdb4a70b..8acbd996 100644
--- a/lib/librte_eal/common/eal_filesystem.h
+++ b/lib/librte_eal/common/eal_filesystem.h
@@ -97,17 +97,6 @@ eal_get_hugefile_path(char *buffer, size_t buflen, const char *hugedir, int f_id
return buffer;
}
-#ifdef RTE_EAL_SINGLE_FILE_SEGMENTS
-static inline const char *
-eal_get_hugefile_temp_path(char *buffer, size_t buflen, const char *hugedir, int f_id)
-{
- snprintf(buffer, buflen, TEMP_HUGEFILE_FMT, hugedir,
- internal_config.hugefile_prefix, f_id);
- buffer[buflen - 1] = '\0';
- return buffer;
-}
-#endif
-
/** define the default filename prefix for the %s values above */
#define HUGEFILE_PREFIX_DEFAULT "rte"
diff --git a/lib/librte_eal/common/eal_hugepages.h b/lib/librte_eal/common/eal_hugepages.h
index 38edac03..68369f26 100644
--- a/lib/librte_eal/common/eal_hugepages.h
+++ b/lib/librte_eal/common/eal_hugepages.h
@@ -52,9 +52,6 @@ struct hugepage_file {
int socket_id; /**< NUMA socket ID */
int file_id; /**< the '%d' in HUGEFILE_FMT */
int memseg_id; /**< the memory segment to which page belongs */
-#ifdef RTE_EAL_SINGLE_FILE_SEGMENTS
- int repeated; /**< number of times the page size is repeated */
-#endif
char filepath[MAX_HUGEPAGE_PATH]; /**< path to backing file on filesystem */
};
diff --git a/lib/librte_eal/common/eal_private.h b/lib/librte_eal/common/eal_private.h
index 857dc3ea..9e7d8f6b 100644
--- a/lib/librte_eal/common/eal_private.h
+++ b/lib/librte_eal/common/eal_private.h
@@ -47,7 +47,9 @@
int rte_eal_memzone_init(void);
/**
- * Common log initialization function (private to eal).
+ * Common log initialization function (private to eal). Determines
+ * where log data is written when no call to rte_openlog_stream is
+ * in effect.
*
* @param default_log
* The default log stream to be used.
@@ -55,7 +57,7 @@ int rte_eal_memzone_init(void);
* - 0 on success
* - Negative on error
*/
-int rte_eal_common_log_init(FILE *default_log);
+void eal_log_set_default(FILE *default_log);
/**
* Fill configuration with number of physical and logical processors
@@ -97,16 +99,6 @@ int rte_eal_memory_init(void);
int rte_eal_timer_init(void);
/**
- * Init early logs
- *
- * This function is private to EAL.
- *
- * @return
- * 0 on success, negative on error
- */
-int rte_eal_log_early_init(void);
-
-/**
* Init the default log stream
*
* This function is private to EAL.
@@ -117,7 +109,7 @@ int rte_eal_log_early_init(void);
int rte_eal_log_init(const char *id, int facility);
/**
- * Init the default log stream
+ * Init the PCI infrastructure
*
* This function is private to EAL.
*
@@ -126,30 +118,21 @@ int rte_eal_log_init(const char *id, int facility);
*/
int rte_eal_pci_init(void);
-#ifdef RTE_LIBRTE_IVSHMEM
-/**
- * Init the memory from IVSHMEM devices
- *
- * This function is private to EAL.
- *
- * @return
- * 0 on success, negative on error
- */
-int rte_eal_ivshmem_init(void);
+struct rte_pci_driver;
+struct rte_pci_device;
/**
- * Init objects in IVSHMEM devices
+ * Update a pci device object by asking the kernel for the latest information.
*
* This function is private to EAL.
*
+ * @param addr
+ * The PCI Bus-Device-Function address to look for
* @return
- * 0 on success, negative on error
+ * - 0 on success.
+ * - negative on error.
*/
-int rte_eal_ivshmem_obj_init(void);
-#endif
-
-struct rte_pci_driver;
-struct rte_pci_device;
+int pci_update_device(const struct rte_pci_addr *addr);
/**
* Unbind kernel driver for this device
@@ -259,13 +242,6 @@ int rte_eal_intr_init(void);
int rte_eal_alarm_init(void);
/**
- * This function initialises any virtual devices
- *
- * This function is private to the EAL.
- */
-int rte_eal_dev_init(void);
-
-/**
* Function is to check if the kernel module(like, vfio, vfio_iommu_type1,
* etc.) loaded.
*
diff --git a/lib/librte_eal/common/include/arch/arm/rte_byteorder.h b/lib/librte_eal/common/include/arch/arm/rte_byteorder.h
index 3f2dd1f2..1b312b30 100644
--- a/lib/librte_eal/common/include/arch/arm/rte_byteorder.h
+++ b/lib/librte_eal/common/include/arch/arm/rte_byteorder.h
@@ -41,6 +41,8 @@
extern "C" {
#endif
+#include <stdint.h>
+#include <rte_common.h>
#include "generic/rte_byteorder.h"
/* fix missing __builtin_bswap16 for gcc older then 4.8 */
diff --git a/lib/librte_eal/common/include/arch/arm/rte_cycles_64.h b/lib/librte_eal/common/include/arch/arm/rte_cycles_64.h
index 14f26120..867a9468 100644
--- a/lib/librte_eal/common/include/arch/arm/rte_cycles_64.h
+++ b/lib/librte_eal/common/include/arch/arm/rte_cycles_64.h
@@ -45,6 +45,11 @@ extern "C" {
* @return
* The time base for this lcore.
*/
+#ifndef RTE_ARM_EAL_RDTSC_USE_PMU
+/**
+ * This call is portable to any ARMv8 architecture, however, typically
+ * cntvct_el0 runs at <= 100MHz and it may be imprecise for some tasks.
+ */
static inline uint64_t
rte_rdtsc(void)
{
@@ -53,6 +58,34 @@ rte_rdtsc(void)
asm volatile("mrs %0, cntvct_el0" : "=r" (tsc));
return tsc;
}
+#else
+/**
+ * This is an alternative method that implements rte_rdtsc() with the high
+ * resolution PMU cycle counter. The cycle counter runs at CPU frequency and
+ * this scheme uses the ARMv8 PMU subsystem to read it from user space.
+ * However, access to the PMU cycle counter from user space is not enabled by
+ * default in the arm64 Linux kernel.
+ * It is possible to enable user space access to the cycle counter by
+ * configuring the PMU from the privileged mode (kernel space).
+ *
+ * asm volatile("msr pmintenset_el1, %0" : : "r" ((u64)(0 << 31)));
+ * asm volatile("msr pmcntenset_el0, %0" :: "r" BIT(31));
+ * asm volatile("msr pmuserenr_el0, %0" : : "r"(BIT(0) | BIT(2)));
+ * asm volatile("mrs %0, pmcr_el0" : "=r" (val));
+ * val |= (BIT(0) | BIT(2));
+ * isb();
+ * asm volatile("msr pmcr_el0, %0" : : "r" (val));
+ *
+ */
+static inline uint64_t
+rte_rdtsc(void)
+{
+ uint64_t tsc;
+
+ asm volatile("mrs %0, pmccntr_el0" : "=r"(tsc));
+ return tsc;
+}
+#endif
static inline uint64_t
rte_rdtsc_precise(void)
diff --git a/lib/librte_eal/common/include/arch/arm/rte_memcpy_32.h b/lib/librte_eal/common/include/arch/arm/rte_memcpy_32.h
index da6c233a..c3a26192 100644
--- a/lib/librte_eal/common/include/arch/arm/rte_memcpy_32.h
+++ b/lib/librte_eal/common/include/arch/arm/rte_memcpy_32.h
@@ -148,7 +148,8 @@ rte_mov256(uint8_t *dst, const uint8_t *src)
}
#define rte_memcpy(dst, src, n) \
- ({ (__builtin_constant_p(n)) ? \
+ __extension__ ({ \
+ (__builtin_constant_p(n)) ? \
memcpy((dst), (src), (n)) : \
rte_memcpy_func((dst), (src), (n)); })
diff --git a/lib/librte_eal/common/include/arch/arm/rte_prefetch_32.h b/lib/librte_eal/common/include/arch/arm/rte_prefetch_32.h
index 5aeed22d..43cde172 100644
--- a/lib/librte_eal/common/include/arch/arm/rte_prefetch_32.h
+++ b/lib/librte_eal/common/include/arch/arm/rte_prefetch_32.h
@@ -37,6 +37,7 @@
extern "C" {
#endif
+#include <rte_common.h>
#include "generic/rte_prefetch.h"
static inline void rte_prefetch0(const volatile void *p)
diff --git a/lib/librte_eal/common/include/arch/arm/rte_prefetch_64.h b/lib/librte_eal/common/include/arch/arm/rte_prefetch_64.h
index 3ed46a46..0d077ea6 100644
--- a/lib/librte_eal/common/include/arch/arm/rte_prefetch_64.h
+++ b/lib/librte_eal/common/include/arch/arm/rte_prefetch_64.h
@@ -37,6 +37,7 @@
extern "C" {
#endif
+#include <rte_common.h>
#include "generic/rte_prefetch.h"
static inline void rte_prefetch0(const volatile void *p)
diff --git a/lib/librte_eal/common/include/arch/arm/rte_vect.h b/lib/librte_eal/common/include/arch/arm/rte_vect.h
index a33c0544..b86c2cf5 100644
--- a/lib/librte_eal/common/include/arch/arm/rte_vect.h
+++ b/lib/librte_eal/common/include/arch/arm/rte_vect.h
@@ -33,6 +33,7 @@
#ifndef _RTE_VECT_ARM_H_
#define _RTE_VECT_ARM_H_
+#include <stdint.h>
#include "arm_neon.h"
#ifdef __cplusplus
diff --git a/lib/librte_eal/common/include/arch/ppc_64/rte_atomic.h b/lib/librte_eal/common/include/arch/ppc_64/rte_atomic.h
index 924e8940..fb4fccb4 100644
--- a/lib/librte_eal/common/include/arch/ppc_64/rte_atomic.h
+++ b/lib/librte_eal/common/include/arch/ppc_64/rte_atomic.h
@@ -46,6 +46,7 @@
extern "C" {
#endif
+#include <stdint.h>
#include "generic/rte_atomic.h"
/**
diff --git a/lib/librte_eal/common/include/arch/ppc_64/rte_byteorder.h b/lib/librte_eal/common/include/arch/ppc_64/rte_byteorder.h
index 3c1734ed..544de3c2 100644
--- a/lib/librte_eal/common/include/arch/ppc_64/rte_byteorder.h
+++ b/lib/librte_eal/common/include/arch/ppc_64/rte_byteorder.h
@@ -42,6 +42,7 @@
extern "C" {
#endif
+#include <stdint.h>
#include "generic/rte_byteorder.h"
/*
diff --git a/lib/librte_eal/common/include/arch/ppc_64/rte_cycles.h b/lib/librte_eal/common/include/arch/ppc_64/rte_cycles.h
index 64beddf9..8fa6fc60 100644
--- a/lib/librte_eal/common/include/arch/ppc_64/rte_cycles.h
+++ b/lib/librte_eal/common/include/arch/ppc_64/rte_cycles.h
@@ -40,6 +40,7 @@ extern "C" {
#include "generic/rte_cycles.h"
#include <rte_byteorder.h>
+#include <rte_common.h>
/**
* Read the time base register.
@@ -52,6 +53,7 @@ rte_rdtsc(void)
{
union {
uint64_t tsc_64;
+ RTE_STD_C11
struct {
#if RTE_BYTE_ORDER == RTE_BIG_ENDIAN
uint32_t hi_32;
diff --git a/lib/librte_eal/common/include/arch/ppc_64/rte_memcpy.h b/lib/librte_eal/common/include/arch/ppc_64/rte_memcpy.h
index acf7aac2..ca9d1dc5 100644
--- a/lib/librte_eal/common/include/arch/ppc_64/rte_memcpy.h
+++ b/lib/librte_eal/common/include/arch/ppc_64/rte_memcpy.h
@@ -95,7 +95,8 @@ rte_mov256(uint8_t *dst, const uint8_t *src)
}
#define rte_memcpy(dst, src, n) \
- ({ (__builtin_constant_p(n)) ? \
+ __extension__ ({ \
+ (__builtin_constant_p(n)) ? \
memcpy((dst), (src), (n)) : \
rte_memcpy_func((dst), (src), (n)); })
diff --git a/lib/librte_eal/common/include/arch/ppc_64/rte_prefetch.h b/lib/librte_eal/common/include/arch/ppc_64/rte_prefetch.h
index 9a1995ea..fd2e53b9 100644
--- a/lib/librte_eal/common/include/arch/ppc_64/rte_prefetch.h
+++ b/lib/librte_eal/common/include/arch/ppc_64/rte_prefetch.h
@@ -37,6 +37,7 @@
extern "C" {
#endif
+#include <rte_common.h>
#include "generic/rte_prefetch.h"
static inline void rte_prefetch0(const volatile void *p)
diff --git a/lib/librte_vhost/vhost_cuse/virtio-net-cdev.h b/lib/librte_eal/common/include/arch/ppc_64/rte_vect.h
index 3f67154b..05209e52 100644
--- a/lib/librte_vhost/vhost_cuse/virtio-net-cdev.h
+++ b/lib/librte_eal/common/include/arch/ppc_64/rte_vect.h
@@ -1,8 +1,7 @@
-/*-
+/*
* BSD LICENSE
*
- * Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
- * All rights reserved.
+ * Copyright (C) IBM Corporation 2016.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
@@ -14,7 +13,7 @@
* notice, this list of conditions and the following disclaimer in
* the documentation and/or other materials provided with the
* distribution.
- * * Neither the name of Intel Corporation nor the names of its
+ * * Neither the name of IBM Corporation nor the names of its
* contributors may be used to endorse or promote products derived
* from this software without specific prior written permission.
*
@@ -29,28 +28,33 @@
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- */
-#ifndef _VIRTIO_NET_CDEV_H
-#define _VIRTIO_NET_CDEV_H
+*/
-#include <stdint.h>
-#include <linux/vhost.h>
+#ifndef _RTE_VECT_PPC_64_H_
+#define _RTE_VECT_PPC_64_H_
-#include "vhost-net.h"
+#include <altivec.h>
-/*
- * Structure used to identify device context.
- */
-struct vhost_cuse_device_ctx {
- pid_t pid; /* PID of process calling the IOCTL. */
- int vid; /* Virtio-net device ID */
-};
+#ifdef __cplusplus
+extern "C" {
+#endif
-int
-cuse_set_mem_table(struct vhost_cuse_device_ctx ctx,
- const struct vhost_memory *mem_regions_addr, uint32_t nregions);
+typedef vector signed int xmm_t;
-int
-cuse_set_backend(struct vhost_cuse_device_ctx ctx, struct vhost_vring_file *);
+#define XMM_SIZE (sizeof(xmm_t))
+#define XMM_MASK (XMM_SIZE - 1)
+typedef union rte_xmm {
+ xmm_t x;
+ uint8_t u8[XMM_SIZE / sizeof(uint8_t)];
+ uint16_t u16[XMM_SIZE / sizeof(uint16_t)];
+ uint32_t u32[XMM_SIZE / sizeof(uint32_t)];
+ uint64_t u64[XMM_SIZE / sizeof(uint64_t)];
+ double pd[XMM_SIZE / sizeof(double)];
+} __attribute__((aligned(16))) rte_xmm_t;
+
+#ifdef __cplusplus
+}
#endif
+
+#endif /* _RTE_VECT_PPC_64_H_ */
diff --git a/lib/librte_eal/common/include/arch/x86/rte_atomic.h b/lib/librte_eal/common/include/arch/x86/rte_atomic.h
index b20056b8..00b1cdf5 100644
--- a/lib/librte_eal/common/include/arch/x86/rte_atomic.h
+++ b/lib/librte_eal/common/include/arch/x86/rte_atomic.h
@@ -38,6 +38,8 @@
extern "C" {
#endif
+#include <stdint.h>
+#include <rte_common.h>
#include <emmintrin.h>
#include "generic/rte_atomic.h"
diff --git a/lib/librte_eal/common/include/arch/x86/rte_atomic_32.h b/lib/librte_eal/common/include/arch/x86/rte_atomic_32.h
index 400d8a96..2e04c759 100644
--- a/lib/librte_eal/common/include/arch/x86/rte_atomic_32.h
+++ b/lib/librte_eal/common/include/arch/x86/rte_atomic_32.h
@@ -37,9 +37,17 @@
* All rights reserved.
*/
+#ifndef _RTE_ATOMIC_X86_H_
+#error do not include this file directly, use <rte_atomic.h> instead
+#endif
+
#ifndef _RTE_ATOMIC_I686_H_
#define _RTE_ATOMIC_I686_H_
+#include <stdint.h>
+#include <rte_common.h>
+#include <rte_atomic.h>
+
/*------------------------- 64 bit atomic operations -------------------------*/
#ifndef RTE_FORCE_INTRINSICS
@@ -47,6 +55,7 @@ static inline int
rte_atomic64_cmpset(volatile uint64_t *dst, uint64_t exp, uint64_t src)
{
uint8_t res;
+ RTE_STD_C11
union {
struct {
uint32_t l32;
diff --git a/lib/librte_eal/common/include/arch/x86/rte_atomic_64.h b/lib/librte_eal/common/include/arch/x86/rte_atomic_64.h
index 4de66000..1a53a766 100644
--- a/lib/librte_eal/common/include/arch/x86/rte_atomic_64.h
+++ b/lib/librte_eal/common/include/arch/x86/rte_atomic_64.h
@@ -37,9 +37,17 @@
* All rights reserved.
*/
+#ifndef _RTE_ATOMIC_X86_H_
+#error do not include this file directly, use <rte_atomic.h> instead
+#endif
+
#ifndef _RTE_ATOMIC_X86_64_H_
#define _RTE_ATOMIC_X86_64_H_
+#include <stdint.h>
+#include <rte_common.h>
+#include <rte_atomic.h>
+
/*------------------------- 64 bit atomic operations -------------------------*/
#ifndef RTE_FORCE_INTRINSICS
diff --git a/lib/librte_eal/common/include/arch/x86/rte_byteorder.h b/lib/librte_eal/common/include/arch/x86/rte_byteorder.h
index ffdb6ef5..251f11b4 100644
--- a/lib/librte_eal/common/include/arch/x86/rte_byteorder.h
+++ b/lib/librte_eal/common/include/arch/x86/rte_byteorder.h
@@ -38,6 +38,8 @@
extern "C" {
#endif
+#include <stdint.h>
+#include <rte_common.h>
#include "generic/rte_byteorder.h"
#ifndef RTE_BYTE_ORDER
diff --git a/lib/librte_eal/common/include/arch/x86/rte_byteorder_32.h b/lib/librte_eal/common/include/arch/x86/rte_byteorder_32.h
index 51c306f8..14d64834 100644
--- a/lib/librte_eal/common/include/arch/x86/rte_byteorder_32.h
+++ b/lib/librte_eal/common/include/arch/x86/rte_byteorder_32.h
@@ -31,9 +31,16 @@
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
+#ifndef _RTE_BYTEORDER_X86_H_
+#error do not include this file directly, use <rte_byteorder.h> instead
+#endif
+
#ifndef _RTE_BYTEORDER_I686_H_
#define _RTE_BYTEORDER_I686_H_
+#include <stdint.h>
+#include <rte_byteorder.h>
+
/*
* An architecture-optimized byte swap for a 64-bit value.
*
diff --git a/lib/librte_eal/common/include/arch/x86/rte_byteorder_64.h b/lib/librte_eal/common/include/arch/x86/rte_byteorder_64.h
index dda572bd..516ac052 100644
--- a/lib/librte_eal/common/include/arch/x86/rte_byteorder_64.h
+++ b/lib/librte_eal/common/include/arch/x86/rte_byteorder_64.h
@@ -31,9 +31,16 @@
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
+#ifndef _RTE_BYTEORDER_X86_H_
+#error do not include this file directly, use <rte_byteorder.h> instead
+#endif
+
#ifndef _RTE_BYTEORDER_X86_64_H_
#define _RTE_BYTEORDER_X86_64_H_
+#include <stdint.h>
+#include <rte_common.h>
+
/*
* An architecture-optimized byte swap for a 64-bit value.
*
diff --git a/lib/librte_eal/common/include/arch/x86/rte_cycles.h b/lib/librte_eal/common/include/arch/x86/rte_cycles.h
index 6e3c7d89..5eb6ce96 100644
--- a/lib/librte_eal/common/include/arch/x86/rte_cycles.h
+++ b/lib/librte_eal/common/include/arch/x86/rte_cycles.h
@@ -75,12 +75,14 @@ extern "C" {
extern int rte_cycles_vmware_tsc_map;
#include <rte_branch_prediction.h>
#endif
+#include <rte_common.h>
static inline uint64_t
rte_rdtsc(void)
{
union {
uint64_t tsc_64;
+ RTE_STD_C11
struct {
uint32_t lo_32;
uint32_t hi_32;
diff --git a/lib/librte_eal/common/include/arch/x86/rte_memcpy.h b/lib/librte_eal/common/include/arch/x86/rte_memcpy.h
index 413035e7..b3bfc235 100644
--- a/lib/librte_eal/common/include/arch/x86/rte_memcpy.h
+++ b/lib/librte_eal/common/include/arch/x86/rte_memcpy.h
@@ -594,7 +594,7 @@ rte_mov256(uint8_t *dst, const uint8_t *src)
* - __m128i <xmm0> ~ <xmm8> must be pre-defined
*/
#define MOVEUNALIGNED_LEFT47_IMM(dst, src, len, offset) \
-({ \
+__extension__ ({ \
int tmp; \
while (len >= 128 + 16 - offset) { \
xmm0 = _mm_loadu_si128((const __m128i *)((const uint8_t *)src - offset + 0 * 16)); \
@@ -655,7 +655,7 @@ rte_mov256(uint8_t *dst, const uint8_t *src)
* - __m128i <xmm0> ~ <xmm8> used in MOVEUNALIGNED_LEFT47_IMM must be pre-defined
*/
#define MOVEUNALIGNED_LEFT47(dst, src, len, offset) \
-({ \
+__extension__ ({ \
switch (offset) { \
case 0x01: MOVEUNALIGNED_LEFT47_IMM(dst, src, n, 0x01); break; \
case 0x02: MOVEUNALIGNED_LEFT47_IMM(dst, src, n, 0x02); break; \
diff --git a/lib/librte_eal/common/include/arch/x86/rte_prefetch.h b/lib/librte_eal/common/include/arch/x86/rte_prefetch.h
index 5dac47eb..f464398f 100644
--- a/lib/librte_eal/common/include/arch/x86/rte_prefetch.h
+++ b/lib/librte_eal/common/include/arch/x86/rte_prefetch.h
@@ -38,6 +38,7 @@
extern "C" {
#endif
+#include <rte_common.h>
#include "generic/rte_prefetch.h"
static inline void rte_prefetch0(const volatile void *p)
diff --git a/lib/librte_eal/common/include/arch/x86/rte_rtm.h b/lib/librte_eal/common/include/arch/x86/rte_rtm.h
index 0649f794..ab099952 100644
--- a/lib/librte_eal/common/include/arch/x86/rte_rtm.h
+++ b/lib/librte_eal/common/include/arch/x86/rte_rtm.h
@@ -20,6 +20,7 @@
/* Official RTM intrinsics interface matching gcc/icc, but works
on older gcc compatible compilers and binutils. */
+#include <rte_common.h>
#ifdef __cplusplus
extern "C" {
diff --git a/lib/librte_eal/common/include/arch/x86/rte_vect.h b/lib/librte_eal/common/include/arch/x86/rte_vect.h
index b698797c..77f2e253 100644
--- a/lib/librte_eal/common/include/arch/x86/rte_vect.h
+++ b/lib/librte_eal/common/include/arch/x86/rte_vect.h
@@ -40,6 +40,8 @@
* RTE SSE/AVX related header.
*/
+#include <stdint.h>
+
#if (defined(__ICC) || (__GNUC__ == 4 && __GNUC_MINOR__ < 4))
#ifdef __SSE__
@@ -106,7 +108,8 @@ typedef union rte_ymm {
#endif /* __AVX__ */
#ifdef RTE_ARCH_I686
-#define _mm_cvtsi128_si64(a) ({ \
+#define _mm_cvtsi128_si64(a) \
+__extension__ ({ \
rte_xmm_t m; \
m.x = (a); \
(m.u64[0]); \
@@ -117,7 +120,8 @@ typedef union rte_ymm {
* Prior to version 12.1 icc doesn't support _mm_set_epi64x.
*/
#if (defined(__ICC) && __ICC < 1210)
-#define _mm_set_epi64x(a, b) ({ \
+#define _mm_set_epi64x(a, b) \
+__extension__ ({ \
rte_xmm_t m; \
m.u64[0] = b; \
m.u64[1] = a; \
diff --git a/lib/librte_eal/common/include/generic/rte_atomic.h b/lib/librte_eal/common/include/generic/rte_atomic.h
index bfb4fe44..43a704ec 100644
--- a/lib/librte_eal/common/include/generic/rte_atomic.h
+++ b/lib/librte_eal/common/include/generic/rte_atomic.h
@@ -42,6 +42,7 @@
*/
#include <stdint.h>
+#include <rte_common.h>
#ifdef __DOXYGEN__
diff --git a/lib/librte_eal/common/include/generic/rte_byteorder.h b/lib/librte_eal/common/include/generic/rte_byteorder.h
index c46fdcf2..e00bccbc 100644
--- a/lib/librte_eal/common/include/generic/rte_byteorder.h
+++ b/lib/librte_eal/common/include/generic/rte_byteorder.h
@@ -50,6 +50,8 @@
#include <endian.h>
#endif
+#include <rte_common.h>
+
/*
* Compile-time endianness detection
*/
diff --git a/lib/librte_eal/common/include/generic/rte_cpuflags.h b/lib/librte_eal/common/include/generic/rte_cpuflags.h
index c1da357c..71321f32 100644
--- a/lib/librte_eal/common/include/generic/rte_cpuflags.h
+++ b/lib/librte_eal/common/include/generic/rte_cpuflags.h
@@ -44,6 +44,7 @@
/**
* Enumeration of all CPU features supported
*/
+__extension__
enum rte_cpu_flag_t;
/**
@@ -55,6 +56,7 @@ enum rte_cpu_flag_t;
* flag name
* NULL if flag ID is invalid
*/
+__extension__
const char *
rte_cpu_get_flag_name(enum rte_cpu_flag_t feature);
@@ -68,6 +70,7 @@ rte_cpu_get_flag_name(enum rte_cpu_flag_t feature);
* 0 if flag is not available
* -ENOENT if flag is invalid
*/
+__extension__
int
rte_cpu_get_flag_enabled(enum rte_cpu_flag_t feature);
diff --git a/lib/librte_eal/common/include/generic/rte_cycles.h b/lib/librte_eal/common/include/generic/rte_cycles.h
index 8cc21f20..00103ca9 100644
--- a/lib/librte_eal/common/include/generic/rte_cycles.h
+++ b/lib/librte_eal/common/include/generic/rte_cycles.h
@@ -180,15 +180,16 @@ rte_get_timer_hz(void)
default: rte_panic("Invalid timer source specified\n");
}
}
-
/**
* Wait at least us microseconds.
+ * This function can be replaced with user-defined function.
+ * @see rte_delay_us_callback_register
*
* @param us
* The number of microseconds to wait.
*/
-void
-rte_delay_us(unsigned us);
+extern void
+(*rte_delay_us)(unsigned int us);
/**
* Wait at least ms milliseconds.
@@ -202,4 +203,21 @@ rte_delay_ms(unsigned ms)
rte_delay_us(ms * 1000);
}
+/**
+ * Blocking delay function.
+ *
+ * @param us
+ * Number of microseconds to wait.
+ */
+void rte_delay_us_block(unsigned int us);
+
+/**
+ * Replace rte_delay_us with user defined function.
+ *
+ * @param userfunc
+ * User function which replaces rte_delay_us. rte_delay_us_block restores
+ * built-in blocking delay function.
+ */
+void rte_delay_us_callback_register(void(*userfunc)(unsigned int));
+
#endif /* _RTE_CYCLES_H_ */
diff --git a/lib/librte_eal/common/include/generic/rte_memcpy.h b/lib/librte_eal/common/include/generic/rte_memcpy.h
index afb0afe4..4e9d8794 100644
--- a/lib/librte_eal/common/include/generic/rte_memcpy.h
+++ b/lib/librte_eal/common/include/generic/rte_memcpy.h
@@ -64,6 +64,8 @@ rte_mov16(uint8_t *dst, const uint8_t *src);
static inline void
rte_mov32(uint8_t *dst, const uint8_t *src);
+#ifdef __DOXYGEN__
+
/**
* Copy 48 bytes from one location to another using optimised
* instructions. The locations should not overlap.
@@ -76,6 +78,8 @@ rte_mov32(uint8_t *dst, const uint8_t *src);
static inline void
rte_mov48(uint8_t *dst, const uint8_t *src);
+#endif /* __DOXYGEN__ */
+
/**
* Copy 64 bytes from one location to another using optimised
* instructions. The locations should not overlap.
diff --git a/lib/librte_eal/common/include/rte_common.h b/lib/librte_eal/common/include/rte_common.h
index 332f2a43..db5ac91c 100644
--- a/lib/librte_eal/common/include/rte_common.h
+++ b/lib/librte_eal/common/include/rte_common.h
@@ -59,6 +59,13 @@ extern "C" {
#define asm __asm__
#endif
+/** C extension macro for environments lacking C11 features. */
+#if !defined(__STDC_VERSION__) || __STDC_VERSION__ < 201112L
+#define RTE_STD_C11 __extension__
+#else
+#define RTE_STD_C11
+#endif
+
#ifdef RTE_ARCH_STRICT_ALIGN
typedef uint64_t unaligned_uint64_t __attribute__ ((aligned(1)));
typedef uint32_t unaligned_uint32_t __attribute__ ((aligned(1)));
@@ -268,7 +275,8 @@ rte_align64pow2(uint64_t v)
/**
* Macro to return the minimum of two numbers
*/
-#define RTE_MIN(a, b) ({ \
+#define RTE_MIN(a, b) \
+ __extension__ ({ \
typeof (a) _a = (a); \
typeof (b) _b = (b); \
_a < _b ? _a : _b; \
@@ -277,7 +285,8 @@ rte_align64pow2(uint64_t v)
/**
* Macro to return the maximum of two numbers
*/
-#define RTE_MAX(a, b) ({ \
+#define RTE_MAX(a, b) \
+ __extension__ ({ \
typeof (a) _a = (a); \
typeof (b) _b = (b); \
_a > _b ? _a : _b; \
@@ -326,6 +335,15 @@ rte_bsf32(uint32_t v)
/** Take a macro value and get a string version of it */
#define RTE_STR(x) _RTE_STR(x)
+/**
+ * ISO C helpers to modify format strings using variadic macros.
+ * This is a replacement for the ", ## __VA_ARGS__" GNU extension.
+ * An empty %s argument is appended to avoid a dangling comma.
+ */
+#define RTE_FMT(fmt, ...) fmt "%.0s", __VA_ARGS__ ""
+#define RTE_FMT_HEAD(fmt, ...) fmt
+#define RTE_FMT_TAIL(fmt, ...) __VA_ARGS__
+
/** Mask value of type "tp" for the first "ln" bit set. */
#define RTE_LEN2MASK(ln, tp) \
((tp)((uint64_t)-1 >> (sizeof(uint64_t) * CHAR_BIT - (ln))))
diff --git a/lib/librte_eal/common/include/rte_dev.h b/lib/librte_eal/common/include/rte_dev.h
index 95789f9d..8840380d 100644
--- a/lib/librte_eal/common/include/rte_dev.h
+++ b/lib/librte_eal/common/include/rte_dev.h
@@ -100,37 +100,56 @@ rte_pmd_debug_trace(const char *func_name, const char *fmt, ...)
} \
} while (0)
+/**
+ * A generic memory resource representation.
+ */
+struct rte_mem_resource {
+ uint64_t phys_addr; /**< Physical address, 0 if no resource. */
+ uint64_t len; /**< Length of the resource. */
+ void *addr; /**< Virtual address, NULL when not mapped. */
+};
/** Double linked list of device drivers. */
TAILQ_HEAD(rte_driver_list, rte_driver);
+/** Double linked list of devices. */
+TAILQ_HEAD(rte_device_list, rte_device);
+
+/* Forward declaration */
+struct rte_driver;
/**
- * Initialization function called for each device driver once.
+ * A structure describing a generic device.
*/
-typedef int (rte_dev_init_t)(const char *name, const char *args);
+struct rte_device {
+ TAILQ_ENTRY(rte_device) next; /**< Next device */
+ struct rte_driver *driver; /**< Associated driver */
+ int numa_node; /**< NUMA node connection */
+ struct rte_devargs *devargs; /**< Device user arguments */
+};
/**
- * Uninitilization function called for each device driver once.
+ * Insert a device detected by a bus scan.
+ *
+ * @param dev
+ * A pointer to a rte_device structure describing the detected device.
*/
-typedef int (rte_dev_uninit_t)(const char *name);
+void rte_eal_device_insert(struct rte_device *dev);
/**
- * Driver type enumeration
+ * Remove a device (e.g. when being unplugged).
+ *
+ * @param dev
+ * A pointer to a rte_device structure describing the device to be removed.
*/
-enum pmd_type {
- PMD_VDEV = 0,
- PMD_PDEV = 1,
-};
+void rte_eal_device_remove(struct rte_device *dev);
/**
* A structure describing a device driver.
*/
struct rte_driver {
TAILQ_ENTRY(rte_driver) next; /**< Next in list. */
- enum pmd_type type; /**< PMD Driver type */
const char *name; /**< Driver name. */
- rte_dev_init_t *init; /**< Device init. function. */
- rte_dev_uninit_t *uninit; /**< Device uninit. function. */
+ const char *alias; /**< Driver alias. */
};
/**
@@ -178,28 +197,45 @@ int rte_eal_vdev_init(const char *name, const char *args);
*/
int rte_eal_vdev_uninit(const char *name);
-#define DRIVER_EXPORT_NAME_ARRAY(n, idx) n##idx[]
+/**
+ * Attach a device to a registered driver.
+ *
+ * @param name
+ * The device name, that refers to a pci device (or some private
+ * way of designating a vdev device). Based on this device name, eal
+ * will identify a driver capable of handling it and pass it to the
+ * driver probing function.
+ * @param devargs
+ * Device arguments to be passed to the driver.
+ * @return
+ * 0 on success, negative on error.
+ */
+int rte_eal_dev_attach(const char *name, const char *devargs);
-#define DRIVER_EXPORT_NAME(name, idx) \
-static const char DRIVER_EXPORT_NAME_ARRAY(this_pmd_name, idx) \
-__attribute__((used)) = RTE_STR(name)
+/**
+ * Detach a device from its driver.
+ *
+ * @param name
+ * Same description as for rte_eal_dev_attach().
+ * Here, eal will call the driver detaching function.
+ * @return
+ * 0 on success, negative on error.
+ */
+int rte_eal_dev_detach(const char *name);
-#define PMD_REGISTER_DRIVER(drv, nm)\
-void devinitfn_ ##drv(void);\
-void __attribute__((constructor, used)) devinitfn_ ##drv(void)\
-{\
- (drv).name = RTE_STR(nm);\
- rte_eal_driver_register(&drv);\
-} \
-DRIVER_EXPORT_NAME(nm, __COUNTER__)
+#define RTE_PMD_EXPORT_NAME_ARRAY(n, idx) n##idx[]
+
+#define RTE_PMD_EXPORT_NAME(name, idx) \
+static const char RTE_PMD_EXPORT_NAME_ARRAY(this_pmd_name, idx) \
+__attribute__((used)) = RTE_STR(name)
#define DRV_EXP_TAG(name, tag) __##name##_##tag
-#define DRIVER_REGISTER_PCI_TABLE(name, table) \
+#define RTE_PMD_REGISTER_PCI_TABLE(name, table) \
static const char DRV_EXP_TAG(name, pci_tbl_export)[] __attribute__((used)) = \
RTE_STR(table)
-#define DRIVER_REGISTER_PARAM_STRING(name, str) \
+#define RTE_PMD_REGISTER_PARAM_STRING(name, str) \
static const char DRV_EXP_TAG(name, param_string_export)[] \
__attribute__((used)) = str
diff --git a/lib/librte_eal/common/include/rte_devargs.h b/lib/librte_eal/common/include/rte_devargs.h
index 53c59f56..88120a1c 100644
--- a/lib/librte_eal/common/include/rte_devargs.h
+++ b/lib/librte_eal/common/include/rte_devargs.h
@@ -76,6 +76,7 @@ struct rte_devargs {
TAILQ_ENTRY(rte_devargs) next;
/** Type of device. */
enum rte_devtype type;
+ RTE_STD_C11
union {
/** Used if type is RTE_DEVTYPE_*_PCI. */
struct {
@@ -106,8 +107,8 @@ extern struct rte_devargs_list devargs_list;
* "04:00.0,arg=val".
*
* For virtual devices, the format of arguments string is "DRIVER_NAME*"
- * or "DRIVER_NAME*,key=val,key2=val2,...". Examples: "eth_ring",
- * "eth_ring0", "eth_pmdAnything,arg=0:arg2=1".
+ * or "DRIVER_NAME*,key=val,key2=val2,...". Examples: "net_ring",
+ * "net_ring0", "net_pmdAnything,arg=0:arg2=1".
*
* The function parses the arguments string to get driver name and driver
* arguments.
@@ -134,8 +135,8 @@ int rte_eal_parse_devargs_str(const char *devargs_str,
* "04:00.0,arg=val".
*
* For virtual devices, the format of arguments string is "DRIVER_NAME*"
- * or "DRIVER_NAME*,key=val,key2=val2,...". Examples: "eth_ring",
- * "eth_ring0", "eth_pmdAnything,arg=0:arg2=1". The validity of the
+ * or "DRIVER_NAME*,key=val,key2=val2,...". Examples: "net_ring",
+ * "net_ring0", "net_pmdAnything,arg=0:arg2=1". The validity of the
* driver name is not checked by this function, it is done when probing
* the drivers.
*
diff --git a/lib/librte_eal/common/include/rte_eal.h b/lib/librte_eal/common/include/rte_eal.h
index a71d6f57..d150b9dd 100644
--- a/lib/librte_eal/common/include/rte_eal.h
+++ b/lib/librte_eal/common/include/rte_eal.h
@@ -44,6 +44,7 @@
#include <sched.h>
#include <rte_per_lcore.h>
+#include <rte_config.h>
#ifdef __cplusplus
extern "C" {
@@ -252,6 +253,9 @@ static inline int rte_gettid(void)
return RTE_PER_LCORE(_thread_id);
}
+#define RTE_INIT(func) \
+static void __attribute__((constructor, used)) func(void)
+
#ifdef __cplusplus
}
#endif
diff --git a/lib/librte_eal/common/include/rte_interrupts.h b/lib/librte_eal/common/include/rte_interrupts.h
index ff11ef3a..fd3c6eff 100644
--- a/lib/librte_eal/common/include/rte_interrupts.h
+++ b/lib/librte_eal/common/include/rte_interrupts.h
@@ -34,6 +34,8 @@
#ifndef _RTE_INTERRUPTS_H_
#define _RTE_INTERRUPTS_H_
+#include <rte_common.h>
+
/**
* @file
*
diff --git a/lib/librte_eal/common/include/rte_log.h b/lib/librte_eal/common/include/rte_log.h
index b1add04c..29f7d192 100644
--- a/lib/librte_eal/common/include/rte_log.h
+++ b/lib/librte_eal/common/include/rte_log.h
@@ -42,8 +42,6 @@
* This file provides a log API to RTE applications.
*/
-#include "rte_common.h" /* for __rte_deprecated macro */
-
#ifdef __cplusplus
extern "C" {
#endif
@@ -56,7 +54,7 @@ extern "C" {
struct rte_logs {
uint32_t type; /**< Bitfield with enabled logs. */
uint32_t level; /**< Log level. */
- FILE *file; /**< Pointer to current FILE* for logs. */
+ FILE *file; /**< Output file set by rte_openlog_stream, or NULL. */
};
/** Global log informations */
@@ -102,9 +100,6 @@ extern struct rte_logs rte_logs;
#define RTE_LOG_INFO 7U /**< Informational. */
#define RTE_LOG_DEBUG 8U /**< Debug-level messages. */
-/** The default log stream. */
-extern FILE *eal_default_log_stream;
-
/**
* Change the stream that will be used by the logging system.
*
@@ -181,45 +176,6 @@ int rte_log_cur_msg_loglevel(void);
int rte_log_cur_msg_logtype(void);
/**
- * @deprecated
- * Enable or disable the history (enabled by default)
- *
- * @param enable
- * true to enable, or 0 to disable history.
- */
-__rte_deprecated
-void rte_log_set_history(int enable);
-
-/**
- * @deprecated
- * Dump the log history to a file
- *
- * @param f
- * A pointer to a file for output
- */
-__rte_deprecated
-void rte_log_dump_history(FILE *f);
-
-/**
- * @deprecated
- * Add a log message to the history.
- *
- * This function can be called from a user-defined log stream. It adds
- * the given message in the history that can be dumped using
- * rte_log_dump_history().
- *
- * @param buf
- * A data buffer containing the message to be saved in the history.
- * @param size
- * The length of the data buffer.
- * @return
- * - 0: Success.
- * - (-ENOBUFS) if there is no room to store the message.
- */
-__rte_deprecated
-int rte_log_add_in_history(const char *buf, size_t size);
-
-/**
* Generates a log message.
*
* The message will be sent in the stream defined by the previous call
diff --git a/lib/librte_eal/common/include/rte_malloc.h b/lib/librte_eal/common/include/rte_malloc.h
index 74bb78c7..008ce134 100644
--- a/lib/librte_eal/common/include/rte_malloc.h
+++ b/lib/librte_eal/common/include/rte_malloc.h
@@ -294,7 +294,7 @@ rte_malloc_get_socket_stats(int socket,
/**
* Dump statistics.
*
- * Dump for the specified type to the console. If the type argument is
+ * Dump for the specified type to a file. If the type argument is
* NULL, all memory types will be dumped.
*
* @param f
diff --git a/lib/librte_eal/common/include/rte_memory.h b/lib/librte_eal/common/include/rte_memory.h
index 06611093..4aa5d1f7 100644
--- a/lib/librte_eal/common/include/rte_memory.h
+++ b/lib/librte_eal/common/include/rte_memory.h
@@ -44,6 +44,8 @@
#include <stddef.h>
#include <stdio.h>
+#include <rte_config.h>
+
#ifdef RTE_EXEC_ENV_LINUXAPP
#include <exec-env/rte_dom0_common.h>
#endif
@@ -54,6 +56,7 @@ extern "C" {
#include <rte_common.h>
+__extension__
enum rte_page_sizes {
RTE_PGSIZE_4K = 1ULL << 12,
RTE_PGSIZE_64K = 1ULL << 16,
@@ -103,13 +106,11 @@ typedef uint64_t phys_addr_t; /**< Physical address definition. */
*/
struct rte_memseg {
phys_addr_t phys_addr; /**< Start physical address. */
+ RTE_STD_C11
union {
void *addr; /**< Start virtual address. */
uint64_t addr_64; /**< Makes sure addr is always 64 bits */
};
-#ifdef RTE_LIBRTE_IVSHMEM
- phys_addr_t ioremap_addr; /**< Real physical address inside the VM */
-#endif
size_t len; /**< Length of the segment. */
uint64_t hugepage_sz; /**< The pagesize of underlying memory */
int32_t socket_id; /**< NUMA socket ID. */
@@ -161,7 +162,7 @@ phys_addr_t rte_mem_virt2phy(const void *virt);
const struct rte_memseg *rte_eal_get_physmem_layout(void);
/**
- * Dump the physical memory layout to the console.
+ * Dump the physical memory layout to a file.
*
* @param f
* A pointer to a file for output
diff --git a/lib/librte_eal/common/include/rte_memzone.h b/lib/librte_eal/common/include/rte_memzone.h
index f69b5a87..1d0827f4 100644
--- a/lib/librte_eal/common/include/rte_memzone.h
+++ b/lib/librte_eal/common/include/rte_memzone.h
@@ -53,6 +53,7 @@
#include <stdio.h>
#include <rte_memory.h>
+#include <rte_common.h>
#ifdef __cplusplus
extern "C" {
@@ -78,13 +79,11 @@ struct rte_memzone {
char name[RTE_MEMZONE_NAMESIZE]; /**< Name of the memory zone. */
phys_addr_t phys_addr; /**< Start physical address. */
+ RTE_STD_C11
union {
void *addr; /**< Start virtual address. */
uint64_t addr_64; /**< Makes sure addr is always 64-bits */
};
-#ifdef RTE_LIBRTE_IVSHMEM
- phys_addr_t ioremap_addr; /**< Real physical address inside the VM */
-#endif
size_t len; /**< Length of the memzone. */
uint64_t hugepage_sz; /**< The page size of underlying memory */
@@ -256,12 +255,10 @@ const struct rte_memzone *rte_memzone_reserve_bounded(const char *name,
/**
* Free a memzone.
*
- * Note: an IVSHMEM zone cannot be freed.
- *
* @param mz
* A pointer to the memzone
* @return
- * -EINVAL - invalid parameter, IVSHMEM memzone.
+ * -EINVAL - invalid parameter.
* 0 - success
*/
int rte_memzone_free(const struct rte_memzone *mz);
@@ -280,7 +277,7 @@ int rte_memzone_free(const struct rte_memzone *mz);
const struct rte_memzone *rte_memzone_lookup(const char *name);
/**
- * Dump all reserved memzones to the console.
+ * Dump all reserved memzones to a file.
*
* @param f
* A pointer to a file for output
diff --git a/lib/librte_eal/common/include/rte_pci.h b/lib/librte_eal/common/include/rte_pci.h
index fa749626..9ce88472 100644
--- a/lib/librte_eal/common/include/rte_pci.h
+++ b/lib/librte_eal/common/include/rte_pci.h
@@ -82,7 +82,9 @@ extern "C" {
#include <stdint.h>
#include <inttypes.h>
+#include <rte_debug.h>
#include <rte_interrupts.h>
+#include <rte_dev.h>
TAILQ_HEAD(pci_device_list, rte_pci_device); /**< PCI devices in D-linked Q. */
TAILQ_HEAD(pci_driver_list, rte_pci_driver); /**< PCI drivers in D-linked Q. */
@@ -95,6 +97,7 @@ const char *pci_get_sysfs_path(void);
/** Formatting string for PCI device identifier: Ex: 0000:00:01.0 */
#define PCI_PRI_FMT "%.4" PRIx16 ":%.2" PRIx8 ":%.2" PRIx8 ".%" PRIx8
+#define PCI_PRI_STR_SIZE sizeof("XXXX:XX:XX.X")
/** Short formatting string, without domain, for PCI device: Ex: 00:01.0 */
#define PCI_SHORT_PRI_FMT "%.2" PRIx8 ":%.2" PRIx8 ".%" PRIx8
@@ -105,15 +108,6 @@ const char *pci_get_sysfs_path(void);
/** Nb. of values in PCI resource format. */
#define PCI_RESOURCE_FMT_NVAL 3
-/**
- * A structure describing a PCI resource.
- */
-struct rte_pci_resource {
- uint64_t phys_addr; /**< Physical address, 0 if no resource. */
- uint64_t len; /**< Length of the resource. */
- void *addr; /**< Virtual address, NULL when not mapped. */
-};
-
/** Maximum number of PCI resources. */
#define PCI_MAX_RESOURCE 6
@@ -155,14 +149,14 @@ enum rte_kernel_driver {
*/
struct rte_pci_device {
TAILQ_ENTRY(rte_pci_device) next; /**< Next probed PCI device. */
+ struct rte_device device; /**< Inherit core device */
struct rte_pci_addr addr; /**< PCI location. */
struct rte_pci_id id; /**< PCI ID. */
- struct rte_pci_resource mem_resource[PCI_MAX_RESOURCE]; /**< PCI Memory Resource */
+ struct rte_mem_resource mem_resource[PCI_MAX_RESOURCE];
+ /**< PCI Memory Resource */
struct rte_intr_handle intr_handle; /**< Interrupt handle */
struct rte_pci_driver *driver; /**< Associated driver */
uint16_t max_vfs; /**< sriov enable if not zero */
- int numa_node; /**< NUMA node connection */
- struct rte_devargs *devargs; /**< Device user arguments */
enum rte_kernel_driver kdrv; /**< Kernel driver passthrough */
};
@@ -193,21 +187,21 @@ struct rte_pci_driver;
/**
* Initialisation function for the driver called during PCI probing.
*/
-typedef int (pci_devinit_t)(struct rte_pci_driver *, struct rte_pci_device *);
+typedef int (pci_probe_t)(struct rte_pci_driver *, struct rte_pci_device *);
/**
* Uninitialisation function for the driver called during hotplugging.
*/
-typedef int (pci_devuninit_t)(struct rte_pci_device *);
+typedef int (pci_remove_t)(struct rte_pci_device *);
/**
* A structure describing a PCI driver.
*/
struct rte_pci_driver {
TAILQ_ENTRY(rte_pci_driver) next; /**< Next in list. */
- const char *name; /**< Driver name. */
- pci_devinit_t *devinit; /**< Device init. function. */
- pci_devuninit_t *devuninit; /**< Device uninit function. */
+ struct rte_driver driver; /**< Inherit core driver. */
+ pci_probe_t *probe; /**< Device Probe function. */
+ pci_remove_t *remove; /**< Device Remove function. */
const struct rte_pci_id *id_table; /**< ID table, NULL terminated. */
uint32_t drv_flags; /**< Flags contolling handling of device. */
};
@@ -308,6 +302,28 @@ eal_parse_pci_DomBDF(const char *input, struct rte_pci_addr *dev_addr)
}
#undef GET_PCIADDR_FIELD
+/**
+ * Utility function to write a PCI device name; this device name can later be
+ * used to retrieve the corresponding rte_pci_addr using eal_parse_pci_*
+ * BDF helpers.
+ *
+ * @param addr
+ * The PCI Bus-Device-Function address
+ * @param output
+ * The output buffer string
+ * @param size
+ * The output buffer size
+ */
+static inline void
+rte_eal_pci_device_name(const struct rte_pci_addr *addr,
+ char *output, size_t size)
+{
+ RTE_VERIFY(size >= PCI_PRI_STR_SIZE);
+ RTE_VERIFY(snprintf(output, size, PCI_PRI_FMT,
+ addr->domain, addr->bus,
+ addr->devid, addr->function) >= 0);
+}
+
/* Compare two PCI device addresses. */
/**
* Utility function to compare two PCI device addresses.
@@ -442,7 +458,7 @@ int rte_eal_pci_probe_one(const struct rte_pci_addr *addr);
* Close the single PCI device.
*
* Scan the content of the PCI bus, and find the pci device specified by pci
- * address, then call the devuninit() function for registered driver that has a
+ * address, then call the remove() function for registered driver that has a
* matching entry in its id_table for discovered device.
*
* @param addr
@@ -470,6 +486,16 @@ void rte_eal_pci_dump(FILE *f);
*/
void rte_eal_pci_register(struct rte_pci_driver *driver);
+/** Helper for PCI device registration from driver (eth, crypto) instance */
+#define RTE_PMD_REGISTER_PCI(nm, pci_drv) \
+RTE_INIT(pciinitfn_ ##nm); \
+static void pciinitfn_ ##nm(void) \
+{\
+ (pci_drv).driver.name = RTE_STR(nm);\
+ rte_eal_pci_register(&pci_drv); \
+} \
+RTE_PMD_EXPORT_NAME(nm, __COUNTER__)
+
/**
* Unregister a PCI driver.
*
diff --git a/lib/librte_eal/common/include/rte_pci_dev_ids.h b/lib/librte_eal/common/include/rte_pci_dev_ids.h
deleted file mode 100644
index 6ec8ae8c..00000000
--- a/lib/librte_eal/common/include/rte_pci_dev_ids.h
+++ /dev/null
@@ -1,326 +0,0 @@
-/*-
- * This file is provided under a dual BSD/GPLv2 license. When using or
- * redistributing this file, you may do so under either license.
- *
- * GPL LICENSE SUMMARY
- *
- * Copyright(c) 2010-2015 Intel Corporation. All rights reserved.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of version 2 of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
- * The full GNU General Public License is included in this distribution
- * in the file called LICENSE.GPL.
- *
- * Contact Information:
- * Intel Corporation
- *
- * BSD LICENSE
- *
- * Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- *
- * * Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in
- * the documentation and/or other materials provided with the
- * distribution.
- * * Neither the name of Intel Corporation nor the names of its
- * contributors may be used to endorse or promote products derived
- * from this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- *
- */
-
-#ifndef RTE_PCI_DEV_ID_DECL_IGB
-#define RTE_PCI_DEV_ID_DECL_IGB(vend, dev)
-#endif
-
-#ifndef RTE_PCI_DEV_ID_DECL_IGBVF
-#define RTE_PCI_DEV_ID_DECL_IGBVF(vend, dev)
-#endif
-
-#ifndef RTE_PCI_DEV_ID_DECL_IXGBE
-#define RTE_PCI_DEV_ID_DECL_IXGBE(vend, dev)
-#endif
-
-#ifndef RTE_PCI_DEV_ID_DECL_IXGBEVF
-#define RTE_PCI_DEV_ID_DECL_IXGBEVF(vend, dev)
-#endif
-
-#ifndef PCI_VENDOR_ID_INTEL
-/** Vendor ID used by Intel devices */
-#define PCI_VENDOR_ID_INTEL 0x8086
-#endif
-
-/******************** Physical IGB devices from e1000_hw.h ********************/
-
-#define E1000_DEV_ID_82576 0x10C9
-#define E1000_DEV_ID_82576_FIBER 0x10E6
-#define E1000_DEV_ID_82576_SERDES 0x10E7
-#define E1000_DEV_ID_82576_QUAD_COPPER 0x10E8
-#define E1000_DEV_ID_82576_QUAD_COPPER_ET2 0x1526
-#define E1000_DEV_ID_82576_NS 0x150A
-#define E1000_DEV_ID_82576_NS_SERDES 0x1518
-#define E1000_DEV_ID_82576_SERDES_QUAD 0x150D
-#define E1000_DEV_ID_82575EB_COPPER 0x10A7
-#define E1000_DEV_ID_82575EB_FIBER_SERDES 0x10A9
-#define E1000_DEV_ID_82575GB_QUAD_COPPER 0x10D6
-#define E1000_DEV_ID_82580_COPPER 0x150E
-#define E1000_DEV_ID_82580_FIBER 0x150F
-#define E1000_DEV_ID_82580_SERDES 0x1510
-#define E1000_DEV_ID_82580_SGMII 0x1511
-#define E1000_DEV_ID_82580_COPPER_DUAL 0x1516
-#define E1000_DEV_ID_82580_QUAD_FIBER 0x1527
-#define E1000_DEV_ID_I350_COPPER 0x1521
-#define E1000_DEV_ID_I350_FIBER 0x1522
-#define E1000_DEV_ID_I350_SERDES 0x1523
-#define E1000_DEV_ID_I350_SGMII 0x1524
-#define E1000_DEV_ID_I350_DA4 0x1546
-#define E1000_DEV_ID_I210_COPPER 0x1533
-#define E1000_DEV_ID_I210_COPPER_OEM1 0x1534
-#define E1000_DEV_ID_I210_COPPER_IT 0x1535
-#define E1000_DEV_ID_I210_FIBER 0x1536
-#define E1000_DEV_ID_I210_SERDES 0x1537
-#define E1000_DEV_ID_I210_SGMII 0x1538
-#define E1000_DEV_ID_I210_COPPER_FLASHLESS 0x157B
-#define E1000_DEV_ID_I210_SERDES_FLASHLESS 0x157C
-#define E1000_DEV_ID_I211_COPPER 0x1539
-#define E1000_DEV_ID_I354_BACKPLANE_1GBPS 0x1F40
-#define E1000_DEV_ID_I354_SGMII 0x1F41
-#define E1000_DEV_ID_I354_BACKPLANE_2_5GBPS 0x1F45
-#define E1000_DEV_ID_DH89XXCC_SGMII 0x0438
-#define E1000_DEV_ID_DH89XXCC_SERDES 0x043A
-#define E1000_DEV_ID_DH89XXCC_BACKPLANE 0x043C
-#define E1000_DEV_ID_DH89XXCC_SFP 0x0440
-
-RTE_PCI_DEV_ID_DECL_IGB(PCI_VENDOR_ID_INTEL, E1000_DEV_ID_82576)
-RTE_PCI_DEV_ID_DECL_IGB(PCI_VENDOR_ID_INTEL, E1000_DEV_ID_82576_FIBER)
-RTE_PCI_DEV_ID_DECL_IGB(PCI_VENDOR_ID_INTEL, E1000_DEV_ID_82576_SERDES)
-RTE_PCI_DEV_ID_DECL_IGB(PCI_VENDOR_ID_INTEL, E1000_DEV_ID_82576_QUAD_COPPER)
-RTE_PCI_DEV_ID_DECL_IGB(PCI_VENDOR_ID_INTEL, E1000_DEV_ID_82576_QUAD_COPPER_ET2)
-RTE_PCI_DEV_ID_DECL_IGB(PCI_VENDOR_ID_INTEL, E1000_DEV_ID_82576_NS)
-RTE_PCI_DEV_ID_DECL_IGB(PCI_VENDOR_ID_INTEL, E1000_DEV_ID_82576_NS_SERDES)
-RTE_PCI_DEV_ID_DECL_IGB(PCI_VENDOR_ID_INTEL, E1000_DEV_ID_82576_SERDES_QUAD)
-
-RTE_PCI_DEV_ID_DECL_IGB(PCI_VENDOR_ID_INTEL, E1000_DEV_ID_82575EB_COPPER)
-RTE_PCI_DEV_ID_DECL_IGB(PCI_VENDOR_ID_INTEL, E1000_DEV_ID_82575EB_FIBER_SERDES)
-RTE_PCI_DEV_ID_DECL_IGB(PCI_VENDOR_ID_INTEL, E1000_DEV_ID_82575GB_QUAD_COPPER)
-
-RTE_PCI_DEV_ID_DECL_IGB(PCI_VENDOR_ID_INTEL, E1000_DEV_ID_82580_COPPER)
-RTE_PCI_DEV_ID_DECL_IGB(PCI_VENDOR_ID_INTEL, E1000_DEV_ID_82580_FIBER)
-RTE_PCI_DEV_ID_DECL_IGB(PCI_VENDOR_ID_INTEL, E1000_DEV_ID_82580_SERDES)
-RTE_PCI_DEV_ID_DECL_IGB(PCI_VENDOR_ID_INTEL, E1000_DEV_ID_82580_SGMII)
-RTE_PCI_DEV_ID_DECL_IGB(PCI_VENDOR_ID_INTEL, E1000_DEV_ID_82580_COPPER_DUAL)
-RTE_PCI_DEV_ID_DECL_IGB(PCI_VENDOR_ID_INTEL, E1000_DEV_ID_82580_QUAD_FIBER)
-
-RTE_PCI_DEV_ID_DECL_IGB(PCI_VENDOR_ID_INTEL, E1000_DEV_ID_I350_COPPER)
-RTE_PCI_DEV_ID_DECL_IGB(PCI_VENDOR_ID_INTEL, E1000_DEV_ID_I350_FIBER)
-RTE_PCI_DEV_ID_DECL_IGB(PCI_VENDOR_ID_INTEL, E1000_DEV_ID_I350_SERDES)
-RTE_PCI_DEV_ID_DECL_IGB(PCI_VENDOR_ID_INTEL, E1000_DEV_ID_I350_SGMII)
-RTE_PCI_DEV_ID_DECL_IGB(PCI_VENDOR_ID_INTEL, E1000_DEV_ID_I350_DA4)
-RTE_PCI_DEV_ID_DECL_IGB(PCI_VENDOR_ID_INTEL, E1000_DEV_ID_I210_COPPER)
-RTE_PCI_DEV_ID_DECL_IGB(PCI_VENDOR_ID_INTEL, E1000_DEV_ID_I210_COPPER_OEM1)
-RTE_PCI_DEV_ID_DECL_IGB(PCI_VENDOR_ID_INTEL, E1000_DEV_ID_I210_COPPER_IT)
-RTE_PCI_DEV_ID_DECL_IGB(PCI_VENDOR_ID_INTEL, E1000_DEV_ID_I210_FIBER)
-RTE_PCI_DEV_ID_DECL_IGB(PCI_VENDOR_ID_INTEL, E1000_DEV_ID_I210_SERDES)
-RTE_PCI_DEV_ID_DECL_IGB(PCI_VENDOR_ID_INTEL, E1000_DEV_ID_I210_SGMII)
-RTE_PCI_DEV_ID_DECL_IGB(PCI_VENDOR_ID_INTEL, E1000_DEV_ID_I211_COPPER)
-RTE_PCI_DEV_ID_DECL_IGB(PCI_VENDOR_ID_INTEL, E1000_DEV_ID_I354_BACKPLANE_1GBPS)
-RTE_PCI_DEV_ID_DECL_IGB(PCI_VENDOR_ID_INTEL, E1000_DEV_ID_I354_SGMII)
-RTE_PCI_DEV_ID_DECL_IGB(PCI_VENDOR_ID_INTEL, E1000_DEV_ID_I354_BACKPLANE_2_5GBPS)
-RTE_PCI_DEV_ID_DECL_IGB(PCI_VENDOR_ID_INTEL, E1000_DEV_ID_DH89XXCC_SGMII)
-RTE_PCI_DEV_ID_DECL_IGB(PCI_VENDOR_ID_INTEL, E1000_DEV_ID_DH89XXCC_SERDES)
-RTE_PCI_DEV_ID_DECL_IGB(PCI_VENDOR_ID_INTEL, E1000_DEV_ID_DH89XXCC_BACKPLANE)
-RTE_PCI_DEV_ID_DECL_IGB(PCI_VENDOR_ID_INTEL, E1000_DEV_ID_DH89XXCC_SFP)
-
-/****************** Physical IXGBE devices from ixgbe_type.h ******************/
-
-#define IXGBE_DEV_ID_82598 0x10B6
-#define IXGBE_DEV_ID_82598_BX 0x1508
-#define IXGBE_DEV_ID_82598AF_DUAL_PORT 0x10C6
-#define IXGBE_DEV_ID_82598AF_SINGLE_PORT 0x10C7
-#define IXGBE_DEV_ID_82598AT 0x10C8
-#define IXGBE_DEV_ID_82598AT2 0x150B
-#define IXGBE_DEV_ID_82598EB_SFP_LOM 0x10DB
-#define IXGBE_DEV_ID_82598EB_CX4 0x10DD
-#define IXGBE_DEV_ID_82598_CX4_DUAL_PORT 0x10EC
-#define IXGBE_DEV_ID_82598_DA_DUAL_PORT 0x10F1
-#define IXGBE_DEV_ID_82598_SR_DUAL_PORT_EM 0x10E1
-#define IXGBE_DEV_ID_82598EB_XF_LR 0x10F4
-#define IXGBE_DEV_ID_82599_KX4 0x10F7
-#define IXGBE_DEV_ID_82599_KX4_MEZZ 0x1514
-#define IXGBE_DEV_ID_82599_KR 0x1517
-#define IXGBE_DEV_ID_82599_COMBO_BACKPLANE 0x10F8
-#define IXGBE_SUBDEV_ID_82599_KX4_KR_MEZZ 0x000C
-#define IXGBE_DEV_ID_82599_CX4 0x10F9
-#define IXGBE_DEV_ID_82599_SFP 0x10FB
-#define IXGBE_SUBDEV_ID_82599_SFP 0x11A9
-#define IXGBE_SUBDEV_ID_82599_RNDC 0x1F72
-#define IXGBE_SUBDEV_ID_82599_560FLR 0x17D0
-#define IXGBE_SUBDEV_ID_82599_ECNA_DP 0x0470
-#define IXGBE_DEV_ID_82599_BACKPLANE_FCOE 0x152A
-#define IXGBE_DEV_ID_82599_SFP_FCOE 0x1529
-#define IXGBE_DEV_ID_82599_SFP_EM 0x1507
-#define IXGBE_DEV_ID_82599_SFP_SF2 0x154D
-#define IXGBE_DEV_ID_82599_SFP_SF_QP 0x154A
-#define IXGBE_DEV_ID_82599_QSFP_SF_QP 0x1558
-#define IXGBE_DEV_ID_82599EN_SFP 0x1557
-#define IXGBE_DEV_ID_82599_XAUI_LOM 0x10FC
-#define IXGBE_DEV_ID_82599_T3_LOM 0x151C
-#define IXGBE_DEV_ID_82599_LS 0x154F
-#define IXGBE_DEV_ID_X540T 0x1528
-#define IXGBE_DEV_ID_X540T1 0x1560
-#define IXGBE_DEV_ID_X550EM_X_SFP 0x15AC
-#define IXGBE_DEV_ID_X550EM_X_10G_T 0x15AD
-#define IXGBE_DEV_ID_X550EM_X_1G_T 0x15AE
-#define IXGBE_DEV_ID_X550T 0x1563
-#define IXGBE_DEV_ID_X550T1 0x15D1
-#define IXGBE_DEV_ID_X550EM_A_KR 0x15C2
-#define IXGBE_DEV_ID_X550EM_A_KR_L 0x15C3
-#define IXGBE_DEV_ID_X550EM_A_SFP_N 0x15C4
-#define IXGBE_DEV_ID_X550EM_A_SGMII 0x15C6
-#define IXGBE_DEV_ID_X550EM_A_SGMII_L 0x15C7
-#define IXGBE_DEV_ID_X550EM_A_10G_T 0x15C8
-#define IXGBE_DEV_ID_X550EM_A_QSFP 0x15CA
-#define IXGBE_DEV_ID_X550EM_A_QSFP_N 0x15CC
-#define IXGBE_DEV_ID_X550EM_A_SFP 0x15CE
-#define IXGBE_DEV_ID_X550EM_A_1G_T 0x15E4
-#define IXGBE_DEV_ID_X550EM_A_1G_T_L 0x15E5
-#define IXGBE_DEV_ID_X550EM_X_KX4 0x15AA
-#define IXGBE_DEV_ID_X550EM_X_KR 0x15AB
-
-#ifdef RTE_NIC_BYPASS
-#define IXGBE_DEV_ID_82599_BYPASS 0x155D
-#endif
-
-RTE_PCI_DEV_ID_DECL_IXGBE(PCI_VENDOR_ID_INTEL, IXGBE_DEV_ID_82598)
-RTE_PCI_DEV_ID_DECL_IXGBE(PCI_VENDOR_ID_INTEL, IXGBE_DEV_ID_82598_BX)
-RTE_PCI_DEV_ID_DECL_IXGBE(PCI_VENDOR_ID_INTEL, IXGBE_DEV_ID_82598AF_DUAL_PORT)
-RTE_PCI_DEV_ID_DECL_IXGBE(PCI_VENDOR_ID_INTEL, \
- IXGBE_DEV_ID_82598AF_SINGLE_PORT)
-RTE_PCI_DEV_ID_DECL_IXGBE(PCI_VENDOR_ID_INTEL, IXGBE_DEV_ID_82598AT)
-RTE_PCI_DEV_ID_DECL_IXGBE(PCI_VENDOR_ID_INTEL, IXGBE_DEV_ID_82598AT2)
-RTE_PCI_DEV_ID_DECL_IXGBE(PCI_VENDOR_ID_INTEL, IXGBE_DEV_ID_82598EB_SFP_LOM)
-RTE_PCI_DEV_ID_DECL_IXGBE(PCI_VENDOR_ID_INTEL, IXGBE_DEV_ID_82598EB_CX4)
-RTE_PCI_DEV_ID_DECL_IXGBE(PCI_VENDOR_ID_INTEL, IXGBE_DEV_ID_82598_CX4_DUAL_PORT)
-RTE_PCI_DEV_ID_DECL_IXGBE(PCI_VENDOR_ID_INTEL, IXGBE_DEV_ID_82598_DA_DUAL_PORT)
-RTE_PCI_DEV_ID_DECL_IXGBE(PCI_VENDOR_ID_INTEL, \
- IXGBE_DEV_ID_82598_SR_DUAL_PORT_EM)
-RTE_PCI_DEV_ID_DECL_IXGBE(PCI_VENDOR_ID_INTEL, IXGBE_DEV_ID_82598EB_XF_LR)
-RTE_PCI_DEV_ID_DECL_IXGBE(PCI_VENDOR_ID_INTEL, IXGBE_DEV_ID_82599_KX4)
-RTE_PCI_DEV_ID_DECL_IXGBE(PCI_VENDOR_ID_INTEL, IXGBE_DEV_ID_82599_KX4_MEZZ)
-RTE_PCI_DEV_ID_DECL_IXGBE(PCI_VENDOR_ID_INTEL, IXGBE_DEV_ID_82599_KR)
-RTE_PCI_DEV_ID_DECL_IXGBE(PCI_VENDOR_ID_INTEL, \
- IXGBE_DEV_ID_82599_COMBO_BACKPLANE)
-RTE_PCI_DEV_ID_DECL_IXGBE(PCI_VENDOR_ID_INTEL, \
- IXGBE_SUBDEV_ID_82599_KX4_KR_MEZZ)
-RTE_PCI_DEV_ID_DECL_IXGBE(PCI_VENDOR_ID_INTEL, IXGBE_DEV_ID_82599_CX4)
-RTE_PCI_DEV_ID_DECL_IXGBE(PCI_VENDOR_ID_INTEL, IXGBE_DEV_ID_82599_SFP)
-RTE_PCI_DEV_ID_DECL_IXGBE(PCI_VENDOR_ID_INTEL, IXGBE_SUBDEV_ID_82599_SFP)
-RTE_PCI_DEV_ID_DECL_IXGBE(PCI_VENDOR_ID_INTEL, IXGBE_SUBDEV_ID_82599_RNDC)
-RTE_PCI_DEV_ID_DECL_IXGBE(PCI_VENDOR_ID_INTEL, IXGBE_SUBDEV_ID_82599_560FLR)
-RTE_PCI_DEV_ID_DECL_IXGBE(PCI_VENDOR_ID_INTEL, IXGBE_SUBDEV_ID_82599_ECNA_DP)
-RTE_PCI_DEV_ID_DECL_IXGBE(PCI_VENDOR_ID_INTEL, IXGBE_DEV_ID_82599_BACKPLANE_FCOE)
-RTE_PCI_DEV_ID_DECL_IXGBE(PCI_VENDOR_ID_INTEL, IXGBE_DEV_ID_82599_SFP_FCOE)
-RTE_PCI_DEV_ID_DECL_IXGBE(PCI_VENDOR_ID_INTEL, IXGBE_DEV_ID_82599_SFP_EM)
-RTE_PCI_DEV_ID_DECL_IXGBE(PCI_VENDOR_ID_INTEL, IXGBE_DEV_ID_82599_SFP_SF2)
-RTE_PCI_DEV_ID_DECL_IXGBE(PCI_VENDOR_ID_INTEL, IXGBE_DEV_ID_82599_SFP_SF_QP)
-RTE_PCI_DEV_ID_DECL_IXGBE(PCI_VENDOR_ID_INTEL, IXGBE_DEV_ID_82599_QSFP_SF_QP)
-RTE_PCI_DEV_ID_DECL_IXGBE(PCI_VENDOR_ID_INTEL, IXGBE_DEV_ID_82599EN_SFP)
-RTE_PCI_DEV_ID_DECL_IXGBE(PCI_VENDOR_ID_INTEL, IXGBE_DEV_ID_82599_XAUI_LOM)
-RTE_PCI_DEV_ID_DECL_IXGBE(PCI_VENDOR_ID_INTEL, IXGBE_DEV_ID_82599_T3_LOM)
-RTE_PCI_DEV_ID_DECL_IXGBE(PCI_VENDOR_ID_INTEL, IXGBE_DEV_ID_82599_LS)
-RTE_PCI_DEV_ID_DECL_IXGBE(PCI_VENDOR_ID_INTEL, IXGBE_DEV_ID_X540T)
-RTE_PCI_DEV_ID_DECL_IXGBE(PCI_VENDOR_ID_INTEL, IXGBE_DEV_ID_X540T1)
-RTE_PCI_DEV_ID_DECL_IXGBE(PCI_VENDOR_ID_INTEL, IXGBE_DEV_ID_X550EM_X_SFP)
-RTE_PCI_DEV_ID_DECL_IXGBE(PCI_VENDOR_ID_INTEL, IXGBE_DEV_ID_X550EM_X_10G_T)
-RTE_PCI_DEV_ID_DECL_IXGBE(PCI_VENDOR_ID_INTEL, IXGBE_DEV_ID_X550EM_X_1G_T)
-RTE_PCI_DEV_ID_DECL_IXGBE(PCI_VENDOR_ID_INTEL, IXGBE_DEV_ID_X550T)
-RTE_PCI_DEV_ID_DECL_IXGBE(PCI_VENDOR_ID_INTEL, IXGBE_DEV_ID_X550T1)
-RTE_PCI_DEV_ID_DECL_IXGBE(PCI_VENDOR_ID_INTEL, IXGBE_DEV_ID_X550EM_A_KR)
-RTE_PCI_DEV_ID_DECL_IXGBE(PCI_VENDOR_ID_INTEL, IXGBE_DEV_ID_X550EM_A_KR_L)
-RTE_PCI_DEV_ID_DECL_IXGBE(PCI_VENDOR_ID_INTEL, IXGBE_DEV_ID_X550EM_A_SFP_N)
-RTE_PCI_DEV_ID_DECL_IXGBE(PCI_VENDOR_ID_INTEL, IXGBE_DEV_ID_X550EM_A_SGMII)
-RTE_PCI_DEV_ID_DECL_IXGBE(PCI_VENDOR_ID_INTEL, IXGBE_DEV_ID_X550EM_A_SGMII_L)
-RTE_PCI_DEV_ID_DECL_IXGBE(PCI_VENDOR_ID_INTEL, IXGBE_DEV_ID_X550EM_A_10G_T)
-RTE_PCI_DEV_ID_DECL_IXGBE(PCI_VENDOR_ID_INTEL, IXGBE_DEV_ID_X550EM_A_QSFP)
-RTE_PCI_DEV_ID_DECL_IXGBE(PCI_VENDOR_ID_INTEL, IXGBE_DEV_ID_X550EM_A_QSFP_N)
-RTE_PCI_DEV_ID_DECL_IXGBE(PCI_VENDOR_ID_INTEL, IXGBE_DEV_ID_X550EM_A_SFP)
-RTE_PCI_DEV_ID_DECL_IXGBE(PCI_VENDOR_ID_INTEL, IXGBE_DEV_ID_X550EM_A_1G_T)
-RTE_PCI_DEV_ID_DECL_IXGBE(PCI_VENDOR_ID_INTEL, IXGBE_DEV_ID_X550EM_A_1G_T_L)
-RTE_PCI_DEV_ID_DECL_IXGBE(PCI_VENDOR_ID_INTEL, IXGBE_DEV_ID_X550EM_X_KX4)
-RTE_PCI_DEV_ID_DECL_IXGBE(PCI_VENDOR_ID_INTEL, IXGBE_DEV_ID_X550EM_X_KR)
-
-#ifdef RTE_NIC_BYPASS
-RTE_PCI_DEV_ID_DECL_IXGBE(PCI_VENDOR_ID_INTEL, IXGBE_DEV_ID_82599_BYPASS)
-#endif
-
-/****************** Virtual IGB devices from e1000_hw.h ******************/
-
-#define E1000_DEV_ID_82576_VF 0x10CA
-#define E1000_DEV_ID_82576_VF_HV 0x152D
-#define E1000_DEV_ID_I350_VF 0x1520
-#define E1000_DEV_ID_I350_VF_HV 0x152F
-
-RTE_PCI_DEV_ID_DECL_IGBVF(PCI_VENDOR_ID_INTEL, E1000_DEV_ID_82576_VF)
-RTE_PCI_DEV_ID_DECL_IGBVF(PCI_VENDOR_ID_INTEL, E1000_DEV_ID_82576_VF_HV)
-RTE_PCI_DEV_ID_DECL_IGBVF(PCI_VENDOR_ID_INTEL, E1000_DEV_ID_I350_VF)
-RTE_PCI_DEV_ID_DECL_IGBVF(PCI_VENDOR_ID_INTEL, E1000_DEV_ID_I350_VF_HV)
-
-/****************** Virtual IXGBE devices from ixgbe_type.h ******************/
-
-#define IXGBE_DEV_ID_82599_VF 0x10ED
-#define IXGBE_DEV_ID_82599_VF_HV 0x152E
-#define IXGBE_DEV_ID_X540_VF 0x1515
-#define IXGBE_DEV_ID_X540_VF_HV 0x1530
-#define IXGBE_DEV_ID_X550_VF_HV 0x1564
-#define IXGBE_DEV_ID_X550_VF 0x1565
-#define IXGBE_DEV_ID_X550EM_A_VF 0x15C5
-#define IXGBE_DEV_ID_X550EM_A_VF_HV 0x15B4
-#define IXGBE_DEV_ID_X550EM_X_VF 0x15A8
-#define IXGBE_DEV_ID_X550EM_X_VF_HV 0x15A9
-
-RTE_PCI_DEV_ID_DECL_IXGBEVF(PCI_VENDOR_ID_INTEL, IXGBE_DEV_ID_82599_VF)
-RTE_PCI_DEV_ID_DECL_IXGBEVF(PCI_VENDOR_ID_INTEL, IXGBE_DEV_ID_82599_VF_HV)
-RTE_PCI_DEV_ID_DECL_IXGBEVF(PCI_VENDOR_ID_INTEL, IXGBE_DEV_ID_X540_VF)
-RTE_PCI_DEV_ID_DECL_IXGBEVF(PCI_VENDOR_ID_INTEL, IXGBE_DEV_ID_X540_VF_HV)
-RTE_PCI_DEV_ID_DECL_IXGBEVF(PCI_VENDOR_ID_INTEL, IXGBE_DEV_ID_X550_VF_HV)
-RTE_PCI_DEV_ID_DECL_IXGBEVF(PCI_VENDOR_ID_INTEL, IXGBE_DEV_ID_X550_VF)
-RTE_PCI_DEV_ID_DECL_IXGBEVF(PCI_VENDOR_ID_INTEL, IXGBE_DEV_ID_X550EM_A_VF)
-RTE_PCI_DEV_ID_DECL_IXGBEVF(PCI_VENDOR_ID_INTEL, IXGBE_DEV_ID_X550EM_A_VF_HV)
-RTE_PCI_DEV_ID_DECL_IXGBEVF(PCI_VENDOR_ID_INTEL, IXGBE_DEV_ID_X550EM_X_VF)
-RTE_PCI_DEV_ID_DECL_IXGBEVF(PCI_VENDOR_ID_INTEL, IXGBE_DEV_ID_X550EM_X_VF_HV)
-
-/*
- * Undef all RTE_PCI_DEV_ID_DECL_* here.
- */
-#undef RTE_PCI_DEV_ID_DECL_IGB
-#undef RTE_PCI_DEV_ID_DECL_IGBVF
-#undef RTE_PCI_DEV_ID_DECL_IXGBE
-#undef RTE_PCI_DEV_ID_DECL_IXGBEVF
diff --git a/lib/librte_eal/common/include/rte_tailq.h b/lib/librte_eal/common/include/rte_tailq.h
index cc3c0f1d..3aae098a 100644
--- a/lib/librte_eal/common/include/rte_tailq.h
+++ b/lib/librte_eal/common/include/rte_tailq.h
@@ -107,7 +107,7 @@ struct rte_tailq_elem {
RTE_TAILQ_CAST(rte_eal_tailq_lookup(name), struct_name)
/**
- * Dump tail queues to the console.
+ * Dump tail queues to a file.
*
* @param f
* A pointer to a file for output
@@ -148,8 +148,8 @@ struct rte_tailq_head *rte_eal_tailq_lookup(const char *name);
int rte_eal_tailq_register(struct rte_tailq_elem *t);
#define EAL_REGISTER_TAILQ(t) \
-void tailqinitfn_ ##t(void); \
-void __attribute__((constructor, used)) tailqinitfn_ ##t(void) \
+RTE_INIT(tailqinitfn_ ##t); \
+static void tailqinitfn_ ##t(void) \
{ \
if (rte_eal_tailq_register(&t) < 0) \
rte_panic("Cannot initialize tailq: %s\n", t.name); \
diff --git a/lib/librte_eal/common/include/rte_time.h b/lib/librte_eal/common/include/rte_time.h
index 4b13b9c1..28c6274c 100644
--- a/lib/librte_eal/common/include/rte_time.h
+++ b/lib/librte_eal/common/include/rte_time.h
@@ -31,6 +31,12 @@
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
+#ifndef _RTE_TIME_H_
+#define _RTE_TIME_H_
+
+#include <stdint.h>
+#include <time.h>
+
#define NSEC_PER_SEC 1000000000L
/**
@@ -120,3 +126,5 @@ rte_ns_to_timespec(uint64_t nsec)
return ts;
}
+
+#endif /* _RTE_TIME_H_ */
diff --git a/lib/librte_eal/common/include/rte_vdev.h b/lib/librte_eal/common/include/rte_vdev.h
new file mode 100644
index 00000000..784e837d
--- /dev/null
+++ b/lib/librte_eal/common/include/rte_vdev.h
@@ -0,0 +1,102 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright(c) 2016 RehiveTech. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of RehiveTech nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef RTE_VDEV_H
+#define RTE_VDEV_H
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include <sys/queue.h>
+#include <rte_dev.h>
+
+/** Doubly linked list of virtual device drivers. */
+TAILQ_HEAD(vdev_driver_list, rte_vdev_driver);
+
+/**
+ * Probe function called for each virtual device driver once.
+ */
+typedef int (rte_vdev_probe_t)(const char *name, const char *args);
+
+/**
+ * Remove function called for each virtual device driver once.
+ */
+typedef int (rte_vdev_remove_t)(const char *name);
+
+/**
+ * A virtual device driver abstraction.
+ */
+struct rte_vdev_driver {
+ TAILQ_ENTRY(rte_vdev_driver) next; /**< Next in list. */
+ struct rte_driver driver; /**< Inherited general driver. */
+ rte_vdev_probe_t *probe; /**< Virtual device probe function. */
+ rte_vdev_remove_t *remove; /**< Virtual device remove function. */
+};
+
+/**
+ * Register a virtual device driver.
+ *
+ * @param driver
+ * A pointer to a rte_vdev_driver structure describing the driver
+ * to be registered.
+ */
+void rte_eal_vdrv_register(struct rte_vdev_driver *driver);
+
+/**
+ * Unregister a virtual device driver.
+ *
+ * @param driver
+ * A pointer to a rte_vdev_driver structure describing the driver
+ * to be unregistered.
+ */
+void rte_eal_vdrv_unregister(struct rte_vdev_driver *driver);
+
+#define RTE_PMD_REGISTER_VDEV(nm, vdrv)\
+RTE_INIT(vdrvinitfn_ ##vdrv);\
+static const char *vdrvinit_ ## nm ## _alias;\
+static void vdrvinitfn_ ##vdrv(void)\
+{\
+ (vdrv).driver.name = RTE_STR(nm);\
+ (vdrv).driver.alias = vdrvinit_ ## nm ## _alias;\
+ rte_eal_vdrv_register(&vdrv);\
+} \
+RTE_PMD_EXPORT_NAME(nm, __COUNTER__)
+
+#define RTE_PMD_REGISTER_ALIAS(nm, alias)\
+static const char *vdrvinit_ ## nm ## _alias = RTE_STR(alias)
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* RTE_VDEV_H */
diff --git a/lib/librte_eal/common/include/rte_version.h b/lib/librte_eal/common/include/rte_version.h
index 8187dc7b..da204e63 100644
--- a/lib/librte_eal/common/include/rte_version.h
+++ b/lib/librte_eal/common/include/rte_version.h
@@ -45,6 +45,7 @@ extern "C" {
#include <stdint.h>
#include <string.h>
+#include <stdio.h>
#include <rte_common.h>
/**
@@ -60,12 +61,12 @@ extern "C" {
/**
* Minor version/month number i.e. the mm in yy.mm.z
*/
-#define RTE_VER_MONTH 7
+#define RTE_VER_MONTH 11
/**
* Patch level number i.e. the z in yy.mm.z
*/
-#define RTE_VER_MINOR 2
+#define RTE_VER_MINOR 0
/**
* Extra string to be appended to version number
diff --git a/lib/librte_eal/common/malloc_heap.c b/lib/librte_eal/common/malloc_heap.c
index 763fa324..267a4c6c 100644
--- a/lib/librte_eal/common/malloc_heap.c
+++ b/lib/librte_eal/common/malloc_heap.c
@@ -221,14 +221,6 @@ rte_eal_malloc_heap_init(void)
for (ms = &mcfg->memseg[0], ms_cnt = 0;
(ms_cnt < RTE_MAX_MEMSEG) && (ms->len > 0);
ms_cnt++, ms++) {
-#ifdef RTE_LIBRTE_IVSHMEM
- /*
- * if segment has ioremap address set, it's an IVSHMEM segment and
- * it is not memory to allocate from.
- */
- if (ms->ioremap_addr != 0)
- continue;
-#endif
malloc_heap_add_memseg(&mcfg->malloc_heaps[ms->socket_id], ms);
}
diff --git a/lib/librte_eal/linuxapp/eal/Makefile b/lib/librte_eal/linuxapp/eal/Makefile
index 182729c0..4e206f09 100644
--- a/lib/librte_eal/linuxapp/eal/Makefile
+++ b/lib/librte_eal/linuxapp/eal/Makefile
@@ -37,19 +37,13 @@ ARCH_DIR ?= $(RTE_ARCH)
EXPORT_MAP := rte_eal_version.map
VPATH += $(RTE_SDK)/lib/librte_eal/common/arch/$(ARCH_DIR)
-LIBABIVER := 2
+LIBABIVER := 3
VPATH += $(RTE_SDK)/lib/librte_eal/common
CFLAGS += -I$(SRCDIR)/include
CFLAGS += -I$(RTE_SDK)/lib/librte_eal/common
CFLAGS += -I$(RTE_SDK)/lib/librte_eal/common/include
-ifeq ($(CONFIG_RTE_LIBRTE_IVSHMEM),y)
-# workaround for circular dependency eal -> ivshmem -> ring/mempool -> eal
-CFLAGS += -I$(RTE_SDK)/lib/librte_ring
-CFLAGS += -I$(RTE_SDK)/lib/librte_mempool
-CFLAGS += -I$(RTE_SDK)/lib/librte_ivshmem
-endif
CFLAGS += $(WERROR_FLAGS) -O3
LDLIBS += -ldl
@@ -76,9 +70,6 @@ SRCS-$(CONFIG_RTE_EXEC_ENV_LINUXAPP) += eal_lcore.c
SRCS-$(CONFIG_RTE_EXEC_ENV_LINUXAPP) += eal_timer.c
SRCS-$(CONFIG_RTE_EXEC_ENV_LINUXAPP) += eal_interrupts.c
SRCS-$(CONFIG_RTE_EXEC_ENV_LINUXAPP) += eal_alarm.c
-ifeq ($(CONFIG_RTE_LIBRTE_IVSHMEM),y)
-SRCS-$(CONFIG_RTE_EXEC_ENV_LINUXAPP) += eal_ivshmem.c
-endif
# from common dir
SRCS-$(CONFIG_RTE_EXEC_ENV_LINUXAPP) += eal_common_lcore.c
@@ -86,6 +77,7 @@ SRCS-$(CONFIG_RTE_EXEC_ENV_LINUXAPP) += eal_common_timer.c
SRCS-$(CONFIG_RTE_EXEC_ENV_LINUXAPP) += eal_common_memzone.c
SRCS-$(CONFIG_RTE_EXEC_ENV_LINUXAPP) += eal_common_log.c
SRCS-$(CONFIG_RTE_EXEC_ENV_LINUXAPP) += eal_common_launch.c
+SRCS-$(CONFIG_RTE_EXEC_ENV_LINUXAPP) += eal_common_vdev.c
SRCS-$(CONFIG_RTE_EXEC_ENV_LINUXAPP) += eal_common_pci.c
SRCS-$(CONFIG_RTE_EXEC_ENV_LINUXAPP) += eal_common_pci_uio.c
SRCS-$(CONFIG_RTE_EXEC_ENV_LINUXAPP) += eal_common_memory.c
diff --git a/lib/librte_eal/linuxapp/eal/eal.c b/lib/librte_eal/linuxapp/eal/eal.c
index 3fb2188f..2075282e 100644
--- a/lib/librte_eal/linuxapp/eal/eal.c
+++ b/lib/librte_eal/linuxapp/eal/eal.c
@@ -70,6 +70,7 @@
#include <rte_cpuflags.h>
#include <rte_interrupts.h>
#include <rte_pci.h>
+#include <rte_dev.h>
#include <rte_devargs.h>
#include <rte_common.h>
#include <rte_version.h>
@@ -238,7 +239,8 @@ rte_eal_config_attach(void)
mem_config = (struct rte_mem_config *) mmap(NULL, sizeof(*mem_config),
PROT_READ, MAP_SHARED, mem_cfg_fd, 0);
if (mem_config == MAP_FAILED)
- rte_panic("Cannot mmap memory for rte_config\n");
+ rte_panic("Cannot mmap memory for rte_config! error %i (%s)\n",
+ errno, strerror(errno));
rte_config.mem_config = mem_config;
}
@@ -263,9 +265,17 @@ rte_eal_config_reattach(void)
mem_config = (struct rte_mem_config *) mmap(rte_mem_cfg_addr,
sizeof(*mem_config), PROT_READ | PROT_WRITE, MAP_SHARED,
mem_cfg_fd, 0);
+ if (mem_config == MAP_FAILED || mem_config != rte_mem_cfg_addr) {
+ if (mem_config != MAP_FAILED)
+ /* errno is stale, don't use */
+ rte_panic("Cannot mmap memory for rte_config at [%p], got [%p]"
+ " - please use '--base-virtaddr' option\n",
+ rte_mem_cfg_addr, mem_config);
+ else
+ rte_panic("Cannot mmap memory for rte_config! error %i (%s)\n",
+ errno, strerror(errno));
+ }
close(mem_cfg_fd);
- if (mem_config == MAP_FAILED || mem_config != rte_mem_cfg_addr)
- rte_panic("Cannot mmap memory for rte_config\n");
rte_config.mem_config = mem_config;
}
@@ -740,6 +750,9 @@ rte_eal_init(int argc, char **argv)
char cpuset[RTE_CPU_AFFINITY_STR_LEN];
char thread_name[RTE_MAX_THREAD_NAME_LEN];
+ /* checks if the machine is adequate */
+ rte_cpu_check_supported();
+
if (!rte_atomic32_test_and_set(&run_once))
return -1;
@@ -748,9 +761,6 @@ rte_eal_init(int argc, char **argv)
thread_id = pthread_self();
- if (rte_eal_log_early_init() < 0)
- rte_panic("Cannot init early logs\n");
-
eal_log_level_parse(argc, argv);
/* set log level as early as possible */
@@ -789,6 +799,9 @@ rte_eal_init(int argc, char **argv)
rte_config_init();
+ if (rte_eal_log_init(logid, internal_config.syslog_facility) < 0)
+ rte_panic("Cannot init logs\n");
+
if (rte_eal_pci_init() < 0)
rte_panic("Cannot init PCI\n");
@@ -797,11 +810,6 @@ rte_eal_init(int argc, char **argv)
rte_panic("Cannot init VFIO\n");
#endif
-#ifdef RTE_LIBRTE_IVSHMEM
- if (rte_eal_ivshmem_init() < 0)
- rte_panic("Cannot init IVSHMEM\n");
-#endif
-
if (rte_eal_memory_init() < 0)
rte_panic("Cannot init memory\n");
@@ -814,14 +822,6 @@ rte_eal_init(int argc, char **argv)
if (rte_eal_tailqs_init() < 0)
rte_panic("Cannot init tail queues for objects\n");
-#ifdef RTE_LIBRTE_IVSHMEM
- if (rte_eal_ivshmem_obj_init() < 0)
- rte_panic("Cannot init IVSHMEM objects\n");
-#endif
-
- if (rte_eal_log_init(logid, internal_config.syslog_facility) < 0)
- rte_panic("Cannot init logs\n");
-
if (rte_eal_alarm_init() < 0)
rte_panic("Cannot init interrupt-handling thread\n");
diff --git a/lib/librte_eal/linuxapp/eal/eal_ivshmem.c b/lib/librte_eal/linuxapp/eal/eal_ivshmem.c
deleted file mode 100644
index 67b3caf2..00000000
--- a/lib/librte_eal/linuxapp/eal/eal_ivshmem.c
+++ /dev/null
@@ -1,954 +0,0 @@
-/*-
- * BSD LICENSE
- *
- * Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- *
- * * Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in
- * the documentation and/or other materials provided with the
- * distribution.
- * * Neither the name of Intel Corporation nor the names of its
- * contributors may be used to endorse or promote products derived
- * from this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- */
-
-#ifdef RTE_LIBRTE_IVSHMEM /* hide it from coverage */
-
-#include <stdint.h>
-#include <unistd.h>
-#include <inttypes.h>
-#include <sys/mman.h>
-#include <sys/file.h>
-#include <string.h>
-#include <sys/queue.h>
-
-#include <rte_log.h>
-#include <rte_pci.h>
-#include <rte_memory.h>
-#include <rte_eal.h>
-#include <rte_eal_memconfig.h>
-#include <rte_string_fns.h>
-#include <rte_errno.h>
-#include <rte_ring.h>
-#include <rte_malloc.h>
-#include <rte_common.h>
-#include <rte_ivshmem.h>
-
-#include "eal_internal_cfg.h"
-#include "eal_private.h"
-
-#define PCI_VENDOR_ID_IVSHMEM 0x1Af4
-#define PCI_DEVICE_ID_IVSHMEM 0x1110
-
-#define IVSHMEM_MAGIC 0x0BADC0DE
-
-#define IVSHMEM_RESOURCE_PATH "/sys/bus/pci/devices/%04x:%02x:%02x.%x/resource2"
-#define IVSHMEM_CONFIG_PATH "/var/run/.%s_ivshmem_config"
-
-#define PHYS 0x1
-#define VIRT 0x2
-#define IOREMAP 0x4
-#define FULL (PHYS|VIRT|IOREMAP)
-
-#define METADATA_SIZE_ALIGNED \
- (RTE_ALIGN_CEIL(sizeof(struct rte_ivshmem_metadata),pagesz))
-
-#define CONTAINS(x,y)\
- (((y).addr_64 >= (x).addr_64) && ((y).addr_64 < (x).addr_64 + (x).len))
-
-#define DIM(x) (sizeof(x)/sizeof(x[0]))
-
-struct ivshmem_pci_device {
- char path[PATH_MAX];
- phys_addr_t ioremap_addr;
-};
-
-/* data type to store in config */
-struct ivshmem_segment {
- struct rte_ivshmem_metadata_entry entry;
- uint64_t align;
- char path[PATH_MAX];
-};
-struct ivshmem_shared_config {
- struct ivshmem_segment segment[RTE_MAX_MEMSEG];
- uint32_t segment_idx;
- struct ivshmem_pci_device pci_devs[RTE_LIBRTE_IVSHMEM_MAX_PCI_DEVS];
- uint32_t pci_devs_idx;
-};
-static struct ivshmem_shared_config * ivshmem_config;
-static int memseg_idx;
-static int pagesz;
-
-/* Tailq heads to add rings to */
-TAILQ_HEAD(rte_ring_list, rte_tailq_entry);
-
-/*
- * Utility functions
- */
-
-static int
-is_ivshmem_device(struct rte_pci_device * dev)
-{
- return dev->id.vendor_id == PCI_VENDOR_ID_IVSHMEM
- && dev->id.device_id == PCI_DEVICE_ID_IVSHMEM;
-}
-
-static void *
-map_metadata(int fd, uint64_t len)
-{
- size_t metadata_len = sizeof(struct rte_ivshmem_metadata);
- size_t aligned_len = METADATA_SIZE_ALIGNED;
-
- return mmap(NULL, metadata_len, PROT_READ | PROT_WRITE,
- MAP_SHARED, fd, len - aligned_len);
-}
-
-static void
-unmap_metadata(void * ptr)
-{
- munmap(ptr, sizeof(struct rte_ivshmem_metadata));
-}
-
-static int
-has_ivshmem_metadata(int fd, uint64_t len)
-{
- struct rte_ivshmem_metadata metadata;
- void * ptr;
-
- ptr = map_metadata(fd, len);
-
- if (ptr == MAP_FAILED)
- return -1;
-
- metadata = *(struct rte_ivshmem_metadata*) (ptr);
-
- unmap_metadata(ptr);
-
- return metadata.magic_number == IVSHMEM_MAGIC;
-}
-
-static void
-remove_segment(struct ivshmem_segment * ms, int len, int idx)
-{
- int i;
-
- for (i = idx; i < len - 1; i++)
- memcpy(&ms[i], &ms[i+1], sizeof(struct ivshmem_segment));
- memset(&ms[len-1], 0, sizeof(struct ivshmem_segment));
-}
-
-static int
-overlap(const struct rte_memzone * mz1, const struct rte_memzone * mz2)
-{
- uint64_t start1, end1, start2, end2;
- uint64_t p_start1, p_end1, p_start2, p_end2;
- uint64_t i_start1, i_end1, i_start2, i_end2;
- int result = 0;
-
- /* gather virtual addresses */
- start1 = mz1->addr_64;
- end1 = mz1->addr_64 + mz1->len;
- start2 = mz2->addr_64;
- end2 = mz2->addr_64 + mz2->len;
-
- /* gather physical addresses */
- p_start1 = mz1->phys_addr;
- p_end1 = mz1->phys_addr + mz1->len;
- p_start2 = mz2->phys_addr;
- p_end2 = mz2->phys_addr + mz2->len;
-
- /* gather ioremap addresses */
- i_start1 = mz1->ioremap_addr;
- i_end1 = mz1->ioremap_addr + mz1->len;
- i_start2 = mz2->ioremap_addr;
- i_end2 = mz2->ioremap_addr + mz2->len;
-
- /* check for overlap in virtual addresses */
- if (start1 >= start2 && start1 < end2)
- result |= VIRT;
- if (start2 >= start1 && start2 < end1)
- result |= VIRT;
-
- /* check for overlap in physical addresses */
- if (p_start1 >= p_start2 && p_start1 < p_end2)
- result |= PHYS;
- if (p_start2 >= p_start1 && p_start2 < p_end1)
- result |= PHYS;
-
- /* check for overlap in ioremap addresses */
- if (i_start1 >= i_start2 && i_start1 < i_end2)
- result |= IOREMAP;
- if (i_start2 >= i_start1 && i_start2 < i_end1)
- result |= IOREMAP;
-
- return result;
-}
-
-static int
-adjacent(const struct rte_memzone * mz1, const struct rte_memzone * mz2)
-{
- uint64_t start1, end1, start2, end2;
- uint64_t p_start1, p_end1, p_start2, p_end2;
- uint64_t i_start1, i_end1, i_start2, i_end2;
- int result = 0;
-
- /* gather virtual addresses */
- start1 = mz1->addr_64;
- end1 = mz1->addr_64 + mz1->len;
- start2 = mz2->addr_64;
- end2 = mz2->addr_64 + mz2->len;
-
- /* gather physical addresses */
- p_start1 = mz1->phys_addr;
- p_end1 = mz1->phys_addr + mz1->len;
- p_start2 = mz2->phys_addr;
- p_end2 = mz2->phys_addr + mz2->len;
-
- /* gather ioremap addresses */
- i_start1 = mz1->ioremap_addr;
- i_end1 = mz1->ioremap_addr + mz1->len;
- i_start2 = mz2->ioremap_addr;
- i_end2 = mz2->ioremap_addr + mz2->len;
-
- /* check if segments are virtually adjacent */
- if (start1 == end2)
- result |= VIRT;
- if (start2 == end1)
- result |= VIRT;
-
- /* check if segments are physically adjacent */
- if (p_start1 == p_end2)
- result |= PHYS;
- if (p_start2 == p_end1)
- result |= PHYS;
-
- /* check if segments are ioremap-adjacent */
- if (i_start1 == i_end2)
- result |= IOREMAP;
- if (i_start2 == i_end1)
- result |= IOREMAP;
-
- return result;
-}
-
-static int
-has_adjacent_segments(struct ivshmem_segment * ms, int len)
-{
- int i, j;
-
- for (i = 0; i < len; i++)
- for (j = i + 1; j < len; j++) {
- /* we're only interested in fully adjacent segments; partially
- * adjacent segments can coexist.
- */
- if (adjacent(&ms[i].entry.mz, &ms[j].entry.mz) == FULL)
- return 1;
- }
- return 0;
-}
-
-static int
-has_overlapping_segments(struct ivshmem_segment * ms, int len)
-{
- int i, j;
-
- for (i = 0; i < len; i++)
- for (j = i + 1; j < len; j++)
- if (overlap(&ms[i].entry.mz, &ms[j].entry.mz))
- return 1;
- return 0;
-}
-
-static int
-seg_compare(const void * a, const void * b)
-{
- const struct ivshmem_segment * s1 = (const struct ivshmem_segment*) a;
- const struct ivshmem_segment * s2 = (const struct ivshmem_segment*) b;
-
- /* move unallocated zones to the end */
- if (s1->entry.mz.addr == NULL && s2->entry.mz.addr == NULL)
- return 0;
- if (s1->entry.mz.addr == 0)
- return 1;
- if (s2->entry.mz.addr == 0)
- return -1;
-
- return s1->entry.mz.phys_addr > s2->entry.mz.phys_addr;
-}
-
-#ifdef RTE_LIBRTE_IVSHMEM_DEBUG
-static void
-entry_dump(struct rte_ivshmem_metadata_entry *e)
-{
- RTE_LOG(DEBUG, EAL, "\tvirt: %p-%p\n", e->mz.addr,
- RTE_PTR_ADD(e->mz.addr, e->mz.len));
- RTE_LOG(DEBUG, EAL, "\tphys: 0x%" PRIx64 "-0x%" PRIx64 "\n",
- e->mz.phys_addr,
- e->mz.phys_addr + e->mz.len);
- RTE_LOG(DEBUG, EAL, "\tio: 0x%" PRIx64 "-0x%" PRIx64 "\n",
- e->mz.ioremap_addr,
- e->mz.ioremap_addr + e->mz.len);
- RTE_LOG(DEBUG, EAL, "\tlen: 0x%" PRIx64 "\n", e->mz.len);
- RTE_LOG(DEBUG, EAL, "\toff: 0x%" PRIx64 "\n", e->offset);
-}
-#endif
-
-
-
-/*
- * Actual useful code
- */
-
-/* read through metadata mapped from the IVSHMEM device */
-static int
-read_metadata(char * path, int path_len, int fd, uint64_t flen)
-{
- struct rte_ivshmem_metadata metadata;
- struct rte_ivshmem_metadata_entry * entry;
- int idx, i;
- void * ptr;
-
- ptr = map_metadata(fd, flen);
-
- if (ptr == MAP_FAILED)
- return -1;
-
- metadata = *(struct rte_ivshmem_metadata*) (ptr);
-
- unmap_metadata(ptr);
-
- RTE_LOG(DEBUG, EAL, "Parsing metadata for \"%s\"\n", metadata.name);
-
- idx = ivshmem_config->segment_idx;
-
- for (i = 0; i < RTE_LIBRTE_IVSHMEM_MAX_ENTRIES &&
- idx <= RTE_MAX_MEMSEG; i++) {
-
- if (idx == RTE_MAX_MEMSEG) {
- RTE_LOG(ERR, EAL, "Not enough memory segments!\n");
- return -1;
- }
-
- entry = &metadata.entry[i];
-
- /* stop on uninitialized memzone */
- if (entry->mz.len == 0)
- break;
-
- /* copy metadata entry */
- memcpy(&ivshmem_config->segment[idx].entry, entry,
- sizeof(struct rte_ivshmem_metadata_entry));
-
- /* copy path */
- snprintf(ivshmem_config->segment[idx].path, path_len, "%s", path);
-
- idx++;
- }
- ivshmem_config->segment_idx = idx;
-
- return 0;
-}
-
-/* check through each segment and look for adjacent or overlapping ones. */
-static int
-cleanup_segments(struct ivshmem_segment * ms, int tbl_len)
-{
- struct ivshmem_segment * s, * tmp;
- int i, j, concat, seg_adjacent, seg_overlapping;
- uint64_t start1, start2, end1, end2, p_start1, p_start2, i_start1, i_start2;
-
- qsort(ms, tbl_len, sizeof(struct ivshmem_segment),
- seg_compare);
-
- while (has_overlapping_segments(ms, tbl_len) ||
- has_adjacent_segments(ms, tbl_len)) {
-
- for (i = 0; i < tbl_len; i++) {
- s = &ms[i];
-
- concat = 0;
-
- for (j = i + 1; j < tbl_len; j++) {
- tmp = &ms[j];
-
- /* check if this segment is overlapping with existing segment,
- * or is adjacent to existing segment */
- seg_overlapping = overlap(&s->entry.mz, &tmp->entry.mz);
- seg_adjacent = adjacent(&s->entry.mz, &tmp->entry.mz);
-
- /* check if segments fully overlap or are fully adjacent */
- if ((seg_adjacent == FULL) || (seg_overlapping == FULL)) {
-
-#ifdef RTE_LIBRTE_IVSHMEM_DEBUG
- RTE_LOG(DEBUG, EAL, "Concatenating segments\n");
- RTE_LOG(DEBUG, EAL, "Segment %i:\n", i);
- entry_dump(&s->entry);
- RTE_LOG(DEBUG, EAL, "Segment %i:\n", j);
- entry_dump(&tmp->entry);
-#endif
-
- start1 = s->entry.mz.addr_64;
- start2 = tmp->entry.mz.addr_64;
- p_start1 = s->entry.mz.phys_addr;
- p_start2 = tmp->entry.mz.phys_addr;
- i_start1 = s->entry.mz.ioremap_addr;
- i_start2 = tmp->entry.mz.ioremap_addr;
- end1 = s->entry.mz.addr_64 + s->entry.mz.len;
- end2 = tmp->entry.mz.addr_64 + tmp->entry.mz.len;
-
- /* settle for minimum start address and maximum length */
- s->entry.mz.addr_64 = RTE_MIN(start1, start2);
- s->entry.mz.phys_addr = RTE_MIN(p_start1, p_start2);
- s->entry.mz.ioremap_addr = RTE_MIN(i_start1, i_start2);
- s->entry.offset = RTE_MIN(s->entry.offset, tmp->entry.offset);
- s->entry.mz.len = RTE_MAX(end1, end2) - s->entry.mz.addr_64;
- concat = 1;
-
-#ifdef RTE_LIBRTE_IVSHMEM_DEBUG
- RTE_LOG(DEBUG, EAL, "Resulting segment:\n");
- entry_dump(&s->entry);
-
-#endif
- }
- /* if segments not fully overlap, we have an error condition.
- * adjacent segments can coexist.
- */
- else if (seg_overlapping > 0) {
- RTE_LOG(ERR, EAL, "Segments %i and %i overlap!\n", i, j);
-#ifdef RTE_LIBRTE_IVSHMEM_DEBUG
- RTE_LOG(DEBUG, EAL, "Segment %i:\n", i);
- entry_dump(&s->entry);
- RTE_LOG(DEBUG, EAL, "Segment %i:\n", j);
- entry_dump(&tmp->entry);
-#endif
- return -1;
- }
- if (concat)
- break;
- }
- /* if we concatenated, remove segment at j */
- if (concat) {
- remove_segment(ms, tbl_len, j);
- tbl_len--;
- break;
- }
- }
- }
-
- return tbl_len;
-}
-
-static int
-create_shared_config(void)
-{
- char path[PATH_MAX];
- int fd;
-
- /* build ivshmem config file path */
- snprintf(path, sizeof(path), IVSHMEM_CONFIG_PATH,
- internal_config.hugefile_prefix);
-
- fd = open(path, O_CREAT | O_RDWR, 0600);
-
- if (fd < 0) {
- RTE_LOG(ERR, EAL, "Could not open %s: %s\n", path, strerror(errno));
- return -1;
- }
-
- /* try ex-locking first - if the file is locked, we have a problem */
- if (flock(fd, LOCK_EX | LOCK_NB) == -1) {
- RTE_LOG(ERR, EAL, "Locking %s failed: %s\n", path, strerror(errno));
- close(fd);
- return -1;
- }
-
- if (ftruncate(fd, sizeof(struct ivshmem_shared_config)) < 0) {
- RTE_LOG(ERR, EAL, "ftruncate failed: %s\n", strerror(errno));
- return -1;
- }
-
- ivshmem_config = mmap(NULL, sizeof(struct ivshmem_shared_config),
- PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
-
- if (ivshmem_config == MAP_FAILED)
- return -1;
-
- memset(ivshmem_config, 0, sizeof(struct ivshmem_shared_config));
-
- /* change the exclusive lock we got earlier to a shared lock */
- if (flock(fd, LOCK_SH | LOCK_NB) == -1) {
- RTE_LOG(ERR, EAL, "Locking %s failed: %s \n", path, strerror(errno));
- return -1;
- }
-
- close(fd);
-
- return 0;
-}
-
-/* open shared config file and, if present, map the config.
- * having no config file is not an error condition, as we later check if
- * ivshmem_config is NULL (if it is, that means nothing was mapped). */
-static int
-open_shared_config(void)
-{
- char path[PATH_MAX];
- int fd;
-
- /* build ivshmem config file path */
- snprintf(path, sizeof(path), IVSHMEM_CONFIG_PATH,
- internal_config.hugefile_prefix);
-
- fd = open(path, O_RDONLY);
-
- /* if the file doesn't exist, just return success */
- if (fd < 0 && errno == ENOENT)
- return 0;
- /* else we have an error condition */
- else if (fd < 0) {
- RTE_LOG(ERR, EAL, "Could not open %s: %s\n",
- path, strerror(errno));
- return -1;
- }
-
- /* try ex-locking first - if the lock *does* succeed, this means it's a
- * stray config file, so it should be deleted.
- */
- if (flock(fd, LOCK_EX | LOCK_NB) != -1) {
-
- /* if we can't remove the file, something is wrong */
- if (unlink(path) < 0) {
- RTE_LOG(ERR, EAL, "Could not remove %s: %s\n", path,
- strerror(errno));
- return -1;
- }
-
- /* release the lock */
- flock(fd, LOCK_UN);
- close(fd);
-
- /* return success as having a stray config file is equivalent to not
- * having config file at all.
- */
- return 0;
- }
-
- ivshmem_config = mmap(NULL, sizeof(struct ivshmem_shared_config),
- PROT_READ, MAP_SHARED, fd, 0);
-
- if (ivshmem_config == MAP_FAILED)
- return -1;
-
- /* place a shared lock on config file */
- if (flock(fd, LOCK_SH | LOCK_NB) == -1) {
- RTE_LOG(ERR, EAL, "Locking %s failed: %s \n", path, strerror(errno));
- return -1;
- }
-
- close(fd);
-
- return 0;
-}
-
-/*
- * This function does the following:
- *
- * 1) Builds a table of ivshmem_segments with proper offset alignment
- * 2) Cleans up that table so that we don't have any overlapping or adjacent
- * memory segments
- * 3) Creates memsegs from this table and maps them into memory.
- */
-static inline int
-map_all_segments(void)
-{
- struct ivshmem_segment ms_tbl[RTE_MAX_MEMSEG];
- struct ivshmem_pci_device * pci_dev;
- struct rte_mem_config * mcfg;
- struct ivshmem_segment * seg;
- int fd, fd_zero;
- unsigned i, j;
- struct rte_memzone mz;
- struct rte_memseg ms;
- void * base_addr;
- uint64_t align, len;
- phys_addr_t ioremap_addr;
-
- ioremap_addr = 0;
-
- memset(ms_tbl, 0, sizeof(ms_tbl));
- memset(&mz, 0, sizeof(struct rte_memzone));
- memset(&ms, 0, sizeof(struct rte_memseg));
-
- /* first, build a table of memsegs to map, to avoid failed mmaps due to
- * overlaps
- */
- for (i = 0; i < ivshmem_config->segment_idx && i <= RTE_MAX_MEMSEG; i++) {
- if (i == RTE_MAX_MEMSEG) {
- RTE_LOG(ERR, EAL, "Too many segments requested!\n");
- return -1;
- }
-
- seg = &ivshmem_config->segment[i];
-
- /* copy segment to table */
- memcpy(&ms_tbl[i], seg, sizeof(struct ivshmem_segment));
-
- /* find ioremap addr */
- for (j = 0; j < DIM(ivshmem_config->pci_devs); j++) {
- pci_dev = &ivshmem_config->pci_devs[j];
- if (!strncmp(pci_dev->path, seg->path, sizeof(pci_dev->path))) {
- ioremap_addr = pci_dev->ioremap_addr;
- break;
- }
- }
- if (ioremap_addr == 0) {
- RTE_LOG(ERR, EAL, "Cannot find ioremap addr!\n");
- return -1;
- }
-
- /* work out alignments */
- align = seg->entry.mz.addr_64 -
- RTE_ALIGN_FLOOR(seg->entry.mz.addr_64, 0x1000);
- len = RTE_ALIGN_CEIL(seg->entry.mz.len + align, 0x1000);
-
- /* save original alignments */
- ms_tbl[i].align = align;
-
- /* create a memory zone */
- mz.addr_64 = seg->entry.mz.addr_64 - align;
- mz.len = len;
- mz.hugepage_sz = seg->entry.mz.hugepage_sz;
- mz.phys_addr = seg->entry.mz.phys_addr - align;
-
- /* find true physical address */
- mz.ioremap_addr = ioremap_addr + seg->entry.offset - align;
-
- ms_tbl[i].entry.offset = seg->entry.offset - align;
-
- memcpy(&ms_tbl[i].entry.mz, &mz, sizeof(struct rte_memzone));
- }
-
- /* clean up the segments */
- memseg_idx = cleanup_segments(ms_tbl, ivshmem_config->segment_idx);
-
- if (memseg_idx < 0)
- return -1;
-
- mcfg = rte_eal_get_configuration()->mem_config;
-
- fd_zero = open("/dev/zero", O_RDWR);
-
- if (fd_zero < 0) {
- RTE_LOG(ERR, EAL, "Cannot open /dev/zero: %s\n", strerror(errno));
- return -1;
- }
-
- /* create memsegs and put them into DPDK memory */
- for (i = 0; i < (unsigned) memseg_idx; i++) {
-
- seg = &ms_tbl[i];
-
- ms.addr_64 = seg->entry.mz.addr_64;
- ms.hugepage_sz = seg->entry.mz.hugepage_sz;
- ms.len = seg->entry.mz.len;
- ms.nchannel = rte_memory_get_nchannel();
- ms.nrank = rte_memory_get_nrank();
- ms.phys_addr = seg->entry.mz.phys_addr;
- ms.ioremap_addr = seg->entry.mz.ioremap_addr;
- ms.socket_id = seg->entry.mz.socket_id;
-
- base_addr = mmap(ms.addr, ms.len,
- PROT_READ | PROT_WRITE, MAP_PRIVATE, fd_zero, 0);
-
- if (base_addr == MAP_FAILED || base_addr != ms.addr) {
- RTE_LOG(ERR, EAL, "Cannot map /dev/zero!\n");
- return -1;
- }
-
- fd = open(seg->path, O_RDWR);
-
- if (fd < 0) {
- RTE_LOG(ERR, EAL, "Cannot open %s: %s\n", seg->path,
- strerror(errno));
- return -1;
- }
-
- munmap(ms.addr, ms.len);
-
- base_addr = mmap(ms.addr, ms.len,
- PROT_READ | PROT_WRITE, MAP_SHARED, fd,
- seg->entry.offset);
-
-
- if (base_addr == MAP_FAILED || base_addr != ms.addr) {
- RTE_LOG(ERR, EAL, "Cannot map segment into memory: "
- "expected %p got %p (%s)\n", ms.addr, base_addr,
- strerror(errno));
- return -1;
- }
-
- RTE_LOG(DEBUG, EAL, "Memory segment mapped: %p (len %" PRIx64 ") at "
- "offset 0x%" PRIx64 "\n",
- ms.addr, ms.len, seg->entry.offset);
-
- /* put the pointers back into their real positions using original
- * alignment */
- ms.addr_64 += seg->align;
- ms.phys_addr += seg->align;
- ms.ioremap_addr += seg->align;
- ms.len -= seg->align;
-
- /* at this point, the rest of DPDK memory is not initialized, so we
- * expect memsegs to be empty */
- memcpy(&mcfg->memseg[i], &ms,
- sizeof(struct rte_memseg));
-
- close(fd);
-
- RTE_LOG(DEBUG, EAL, "IVSHMEM segment found, size: 0x%lx\n",
- ms.len);
- }
-
- return 0;
-}
-
-/* this happens at a later stage, after general EAL memory initialization */
-int
-rte_eal_ivshmem_obj_init(void)
-{
- struct rte_ring_list* ring_list = NULL;
- struct rte_mem_config * mcfg;
- struct ivshmem_segment * seg;
- struct rte_memzone * mz;
- struct rte_ring * r;
- struct rte_tailq_entry *te;
- unsigned i, ms, idx;
- uint64_t offset;
-
- /* secondary process would not need any object discovery - it'll all
- * already be in shared config */
- if (rte_eal_process_type() != RTE_PROC_PRIMARY || ivshmem_config == NULL)
- return 0;
-
- /* check that we have an initialised ring tail queue */
- ring_list = RTE_TAILQ_LOOKUP(RTE_TAILQ_RING_NAME, rte_ring_list);
- if (ring_list == NULL) {
- RTE_LOG(ERR, EAL, "No rte_ring tailq found!\n");
- return -1;
- }
-
- mcfg = rte_eal_get_configuration()->mem_config;
-
- /* create memzones */
- for (i = 0; i < ivshmem_config->segment_idx && i <= RTE_MAX_MEMZONE; i++) {
-
- seg = &ivshmem_config->segment[i];
-
- /* add memzone */
- if (mcfg->memzone_cnt == RTE_MAX_MEMZONE) {
- RTE_LOG(ERR, EAL, "No more memory zones available!\n");
- return -1;
- }
-
- idx = mcfg->memzone_cnt;
-
- RTE_LOG(DEBUG, EAL, "Found memzone: '%s' at %p (len 0x%" PRIx64 ")\n",
- seg->entry.mz.name, seg->entry.mz.addr, seg->entry.mz.len);
-
- memcpy(&mcfg->memzone[idx], &seg->entry.mz,
- sizeof(struct rte_memzone));
-
- /* find ioremap address */
- for (ms = 0; ms <= RTE_MAX_MEMSEG; ms++) {
- if (ms == RTE_MAX_MEMSEG) {
- RTE_LOG(ERR, EAL, "Physical address of segment not found!\n");
- return -1;
- }
- if (CONTAINS(mcfg->memseg[ms], mcfg->memzone[idx])) {
- offset = mcfg->memzone[idx].addr_64 -
- mcfg->memseg[ms].addr_64;
- mcfg->memzone[idx].ioremap_addr = mcfg->memseg[ms].ioremap_addr +
- offset;
- break;
- }
- }
-
- mcfg->memzone_cnt++;
- }
-
- rte_rwlock_write_lock(RTE_EAL_TAILQ_RWLOCK);
-
- /* find rings */
- for (i = 0; i < mcfg->memzone_cnt; i++) {
- mz = &mcfg->memzone[i];
-
- /* check if memzone has a ring prefix */
- if (strncmp(mz->name, RTE_RING_MZ_PREFIX,
- sizeof(RTE_RING_MZ_PREFIX) - 1) != 0)
- continue;
-
- r = (struct rte_ring*) (mz->addr_64);
-
- te = rte_zmalloc("RING_TAILQ_ENTRY", sizeof(*te), 0);
- if (te == NULL) {
- RTE_LOG(ERR, EAL, "Cannot allocate ring tailq entry!\n");
- return -1;
- }
-
- te->data = (void *) r;
-
- TAILQ_INSERT_TAIL(ring_list, te, next);
-
- RTE_LOG(DEBUG, EAL, "Found ring: '%s' at %p\n", r->name, mz->addr);
- }
- rte_rwlock_write_unlock(RTE_EAL_TAILQ_RWLOCK);
-
-#ifdef RTE_LIBRTE_IVSHMEM_DEBUG
- rte_memzone_dump(stdout);
- rte_ring_list_dump(stdout);
-#endif
-
- return 0;
-}
-
-/* initialize ivshmem structures */
-int rte_eal_ivshmem_init(void)
-{
- struct rte_pci_device * dev;
- struct rte_pci_resource * res;
- int fd, ret;
- char path[PATH_MAX];
-
- /* initialize everything to 0 */
- memset(path, 0, sizeof(path));
- ivshmem_config = NULL;
-
- pagesz = getpagesize();
-
- RTE_LOG(DEBUG, EAL, "Searching for IVSHMEM devices...\n");
-
- if (rte_eal_process_type() == RTE_PROC_SECONDARY) {
-
- if (open_shared_config() < 0) {
- RTE_LOG(ERR, EAL, "Could not open IVSHMEM config!\n");
- return -1;
- }
- }
- else {
-
- TAILQ_FOREACH(dev, &pci_device_list, next) {
-
- if (is_ivshmem_device(dev)) {
-
- /* IVSHMEM memory is always on BAR2 */
- res = &dev->mem_resource[2];
-
- /* if we don't have a BAR2 */
- if (res->len == 0)
- continue;
-
- /* construct pci device path */
- snprintf(path, sizeof(path), IVSHMEM_RESOURCE_PATH,
- dev->addr.domain, dev->addr.bus, dev->addr.devid,
- dev->addr.function);
-
- /* try to find memseg */
- fd = open(path, O_RDWR);
- if (fd < 0) {
- RTE_LOG(ERR, EAL, "Could not open %s\n", path);
- return -1;
- }
-
- /* check if it's a DPDK IVSHMEM device */
- ret = has_ivshmem_metadata(fd, res->len);
-
- /* is DPDK device */
- if (ret == 1) {
-
- /* config file creation is deferred until the first
- * DPDK device is found. then, it has to be created
- * only once. */
- if (ivshmem_config == NULL &&
- create_shared_config() < 0) {
- RTE_LOG(ERR, EAL, "Could not create IVSHMEM config!\n");
- close(fd);
- return -1;
- }
-
- if (read_metadata(path, sizeof(path), fd, res->len) < 0) {
- RTE_LOG(ERR, EAL, "Could not read metadata from"
- " device %02x:%02x.%x!\n", dev->addr.bus,
- dev->addr.devid, dev->addr.function);
- close(fd);
- return -1;
- }
-
- if (ivshmem_config->pci_devs_idx == RTE_LIBRTE_IVSHMEM_MAX_PCI_DEVS) {
- RTE_LOG(WARNING, EAL,
- "IVSHMEM PCI device limit exceeded. Increase "
- "CONFIG_RTE_LIBRTE_IVSHMEM_MAX_PCI_DEVS in "
- "your config file.\n");
- break;
- }
-
- RTE_LOG(INFO, EAL, "Found IVSHMEM device %02x:%02x.%x\n",
- dev->addr.bus, dev->addr.devid, dev->addr.function);
-
- ivshmem_config->pci_devs[ivshmem_config->pci_devs_idx].ioremap_addr = res->phys_addr;
- snprintf(ivshmem_config->pci_devs[ivshmem_config->pci_devs_idx].path,
- sizeof(ivshmem_config->pci_devs[ivshmem_config->pci_devs_idx].path),
- "%s", path);
-
- ivshmem_config->pci_devs_idx++;
- }
- /* failed to read */
- else if (ret < 0) {
- RTE_LOG(ERR, EAL, "Could not read IVSHMEM device: %s\n",
- strerror(errno));
- close(fd);
- return -1;
- }
- /* not a DPDK device */
- else
- RTE_LOG(DEBUG, EAL, "Skipping non-DPDK IVSHMEM device\n");
-
- /* close the BAR fd */
- close(fd);
- }
- }
- }
-
- /* ivshmem_config is not NULL only if config was created and/or mapped */
- if (ivshmem_config) {
- if (map_all_segments() < 0) {
- RTE_LOG(ERR, EAL, "Mapping IVSHMEM segments failed!\n");
- return -1;
- }
- }
- else {
- RTE_LOG(DEBUG, EAL, "No IVSHMEM configuration found! \n");
- }
-
- return 0;
-}
-
-#endif
diff --git a/lib/librte_eal/linuxapp/eal/eal_log.c b/lib/librte_eal/linuxapp/eal/eal_log.c
index d3911004..e3a50aa3 100644
--- a/lib/librte_eal/linuxapp/eal/eal_log.c
+++ b/lib/librte_eal/linuxapp/eal/eal_log.c
@@ -97,45 +97,7 @@ rte_eal_log_init(const char *id, int facility)
openlog(id, LOG_NDELAY | LOG_PID, facility);
- if (rte_eal_common_log_init(log_stream) < 0)
- return -1;
-
- return 0;
-}
-
-/* early logs */
-
-/*
- * early log function, used before rte_eal_log_init
- */
-static ssize_t
-early_log_write(__attribute__((unused)) void *c, const char *buf, size_t size)
-{
- ssize_t ret;
- ret = fwrite(buf, size, 1, stdout);
- fflush(stdout);
- if (ret == 0)
- return -1;
- return ret;
-}
-
-static cookie_io_functions_t early_log_func = {
- .write = early_log_write,
-};
-static FILE *early_log_stream;
+ eal_log_set_default(log_stream);
-/*
- * init the log library, called by rte_eal_init() to enable early
- * logs
- */
-int
-rte_eal_log_early_init(void)
-{
- early_log_stream = fopencookie(NULL, "w+", early_log_func);
- if (early_log_stream == NULL) {
- printf("Cannot configure early_log_stream\n");
- return -1;
- }
- rte_openlog_stream(early_log_stream);
return 0;
}
diff --git a/lib/librte_eal/linuxapp/eal/eal_memory.c b/lib/librte_eal/linuxapp/eal/eal_memory.c
index c04cff0c..a956bb22 100644
--- a/lib/librte_eal/linuxapp/eal/eal_memory.c
+++ b/lib/librte_eal/linuxapp/eal/eal_memory.c
@@ -376,25 +376,15 @@ map_all_hugepages(struct hugepage_file *hugepg_tbl,
void *vma_addr = NULL;
size_t vma_len = 0;
-#ifdef RTE_EAL_SINGLE_FILE_SEGMENTS
- RTE_SET_USED(vma_len);
-#endif
-
for (i = 0; i < hpi->num_pages[0]; i++) {
uint64_t hugepage_sz = hpi->hugepage_sz;
if (orig) {
hugepg_tbl[i].file_id = i;
hugepg_tbl[i].size = hugepage_sz;
-#ifdef RTE_EAL_SINGLE_FILE_SEGMENTS
- eal_get_hugefile_temp_path(hugepg_tbl[i].filepath,
- sizeof(hugepg_tbl[i].filepath), hpi->hugedir,
- hugepg_tbl[i].file_id);
-#else
eal_get_hugefile_path(hugepg_tbl[i].filepath,
sizeof(hugepg_tbl[i].filepath), hpi->hugedir,
hugepg_tbl[i].file_id);
-#endif
hugepg_tbl[i].filepath[sizeof(hugepg_tbl[i].filepath) - 1] = '\0';
}
#ifndef RTE_ARCH_64
@@ -408,8 +398,6 @@ map_all_hugepages(struct hugepage_file *hugepg_tbl,
continue;
}
#endif
-
-#ifndef RTE_EAL_SINGLE_FILE_SEGMENTS
else if (vma_len == 0) {
unsigned j, num_pages;
@@ -439,10 +427,9 @@ map_all_hugepages(struct hugepage_file *hugepg_tbl,
if (vma_addr == NULL)
vma_len = hugepage_sz;
}
-#endif
/* try to create hugepage file */
- fd = open(hugepg_tbl[i].filepath, O_CREAT | O_RDWR, 0755);
+ fd = open(hugepg_tbl[i].filepath, O_CREAT | O_RDWR, 0600);
if (fd < 0) {
RTE_LOG(DEBUG, EAL, "%s(): open failed: %s\n", __func__,
strerror(errno));
@@ -505,169 +492,6 @@ map_all_hugepages(struct hugepage_file *hugepg_tbl,
return i;
}
-#ifdef RTE_EAL_SINGLE_FILE_SEGMENTS
-
-/*
- * Remaps all hugepages into single file segments
- */
-static int
-remap_all_hugepages(struct hugepage_file *hugepg_tbl, struct hugepage_info *hpi)
-{
- int fd;
- unsigned i = 0, j, num_pages, page_idx = 0;
- void *vma_addr = NULL, *old_addr = NULL, *page_addr = NULL;
- size_t vma_len = 0;
- size_t hugepage_sz = hpi->hugepage_sz;
- size_t total_size, offset;
- char filepath[MAX_HUGEPAGE_PATH];
- phys_addr_t physaddr;
- int socket;
-
- while (i < hpi->num_pages[0]) {
-
-#ifndef RTE_ARCH_64
- /* for 32-bit systems, don't remap 1G pages and 16G pages,
- * just reuse original map address as final map address.
- */
- if ((hugepage_sz == RTE_PGSIZE_1G)
- || (hugepage_sz == RTE_PGSIZE_16G)) {
- hugepg_tbl[i].final_va = hugepg_tbl[i].orig_va;
- hugepg_tbl[i].orig_va = NULL;
- i++;
- continue;
- }
-#endif
-
- /* reserve a virtual area for next contiguous
- * physical block: count the number of
- * contiguous physical pages. */
- for (j = i+1; j < hpi->num_pages[0] ; j++) {
-#ifdef RTE_ARCH_PPC_64
- /* The physical addresses are sorted in descending
- * order on PPC64 */
- if (hugepg_tbl[j].physaddr !=
- hugepg_tbl[j-1].physaddr - hugepage_sz)
- break;
-#else
- if (hugepg_tbl[j].physaddr !=
- hugepg_tbl[j-1].physaddr + hugepage_sz)
- break;
-#endif
- }
- num_pages = j - i;
- vma_len = num_pages * hugepage_sz;
-
- socket = hugepg_tbl[i].socket_id;
-
- /* get the biggest virtual memory area up to
- * vma_len. If it fails, vma_addr is NULL, so
- * let the kernel provide the address. */
- vma_addr = get_virtual_area(&vma_len, hpi->hugepage_sz);
-
- /* If we can't find a big enough virtual area, work out how many pages
- * we are going to get */
- if (vma_addr == NULL)
- j = i + 1;
- else if (vma_len != num_pages * hugepage_sz) {
- num_pages = vma_len / hugepage_sz;
- j = i + num_pages;
-
- }
-
- hugepg_tbl[page_idx].file_id = page_idx;
- eal_get_hugefile_path(filepath,
- sizeof(filepath),
- hpi->hugedir,
- hugepg_tbl[page_idx].file_id);
-
- /* try to create hugepage file */
- fd = open(filepath, O_CREAT | O_RDWR, 0755);
- if (fd < 0) {
- RTE_LOG(ERR, EAL, "%s(): open failed: %s\n", __func__, strerror(errno));
- return -1;
- }
-
- total_size = 0;
- for (;i < j; i++) {
-
- /* unmap current segment */
- if (total_size > 0)
- munmap(vma_addr, total_size);
-
- /* unmap original page */
- munmap(hugepg_tbl[i].orig_va, hugepage_sz);
- unlink(hugepg_tbl[i].filepath);
-
- total_size += hugepage_sz;
-
- old_addr = vma_addr;
-
- /* map new, bigger segment, and populate page tables,
- * the kernel fills this segment with zeros */
- vma_addr = mmap(vma_addr, total_size,
- PROT_READ | PROT_WRITE, MAP_SHARED | MAP_POPULATE, fd, 0);
-
- if (vma_addr == MAP_FAILED || vma_addr != old_addr) {
- RTE_LOG(ERR, EAL, "%s(): mmap failed: %s\n", __func__, strerror(errno));
- close(fd);
- return -1;
- }
- }
-
- /* set shared flock on the file. */
- if (flock(fd, LOCK_SH | LOCK_NB) == -1) {
- RTE_LOG(ERR, EAL, "%s(): Locking file failed:%s \n",
- __func__, strerror(errno));
- close(fd);
- return -1;
- }
-
- snprintf(hugepg_tbl[page_idx].filepath, MAX_HUGEPAGE_PATH, "%s",
- filepath);
-
- physaddr = rte_mem_virt2phy(vma_addr);
-
- if (physaddr == RTE_BAD_PHYS_ADDR)
- return -1;
-
- hugepg_tbl[page_idx].final_va = vma_addr;
-
- hugepg_tbl[page_idx].physaddr = physaddr;
-
- hugepg_tbl[page_idx].repeated = num_pages;
-
- hugepg_tbl[page_idx].socket_id = socket;
-
- close(fd);
-
- /* verify the memory segment - that is, check that every VA corresponds
- * to the physical address we expect to see
- */
- for (offset = 0; offset < vma_len; offset += hugepage_sz) {
- uint64_t expected_physaddr;
-
- expected_physaddr = hugepg_tbl[page_idx].physaddr + offset;
- page_addr = RTE_PTR_ADD(vma_addr, offset);
- physaddr = rte_mem_virt2phy(page_addr);
-
- if (physaddr != expected_physaddr) {
- RTE_LOG(ERR, EAL, "Segment sanity check failed: wrong physaddr "
- "at %p (offset 0x%" PRIx64 ": 0x%" PRIx64
- " (expected 0x%" PRIx64 ")\n",
- page_addr, offset, physaddr, expected_physaddr);
- return -1;
- }
- }
-
- page_idx++;
- }
-
- /* zero out the rest */
- memset(&hugepg_tbl[page_idx], 0, (hpi->num_pages[0] - page_idx) * sizeof(struct hugepage_file));
- return page_idx;
-}
-#else/* RTE_EAL_SINGLE_FILE_SEGMENTS=n */
-
/* Unmap all hugepages from original mapping */
static int
unmap_all_hugepages_orig(struct hugepage_file *hugepg_tbl, struct hugepage_info *hpi)
@@ -681,7 +505,6 @@ unmap_all_hugepages_orig(struct hugepage_file *hugepg_tbl, struct hugepage_info
}
return 0;
}
-#endif /* RTE_EAL_SINGLE_FILE_SEGMENTS */
/*
* Parse /proc/self/numa_maps to get the NUMA socket ID for each huge
@@ -875,12 +698,6 @@ unmap_unneeded_hugepages(struct hugepage_file *hugepg_tbl,
for (page = 0; page < nrpages; page++) {
struct hugepage_file *hp = &hugepg_tbl[page];
-#ifdef RTE_EAL_SINGLE_FILE_SEGMENTS
- /* if this page was already cleared */
- if (hp->final_va == NULL)
- continue;
-#endif
-
/* find a page that matches the criteria */
if ((hp->size == hpi[size].hugepage_sz) &&
(hp->socket_id == (int) socket)) {
@@ -889,11 +706,7 @@ unmap_unneeded_hugepages(struct hugepage_file *hugepg_tbl,
if (pages_found == hpi[size].num_pages[socket]) {
uint64_t unmap_len;
-#ifdef RTE_EAL_SINGLE_FILE_SEGMENTS
- unmap_len = hp->size * hp->repeated;
-#else
unmap_len = hp->size;
-#endif
/* get start addr and len of the remaining segment */
munmap(hp->final_va, (size_t) unmap_len);
@@ -904,50 +717,10 @@ unmap_unneeded_hugepages(struct hugepage_file *hugepg_tbl,
__func__, hp->filepath, strerror(errno));
return -1;
}
- }
-#ifdef RTE_EAL_SINGLE_FILE_SEGMENTS
- /* else, check how much do we need to map */
- else {
- int nr_pg_left =
- hpi[size].num_pages[socket] - pages_found;
-
- /* if we need enough memory to fit into the segment */
- if (hp->repeated <= nr_pg_left) {
- pages_found += hp->repeated;
- }
- /* truncate the segment */
- else {
- uint64_t final_size = nr_pg_left * hp->size;
- uint64_t seg_size = hp->repeated * hp->size;
-
- void * unmap_va = RTE_PTR_ADD(hp->final_va,
- final_size);
- int fd;
-
- munmap(unmap_va, seg_size - final_size);
-
- fd = open(hp->filepath, O_RDWR);
- if (fd < 0) {
- RTE_LOG(ERR, EAL, "Cannot open %s: %s\n",
- hp->filepath, strerror(errno));
- return -1;
- }
- if (ftruncate(fd, final_size) < 0) {
- RTE_LOG(ERR, EAL, "Cannot truncate %s: %s\n",
- hp->filepath, strerror(errno));
- return -1;
- }
- close(fd);
-
- pages_found += nr_pg_left;
- hp->repeated = nr_pg_left;
- }
- }
-#else
- /* else, lock the page and skip */
- else
+ } else {
+ /* lock the page and skip */
pages_found++;
-#endif
+ }
} /* match page */
} /* foreach page */
@@ -1177,9 +950,6 @@ rte_eal_hugepage_init(void)
int i, j, new_memseg;
int nr_hugefiles, nr_hugepages = 0;
void *addr;
-#ifdef RTE_EAL_SINGLE_FILE_SEGMENTS
- int new_pages_count[MAX_HUGEPAGE_SIZES];
-#endif
test_proc_pagemap_readable();
@@ -1260,13 +1030,6 @@ rte_eal_hugepage_init(void)
pages_old = hpi->num_pages[0];
pages_new = map_all_hugepages(&tmp_hp[hp_offset], hpi, 1);
if (pages_new < pages_old) {
-#ifdef RTE_EAL_SINGLE_FILE_SEGMENTS
- RTE_LOG(ERR, EAL,
- "%d not %d hugepages of size %u MB allocated\n",
- pages_new, pages_old,
- (unsigned)(hpi->hugepage_sz / 0x100000));
- goto fail;
-#else
RTE_LOG(DEBUG, EAL,
"%d not %d hugepages of size %u MB allocated\n",
pages_new, pages_old,
@@ -1278,7 +1041,6 @@ rte_eal_hugepage_init(void)
hpi->num_pages[0] = pages_new;
if (pages_new == 0)
continue;
-#endif
}
/* find physical addresses and sockets for each hugepage */
@@ -1297,18 +1059,6 @@ rte_eal_hugepage_init(void)
qsort(&tmp_hp[hp_offset], hpi->num_pages[0],
sizeof(struct hugepage_file), cmp_physaddr);
-#ifdef RTE_EAL_SINGLE_FILE_SEGMENTS
- /* remap all hugepages into single file segments */
- new_pages_count[i] = remap_all_hugepages(&tmp_hp[hp_offset], hpi);
- if (new_pages_count[i] < 0){
- RTE_LOG(DEBUG, EAL, "Failed to remap %u MB pages\n",
- (unsigned)(hpi->hugepage_sz / 0x100000));
- goto fail;
- }
-
- /* we have processed a num of hugepages of this size, so inc offset */
- hp_offset += new_pages_count[i];
-#else
/* remap all hugepages */
if (map_all_hugepages(&tmp_hp[hp_offset], hpi, 0) !=
hpi->num_pages[0]) {
@@ -1323,7 +1073,6 @@ rte_eal_hugepage_init(void)
/* we have processed a num of hugepages of this size, so inc offset */
hp_offset += hpi->num_pages[0];
-#endif
}
huge_recover_sigbus();
@@ -1331,14 +1080,7 @@ rte_eal_hugepage_init(void)
if (internal_config.memory == 0 && internal_config.force_sockets == 0)
internal_config.memory = eal_get_hugepage_mem_size();
-#ifdef RTE_EAL_SINGLE_FILE_SEGMENTS
- nr_hugefiles = 0;
- for (i = 0; i < (int) internal_config.num_hugepage_sizes; i++) {
- nr_hugefiles += new_pages_count[i];
- }
-#else
nr_hugefiles = nr_hugepages;
-#endif
/* clean out the numbers of pages */
@@ -1356,12 +1098,7 @@ rte_eal_hugepage_init(void)
for (j = 0; j < nb_hpsizes; j++) {
if (tmp_hp[i].size ==
internal_config.hugepage_info[j].hugepage_sz) {
-#ifdef RTE_EAL_SINGLE_FILE_SEGMENTS
- internal_config.hugepage_info[j].num_pages[socket] +=
- tmp_hp[i].repeated;
-#else
internal_config.hugepage_info[j].num_pages[socket]++;
-#endif
}
}
}
@@ -1436,15 +1173,8 @@ rte_eal_hugepage_init(void)
free(tmp_hp);
tmp_hp = NULL;
- /* find earliest free memseg - this is needed because in case of IVSHMEM,
- * segments might have already been initialized */
- for (j = 0; j < RTE_MAX_MEMSEG; j++)
- if (mcfg->memseg[j].addr == NULL) {
- /* move to previous segment and exit loop */
- j--;
- break;
- }
-
+ /* first memseg index shall be 0 after incrementing it below */
+ j = -1;
for (i = 0; i < nr_hugefiles; i++) {
new_memseg = 0;
@@ -1482,11 +1212,7 @@ rte_eal_hugepage_init(void)
mcfg->memseg[j].phys_addr = hugepage[i].physaddr;
mcfg->memseg[j].addr = hugepage[i].final_va;
-#ifdef RTE_EAL_SINGLE_FILE_SEGMENTS
- mcfg->memseg[j].len = hugepage[i].size * hugepage[i].repeated;
-#else
mcfg->memseg[j].len = hugepage[i].size;
-#endif
mcfg->memseg[j].socket_id = hugepage[i].socket_id;
mcfg->memseg[j].hugepage_sz = hugepage[i].size;
}
@@ -1598,15 +1324,6 @@ rte_eal_hugepage_attach(void)
if (mcfg->memseg[s].len == 0)
break;
-#ifdef RTE_LIBRTE_IVSHMEM
- /*
- * if segment has ioremap address set, it's an IVSHMEM segment and
- * doesn't need mapping as it was already mapped earlier
- */
- if (mcfg->memseg[s].ioremap_addr != 0)
- continue;
-#endif
-
/*
* fdzero is mmapped to get a contiguous block of virtual
* addresses of the appropriate memseg size.
@@ -1616,13 +1333,21 @@ rte_eal_hugepage_attach(void)
PROT_READ, MAP_PRIVATE, fd_zero, 0);
if (base_addr == MAP_FAILED ||
base_addr != mcfg->memseg[s].addr) {
- RTE_LOG(ERR, EAL, "Could not mmap %llu bytes "
- "in /dev/zero to requested address [%p]: '%s'\n",
- (unsigned long long)mcfg->memseg[s].len,
- mcfg->memseg[s].addr, strerror(errno));
max_seg = s;
- if (base_addr != MAP_FAILED)
+ if (base_addr != MAP_FAILED) {
+ /* errno is stale, don't use */
+ RTE_LOG(ERR, EAL, "Could not mmap %llu bytes "
+ "in /dev/zero at [%p], got [%p] - "
+ "please use '--base-virtaddr' option\n",
+ (unsigned long long)mcfg->memseg[s].len,
+ mcfg->memseg[s].addr, base_addr);
munmap(base_addr, mcfg->memseg[s].len);
+ } else {
+ RTE_LOG(ERR, EAL, "Could not mmap %llu bytes "
+ "in /dev/zero at [%p]: '%s'\n",
+ (unsigned long long)mcfg->memseg[s].len,
+ mcfg->memseg[s].addr, strerror(errno));
+ }
if (aslr_enabled() > 0) {
RTE_LOG(ERR, EAL, "It is recommended to "
"disable ASLR in the kernel "
@@ -1648,16 +1373,6 @@ rte_eal_hugepage_attach(void)
void *addr, *base_addr;
uintptr_t offset = 0;
size_t mapping_size;
-#ifdef RTE_LIBRTE_IVSHMEM
- /*
- * if segment has ioremap address set, it's an IVSHMEM segment and
- * doesn't need mapping as it was already mapped earlier
- */
- if (mcfg->memseg[s].ioremap_addr != 0) {
- s++;
- continue;
- }
-#endif
/*
* free previously mapped memory so we can map the
* hugepages into the space
@@ -1676,11 +1391,7 @@ rte_eal_hugepage_attach(void)
hp[i].filepath);
goto error;
}
-#ifdef RTE_EAL_SINGLE_FILE_SEGMENTS
- mapping_size = hp[i].size * hp[i].repeated;
-#else
mapping_size = hp[i].size;
-#endif
addr = mmap(RTE_PTR_ADD(base_addr, offset),
mapping_size, PROT_READ | PROT_WRITE,
MAP_SHARED, fd, 0);
diff --git a/lib/librte_eal/linuxapp/eal/eal_pci.c b/lib/librte_eal/linuxapp/eal/eal_pci.c
index cd9de7cc..876ba381 100644
--- a/lib/librte_eal/linuxapp/eal/eal_pci.c
+++ b/lib/librte_eal/linuxapp/eal/eal_pci.c
@@ -350,13 +350,13 @@ pci_scan_one(const char *dirname, uint16_t domain, uint8_t bus,
dirname);
if (access(filename, R_OK) != 0) {
/* if no NUMA support, set default to 0 */
- dev->numa_node = 0;
+ dev->device.numa_node = 0;
} else {
if (eal_parse_sysfs_value(filename, &tmp) < 0) {
free(dev);
return -1;
}
- dev->numa_node = tmp;
+ dev->device.numa_node = tmp;
}
/* parse resources */
@@ -390,6 +390,7 @@ pci_scan_one(const char *dirname, uint16_t domain, uint8_t bus,
/* device is valid, add in list (sorted) */
if (TAILQ_EMPTY(&pci_device_list)) {
+ rte_eal_device_insert(&dev->device);
TAILQ_INSERT_TAIL(&pci_device_list, dev, next);
} else {
struct rte_pci_device *dev2;
@@ -402,6 +403,7 @@ pci_scan_one(const char *dirname, uint16_t domain, uint8_t bus,
if (ret < 0) {
TAILQ_INSERT_BEFORE(dev2, dev, next);
+ rte_eal_device_insert(&dev->device);
} else { /* already registered */
dev2->kdrv = dev->kdrv;
dev2->max_vfs = dev->max_vfs;
@@ -411,12 +413,26 @@ pci_scan_one(const char *dirname, uint16_t domain, uint8_t bus,
}
return 0;
}
+ rte_eal_device_insert(&dev->device);
TAILQ_INSERT_TAIL(&pci_device_list, dev, next);
}
return 0;
}
+int
+pci_update_device(const struct rte_pci_addr *addr)
+{
+ char filename[PATH_MAX];
+
+ snprintf(filename, sizeof(filename), "%s/" PCI_PRI_FMT,
+ pci_get_sysfs_path(), addr->domain, addr->bus, addr->devid,
+ addr->function);
+
+ return pci_scan_one(filename, addr->domain, addr->bus, addr->devid,
+ addr->function);
+}
+
/*
* split up a pci address into its constituent parts.
*/
@@ -743,9 +759,6 @@ rte_eal_pci_ioport_unmap(struct rte_pci_ioport *p)
int
rte_eal_pci_init(void)
{
- TAILQ_INIT(&pci_driver_list);
- TAILQ_INIT(&pci_device_list);
-
/* for debug purposes, PCI can be disabled */
if (internal_config.no_pci)
return 0;
diff --git a/lib/librte_eal/linuxapp/eal/include/exec-env/rte_interrupts.h b/lib/librte_eal/linuxapp/eal/include/exec-env/rte_interrupts.h
index 3dacbff8..d459bf48 100644
--- a/lib/librte_eal/linuxapp/eal/include/exec-env/rte_interrupts.h
+++ b/lib/librte_eal/linuxapp/eal/include/exec-env/rte_interrupts.h
@@ -82,6 +82,7 @@ struct rte_epoll_event {
/** Handle for interrupts. */
struct rte_intr_handle {
+ RTE_STD_C11
union {
int vfio_dev_fd; /**< VFIO device file descriptor */
int uio_cfg_fd; /**< UIO config file descriptor
diff --git a/lib/librte_eal/linuxapp/eal/include/exec-env/rte_kni_common.h b/lib/librte_eal/linuxapp/eal/include/exec-env/rte_kni_common.h
index 2acdfd9b..09713b0c 100644
--- a/lib/librte_eal/linuxapp/eal/include/exec-env/rte_kni_common.h
+++ b/lib/librte_eal/linuxapp/eal/include/exec-env/rte_kni_common.h
@@ -61,6 +61,9 @@
#ifdef __KERNEL__
#include <linux/if.h>
+#define RTE_STD_C11
+#else
+#include <rte_common.h>
#endif
/**
@@ -85,6 +88,7 @@ enum rte_kni_req_id {
*/
struct rte_kni_request {
uint32_t req_id; /**< Request id */
+ RTE_STD_C11
union {
uint32_t new_mtu; /**< New MTU */
uint8_t if_up; /**< 1: interface up, 0: interface down */
@@ -102,7 +106,7 @@ struct rte_kni_fifo {
volatile unsigned read; /**< Next position to be read */
unsigned len; /**< Circular buffer length */
unsigned elem_size; /**< Pointer size - for 32/64 bit OS */
- void * volatile buffer[0]; /**< The buffer contains mbuf pointers */
+ void *volatile buffer[]; /**< The buffer contains mbuf pointers */
};
/*
@@ -111,7 +115,8 @@ struct rte_kni_fifo {
*/
struct rte_kni_mbuf {
void *buf_addr __attribute__((__aligned__(RTE_CACHE_LINE_SIZE)));
- char pad0[10];
+ uint64_t buf_physaddr;
+ char pad0[2];
uint16_t data_off; /**< Start address of data in segment buffer. */
char pad1[2];
uint8_t nb_segs; /**< Number of segments. */
@@ -159,6 +164,7 @@ struct rte_kni_device_info {
uint16_t group_id; /**< Group ID */
uint32_t core_id; /**< core ID to bind for kernel thread */
+ __extension__
uint8_t force_bind : 1; /**< Flag for kernel thread binding */
/* mbuf size */
diff --git a/lib/librte_eal/linuxapp/eal/rte_eal_version.map b/lib/librte_eal/linuxapp/eal/rte_eal_version.map
index db8c9845..83721ba5 100644
--- a/lib/librte_eal/linuxapp/eal/rte_eal_version.map
+++ b/lib/librte_eal/linuxapp/eal/rte_eal_version.map
@@ -166,3 +166,15 @@ DPDK_16.07 {
rte_thread_setname;
} DPDK_16.04;
+
+DPDK_16.11 {
+ global:
+
+ rte_delay_us_block;
+ rte_delay_us_callback_register;
+ rte_eal_dev_attach;
+ rte_eal_dev_detach;
+ rte_eal_vdrv_register;
+ rte_eal_vdrv_unregister;
+
+} DPDK_16.07;
diff --git a/lib/librte_eal/linuxapp/kni/Makefile b/lib/librte_eal/linuxapp/kni/Makefile
index 8cc6b61c..4e99e07e 100644
--- a/lib/librte_eal/linuxapp/kni/Makefile
+++ b/lib/librte_eal/linuxapp/kni/Makefile
@@ -76,14 +76,9 @@ SRCS-y += ethtool/igb/e1000_mbx.c
SRCS-y += ethtool/igb/e1000_nvm.c
SRCS-y += ethtool/igb/e1000_phy.c
SRCS-y += ethtool/igb/igb_ethtool.c
-SRCS-y += ethtool/igb/igb_hwmon.c
SRCS-y += ethtool/igb/igb_main.c
-SRCS-y += ethtool/igb/igb_debugfs.c
SRCS-y += ethtool/igb/igb_param.c
-SRCS-y += ethtool/igb/igb_procfs.c
SRCS-y += ethtool/igb/igb_vmdq.c
-#SRCS-y += ethtool/igb/igb_ptp.c
-#SRCS-y += ethtool/igb/kcompat.c
SRCS-y += kni_misc.c
SRCS-y += kni_net.c
diff --git a/lib/librte_eal/linuxapp/kni/compat.h b/lib/librte_eal/linuxapp/kni/compat.h
index 647ba3ce..78da08e5 100644
--- a/lib/librte_eal/linuxapp/kni/compat.h
+++ b/lib/librte_eal/linuxapp/kni/compat.h
@@ -19,13 +19,25 @@
#endif
#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 35)
-#define sk_sleep(s) (s)->sk_sleep
+#define sk_sleep(s) ((s)->sk_sleep)
+#else
+#define HAVE_SOCKET_WQ
+#endif
+
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 7, 0)
+#define HAVE_STATIC_SOCK_MAP_FD
+#else
+#define kni_sock_map_fd(s) sock_map_fd(s, 0)
#endif
#if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 9, 0)
#define HAVE_CHANGE_CARRIER_CB
#endif
+#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 14, 0)
+#define ether_addr_copy(dst, src) memcpy(dst, src, ETH_ALEN)
+#endif
+
#if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 19, 0)
#define HAVE_IOV_ITER_MSGHDR
#endif
@@ -35,6 +47,23 @@
#define HAVE_REBUILD_HEADER
#endif
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 2, 0)
+#define HAVE_SK_ALLOC_KERN_PARAM
+#endif
+
#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 7, 0)
#define HAVE_TRANS_START_HELPER
#endif
+
+/*
+ * KNI uses the NET_NAME_UNKNOWN macro to select the correct alloc_netdev()
+ * version. Some old kernels backported the commit that enables the macro
+ * (685343fc3ba6) but still use the old API; there the macro must be undefined
+ * to select the correct API version. This is safe since KNI doesn't use the
+ * macro's value. This fix is specific to RedHat/CentOS kernels.
+ */
+#if (defined(RHEL_RELEASE_CODE) && \
+ (RHEL_RELEASE_CODE >= RHEL_RELEASE_VERSION(6, 8)) && \
+ (LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 34)))
+#undef NET_NAME_UNKNOWN
+#endif
diff --git a/lib/librte_eal/linuxapp/kni/ethtool/igb/COPYING b/lib/librte_eal/linuxapp/kni/ethtool/igb/COPYING
deleted file mode 100644
index 5f297e5b..00000000
--- a/lib/librte_eal/linuxapp/kni/ethtool/igb/COPYING
+++ /dev/null
@@ -1,339 +0,0 @@
-
-"This software program is licensed subject to the GNU General Public License
-(GPL). Version 2, June 1991, available at
-<http://www.fsf.org/copyleft/gpl.html>"
-
-GNU General Public License
-
-Version 2, June 1991
-
-Copyright (C) 1989, 1991 Free Software Foundation, Inc.
-59 Temple Place - Suite 330, Boston, MA 02111-1307, USA
-
-Everyone is permitted to copy and distribute verbatim copies of this license
-document, but changing it is not allowed.
-
-Preamble
-
-The licenses for most software are designed to take away your freedom to
-share and change it. By contrast, the GNU General Public License is intended
-to guarantee your freedom to share and change free software--to make sure
-the software is free for all its users. This General Public License applies
-to most of the Free Software Foundation's software and to any other program
-whose authors commit to using it. (Some other Free Software Foundation
-software is covered by the GNU Library General Public License instead.) You
-can apply it to your programs, too.
-
-When we speak of free software, we are referring to freedom, not price. Our
-General Public Licenses are designed to make sure that you have the freedom
-to distribute copies of free software (and charge for this service if you
-wish), that you receive source code or can get it if you want it, that you
-can change the software or use pieces of it in new free programs; and that
-you know you can do these things.
-
-To protect your rights, we need to make restrictions that forbid anyone to
-deny you these rights or to ask you to surrender the rights. These
-restrictions translate to certain responsibilities for you if you distribute
-copies of the software, or if you modify it.
-
-For example, if you distribute copies of such a program, whether gratis or
-for a fee, you must give the recipients all the rights that you have. You
-must make sure that they, too, receive or can get the source code. And you
-must show them these terms so they know their rights.
-
-We protect your rights with two steps: (1) copyright the software, and (2)
-offer you this license which gives you legal permission to copy, distribute
-and/or modify the software.
-
-Also, for each author's protection and ours, we want to make certain that
-everyone understands that there is no warranty for this free software. If
-the software is modified by someone else and passed on, we want its
-recipients to know that what they have is not the original, so that any
-problems introduced by others will not reflect on the original authors'
-reputations.
-
-Finally, any free program is threatened constantly by software patents. We
-wish to avoid the danger that redistributors of a free program will
-individually obtain patent licenses, in effect making the program
-proprietary. To prevent this, we have made it clear that any patent must be
-licensed for everyone's free use or not licensed at all.
-
-The precise terms and conditions for copying, distribution and modification
-follow.
-
-TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION
-
-0. This License applies to any program or other work which contains a notice
- placed by the copyright holder saying it may be distributed under the
- terms of this General Public License. The "Program", below, refers to any
- such program or work, and a "work based on the Program" means either the
- Program or any derivative work under copyright law: that is to say, a
- work containing the Program or a portion of it, either verbatim or with
- modifications and/or translated into another language. (Hereinafter,
- translation is included without limitation in the term "modification".)
- Each licensee is addressed as "you".
-
- Activities other than copying, distribution and modification are not
- covered by this License; they are outside its scope. The act of running
- the Program is not restricted, and the output from the Program is covered
- only if its contents constitute a work based on the Program (independent
- of having been made by running the Program). Whether that is true depends
- on what the Program does.
-
-1. You may copy and distribute verbatim copies of the Program's source code
- as you receive it, in any medium, provided that you conspicuously and
- appropriately publish on each copy an appropriate copyright notice and
- disclaimer of warranty; keep intact all the notices that refer to this
- License and to the absence of any warranty; and give any other recipients
- of the Program a copy of this License along with the Program.
-
- You may charge a fee for the physical act of transferring a copy, and you
- may at your option offer warranty protection in exchange for a fee.
-
-2. You may modify your copy or copies of the Program or any portion of it,
- thus forming a work based on the Program, and copy and distribute such
- modifications or work under the terms of Section 1 above, provided that
- you also meet all of these conditions:
-
- * a) You must cause the modified files to carry prominent notices stating
- that you changed the files and the date of any change.
-
- * b) You must cause any work that you distribute or publish, that in
- whole or in part contains or is derived from the Program or any part
- thereof, to be licensed as a whole at no charge to all third parties
- under the terms of this License.
-
- * c) If the modified program normally reads commands interactively when
- run, you must cause it, when started running for such interactive
- use in the most ordinary way, to print or display an announcement
- including an appropriate copyright notice and a notice that there is
- no warranty (or else, saying that you provide a warranty) and that
- users may redistribute the program under these conditions, and
- telling the user how to view a copy of this License. (Exception: if
- the Program itself is interactive but does not normally print such
- an announcement, your work based on the Program is not required to
- print an announcement.)
-
- These requirements apply to the modified work as a whole. If identifiable
- sections of that work are not derived from the Program, and can be
- reasonably considered independent and separate works in themselves, then
- this License, and its terms, do not apply to those sections when you
- distribute them as separate works. But when you distribute the same
- sections as part of a whole which is a work based on the Program, the
- distribution of the whole must be on the terms of this License, whose
- permissions for other licensees extend to the entire whole, and thus to
- each and every part regardless of who wrote it.
-
- Thus, it is not the intent of this section to claim rights or contest
- your rights to work written entirely by you; rather, the intent is to
- exercise the right to control the distribution of derivative or
- collective works based on the Program.
-
- In addition, mere aggregation of another work not based on the Program
- with the Program (or with a work based on the Program) on a volume of a
- storage or distribution medium does not bring the other work under the
- scope of this License.
-
-3. You may copy and distribute the Program (or a work based on it, under
- Section 2) in object code or executable form under the terms of Sections
- 1 and 2 above provided that you also do one of the following:
-
- * a) Accompany it with the complete corresponding machine-readable source
- code, which must be distributed under the terms of Sections 1 and 2
- above on a medium customarily used for software interchange; or,
-
- * b) Accompany it with a written offer, valid for at least three years,
- to give any third party, for a charge no more than your cost of
- physically performing source distribution, a complete machine-
- readable copy of the corresponding source code, to be distributed
- under the terms of Sections 1 and 2 above on a medium customarily
- used for software interchange; or,
-
- * c) Accompany it with the information you received as to the offer to
- distribute corresponding source code. (This alternative is allowed
- only for noncommercial distribution and only if you received the
- program in object code or executable form with such an offer, in
- accord with Subsection b above.)
-
- The source code for a work means the preferred form of the work for
- making modifications to it. For an executable work, complete source code
- means all the source code for all modules it contains, plus any
- associated interface definition files, plus the scripts used to control
- compilation and installation of the executable. However, as a special
- exception, the source code distributed need not include anything that is
- normally distributed (in either source or binary form) with the major
- components (compiler, kernel, and so on) of the operating system on which
- the executable runs, unless that component itself accompanies the
- executable.
-
- If distribution of executable or object code is made by offering access
- to copy from a designated place, then offering equivalent access to copy
- the source code from the same place counts as distribution of the source
- code, even though third parties are not compelled to copy the source
- along with the object code.
-
-4. You may not copy, modify, sublicense, or distribute the Program except as
- expressly provided under this License. Any attempt otherwise to copy,
- modify, sublicense or distribute the Program is void, and will
- automatically terminate your rights under this License. However, parties
- who have received copies, or rights, from you under this License will not
- have their licenses terminated so long as such parties remain in full
- compliance.
-
-5. You are not required to accept this License, since you have not signed
- it. However, nothing else grants you permission to modify or distribute
- the Program or its derivative works. These actions are prohibited by law
- if you do not accept this License. Therefore, by modifying or
- distributing the Program (or any work based on the Program), you
- indicate your acceptance of this License to do so, and all its terms and
- conditions for copying, distributing or modifying the Program or works
- based on it.
-
-6. Each time you redistribute the Program (or any work based on the
- Program), the recipient automatically receives a license from the
- original licensor to copy, distribute or modify the Program subject to
- these terms and conditions. You may not impose any further restrictions
- on the recipients' exercise of the rights granted herein. You are not
- responsible for enforcing compliance by third parties to this License.
-
-7. If, as a consequence of a court judgment or allegation of patent
- infringement or for any other reason (not limited to patent issues),
- conditions are imposed on you (whether by court order, agreement or
- otherwise) that contradict the conditions of this License, they do not
- excuse you from the conditions of this License. If you cannot distribute
- so as to satisfy simultaneously your obligations under this License and
- any other pertinent obligations, then as a consequence you may not
- distribute the Program at all. For example, if a patent license would
- not permit royalty-free redistribution of the Program by all those who
- receive copies directly or indirectly through you, then the only way you
- could satisfy both it and this License would be to refrain entirely from
- distribution of the Program.
-
- If any portion of this section is held invalid or unenforceable under any
- particular circumstance, the balance of the section is intended to apply
- and the section as a whole is intended to apply in other circumstances.
-
- It is not the purpose of this section to induce you to infringe any
- patents or other property right claims or to contest validity of any
- such claims; this section has the sole purpose of protecting the
- integrity of the free software distribution system, which is implemented
- by public license practices. Many people have made generous contributions
- to the wide range of software distributed through that system in
- reliance on consistent application of that system; it is up to the
- author/donor to decide if he or she is willing to distribute software
- through any other system and a licensee cannot impose that choice.
-
- This section is intended to make thoroughly clear what is believed to be
- a consequence of the rest of this License.
-
-8. If the distribution and/or use of the Program is restricted in certain
- countries either by patents or by copyrighted interfaces, the original
- copyright holder who places the Program under this License may add an
- explicit geographical distribution limitation excluding those countries,
- so that distribution is permitted only in or among countries not thus
- excluded. In such case, this License incorporates the limitation as if
- written in the body of this License.
-
-9. The Free Software Foundation may publish revised and/or new versions of
- the General Public License from time to time. Such new versions will be
- similar in spirit to the present version, but may differ in detail to
- address new problems or concerns.
-
- Each version is given a distinguishing version number. If the Program
- specifies a version number of this License which applies to it and "any
- later version", you have the option of following the terms and
- conditions either of that version or of any later version published by
- the Free Software Foundation. If the Program does not specify a version
- number of this License, you may choose any version ever published by the
- Free Software Foundation.
-
-10. If you wish to incorporate parts of the Program into other free programs
- whose distribution conditions are different, write to the author to ask
- for permission. For software which is copyrighted by the Free Software
- Foundation, write to the Free Software Foundation; we sometimes make
- exceptions for this. Our decision will be guided by the two goals of
- preserving the free status of all derivatives of our free software and
- of promoting the sharing and reuse of software generally.
-
- NO WARRANTY
-
-11. BECAUSE THE PROGRAM IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY
- FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW. EXCEPT WHEN
- OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES
- PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER
- EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
- WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE
- ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH
- YOU. SHOULD THE PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL
- NECESSARY SERVICING, REPAIR OR CORRECTION.
-
-12. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING
- WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR
- REDISTRIBUTE THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR
- DAMAGES, INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL
- DAMAGES ARISING OUT OF THE USE OR INABILITY TO USE THE PROGRAM
- (INCLUDING BUT NOT LIMITED TO LOSS OF DATA OR DATA BEING RENDERED
- INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD PARTIES OR A FAILURE OF
- THE PROGRAM TO OPERATE WITH ANY OTHER PROGRAMS), EVEN IF SUCH HOLDER OR
- OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH DAMAGES.
-
-END OF TERMS AND CONDITIONS
-
-How to Apply These Terms to Your New Programs
-
-If you develop a new program, and you want it to be of the greatest
-possible use to the public, the best way to achieve this is to make it free
-software which everyone can redistribute and change under these terms.
-
-To do so, attach the following notices to the program. It is safest to
-attach them to the start of each source file to most effectively convey the
-exclusion of warranty; and each file should have at least the "copyright"
-line and a pointer to where the full notice is found.
-
-one line to give the program's name and an idea of what it does.
-Copyright (C) yyyy name of author
-
-This program is free software; you can redistribute it and/or modify it
-under the terms of the GNU General Public License as published by the Free
-Software Foundation; either version 2 of the License, or (at your option)
-any later version.
-
-This program is distributed in the hope that it will be useful, but WITHOUT
-ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
-FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
-more details.
-
-You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc., 59
-Temple Place - Suite 330, Boston, MA 02111-1307, USA.
-
-Also add information on how to contact you by electronic and paper mail.
-
-If the program is interactive, make it output a short notice like this when
-it starts in an interactive mode:
-
-Gnomovision version 69, Copyright (C) year name of author Gnomovision comes
-with ABSOLUTELY NO WARRANTY; for details type 'show w'. This is free
-software, and you are welcome to redistribute it under certain conditions;
-type 'show c' for details.
-
-The hypothetical commands 'show w' and 'show c' should show the appropriate
-parts of the General Public License. Of course, the commands you use may be
-called something other than 'show w' and 'show c'; they could even be
-mouse-clicks or menu items--whatever suits your program.
-
-You should also get your employer (if you work as a programmer) or your
-school, if any, to sign a "copyright disclaimer" for the program, if
-necessary. Here is a sample; alter the names:
-
-Yoyodyne, Inc., hereby disclaims all copyright interest in the program
-'Gnomovision' (which makes passes at compilers) written by James Hacker.
-
-signature of Ty Coon, 1 April 1989
-Ty Coon, President of Vice
-
-This General Public License does not permit incorporating your program into
-proprietary programs. If your program is a subroutine library, you may
-consider it more useful to permit linking proprietary applications with the
-library. If this is what you want to do, use the GNU Library General Public
-License instead of this License.
diff --git a/lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_82575.c b/lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_82575.c
index b8c9a13f..d558af20 100644
--- a/lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_82575.c
+++ b/lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_82575.c
@@ -17,7 +17,7 @@
51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
The full GNU General Public License is included in this distribution in
- the file called "COPYING".
+ the file called "LICENSE.GPL".
Contact Information:
e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
diff --git a/lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_82575.h b/lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_82575.h
index 1aec75ab..185ccdf1 100644
--- a/lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_82575.h
+++ b/lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_82575.h
@@ -17,7 +17,7 @@
51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
The full GNU General Public License is included in this distribution in
- the file called "COPYING".
+ the file called "LICENSE.GPL".
Contact Information:
e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
diff --git a/lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_api.c b/lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_api.c
index 6095d3b4..220c9a40 100644
--- a/lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_api.c
+++ b/lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_api.c
@@ -17,7 +17,7 @@
51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
The full GNU General Public License is included in this distribution in
- the file called "COPYING".
+ the file called "LICENSE.GPL".
Contact Information:
e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
diff --git a/lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_api.h b/lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_api.h
index b21294ec..55c8a5f4 100644
--- a/lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_api.h
+++ b/lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_api.h
@@ -17,7 +17,7 @@
51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
The full GNU General Public License is included in this distribution in
- the file called "COPYING".
+ the file called "LICENSE.GPL".
Contact Information:
e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
diff --git a/lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_defines.h b/lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_defines.h
index 63b228c5..d42c7998 100644
--- a/lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_defines.h
+++ b/lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_defines.h
@@ -17,7 +17,7 @@
51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
The full GNU General Public License is included in this distribution in
- the file called "COPYING".
+ the file called "LICENSE.GPL".
Contact Information:
e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
diff --git a/lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_hw.h b/lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_hw.h
index 347cef71..35886e93 100644
--- a/lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_hw.h
+++ b/lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_hw.h
@@ -17,7 +17,7 @@
51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
The full GNU General Public License is included in this distribution in
- the file called "COPYING".
+ the file called "LICENSE.GPL".
Contact Information:
e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
diff --git a/lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_i210.c b/lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_i210.c
index 1e9f3e6e..7e4c20a9 100644
--- a/lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_i210.c
+++ b/lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_i210.c
@@ -17,7 +17,7 @@
51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
The full GNU General Public License is included in this distribution in
- the file called "COPYING".
+ the file called "LICENSE.GPL".
Contact Information:
e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
diff --git a/lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_i210.h b/lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_i210.h
index 57b2eb56..b8fa70d0 100644
--- a/lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_i210.h
+++ b/lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_i210.h
@@ -17,7 +17,7 @@
51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
The full GNU General Public License is included in this distribution in
- the file called "COPYING".
+ the file called "LICENSE.GPL".
Contact Information:
e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
diff --git a/lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_mac.c b/lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_mac.c
index 4ee59ba9..74319def 100644
--- a/lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_mac.c
+++ b/lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_mac.c
@@ -17,7 +17,7 @@
51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
The full GNU General Public License is included in this distribution in
- the file called "COPYING".
+ the file called "LICENSE.GPL".
Contact Information:
e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
diff --git a/lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_mac.h b/lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_mac.h
index 6a1b0f52..3bcdd88c 100644
--- a/lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_mac.h
+++ b/lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_mac.h
@@ -17,7 +17,7 @@
51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
The full GNU General Public License is included in this distribution in
- the file called "COPYING".
+ the file called "LICENSE.GPL".
Contact Information:
e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
diff --git a/lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_manage.c b/lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_manage.c
index a1700398..51dfae5d 100644
--- a/lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_manage.c
+++ b/lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_manage.c
@@ -17,7 +17,7 @@
51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
The full GNU General Public License is included in this distribution in
- the file called "COPYING".
+ the file called "LICENSE.GPL".
Contact Information:
e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
diff --git a/lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_manage.h b/lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_manage.h
index c94b2185..0627f271 100644
--- a/lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_manage.h
+++ b/lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_manage.h
@@ -17,7 +17,7 @@
51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
The full GNU General Public License is included in this distribution in
- the file called "COPYING".
+ the file called "LICENSE.GPL".
Contact Information:
e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
diff --git a/lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_mbx.c b/lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_mbx.c
index 3ef0d98b..bd64429f 100644
--- a/lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_mbx.c
+++ b/lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_mbx.c
@@ -17,7 +17,7 @@
51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
The full GNU General Public License is included in this distribution in
- the file called "COPYING".
+ the file called "LICENSE.GPL".
Contact Information:
e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
diff --git a/lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_mbx.h b/lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_mbx.h
index bbf838c8..64685d9d 100644
--- a/lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_mbx.h
+++ b/lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_mbx.h
@@ -17,7 +17,7 @@
51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
The full GNU General Public License is included in this distribution in
- the file called "COPYING".
+ the file called "LICENSE.GPL".
Contact Information:
e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
diff --git a/lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_nvm.c b/lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_nvm.c
index 6188d007..1ce59154 100644
--- a/lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_nvm.c
+++ b/lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_nvm.c
@@ -17,7 +17,7 @@
51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
The full GNU General Public License is included in this distribution in
- the file called "COPYING".
+ the file called "LICENSE.GPL".
Contact Information:
e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
diff --git a/lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_nvm.h b/lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_nvm.h
index fe62785a..17bc53c3 100644
--- a/lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_nvm.h
+++ b/lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_nvm.h
@@ -17,7 +17,7 @@
51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
The full GNU General Public License is included in this distribution in
- the file called "COPYING".
+ the file called "LICENSE.GPL".
Contact Information:
e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
diff --git a/lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_osdep.h b/lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_osdep.h
index d1cf98e2..c1ab60c4 100644
--- a/lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_osdep.h
+++ b/lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_osdep.h
@@ -17,7 +17,7 @@
51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
The full GNU General Public License is included in this distribution in
- the file called "COPYING".
+ the file called "LICENSE.GPL".
Contact Information:
e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
diff --git a/lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_phy.c b/lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_phy.c
index 140a2a47..d8a77c45 100644
--- a/lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_phy.c
+++ b/lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_phy.c
@@ -17,7 +17,7 @@
51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
The full GNU General Public License is included in this distribution in
- the file called "COPYING".
+ the file called "LICENSE.GPL".
Contact Information:
e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
diff --git a/lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_phy.h b/lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_phy.h
index 5387c5e7..db24fb0b 100644
--- a/lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_phy.h
+++ b/lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_phy.h
@@ -17,7 +17,7 @@
51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
The full GNU General Public License is included in this distribution in
- the file called "COPYING".
+ the file called "LICENSE.GPL".
Contact Information:
e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
diff --git a/lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_regs.h b/lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_regs.h
index 0e083c54..830ec991 100644
--- a/lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_regs.h
+++ b/lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_regs.h
@@ -17,7 +17,7 @@
51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
The full GNU General Public License is included in this distribution in
- the file called "COPYING".
+ the file called "LICENSE.GPL".
Contact Information:
e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
diff --git a/lib/librte_eal/linuxapp/kni/ethtool/igb/igb.h b/lib/librte_eal/linuxapp/kni/ethtool/igb/igb.h
index e5554ca3..d077b49e 100644
--- a/lib/librte_eal/linuxapp/kni/ethtool/igb/igb.h
+++ b/lib/librte_eal/linuxapp/kni/ethtool/igb/igb.h
@@ -17,7 +17,7 @@
51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
The full GNU General Public License is included in this distribution in
- the file called "COPYING".
+ the file called "LICENSE.GPL".
Contact Information:
e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
diff --git a/lib/librte_eal/linuxapp/kni/ethtool/igb/igb_debugfs.c b/lib/librte_eal/linuxapp/kni/ethtool/igb/igb_debugfs.c
deleted file mode 100644
index c07f9f53..00000000
--- a/lib/librte_eal/linuxapp/kni/ethtool/igb/igb_debugfs.c
+++ /dev/null
@@ -1,28 +0,0 @@
-/*******************************************************************************
-
- Intel(R) Gigabit Ethernet Linux driver
- Copyright(c) 2007-2013 Intel Corporation.
-
- This program is free software; you can redistribute it and/or modify it
- under the terms and conditions of the GNU General Public License,
- version 2, as published by the Free Software Foundation.
-
- This program is distributed in the hope it will be useful, but WITHOUT
- ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
- more details.
-
- You should have received a copy of the GNU General Public License along with
- this program; if not, write to the Free Software Foundation, Inc.,
- 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
-
- The full GNU General Public License is included in this distribution in
- the file called "COPYING".
-
- Contact Information:
- e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
- Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
-
-*******************************************************************************/
-
-#include "igb.h"
diff --git a/lib/librte_eal/linuxapp/kni/ethtool/igb/igb_ethtool.c b/lib/librte_eal/linuxapp/kni/ethtool/igb/igb_ethtool.c
index af7e68a5..d7a987d5 100644
--- a/lib/librte_eal/linuxapp/kni/ethtool/igb/igb_ethtool.c
+++ b/lib/librte_eal/linuxapp/kni/ethtool/igb/igb_ethtool.c
@@ -17,7 +17,7 @@
51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
The full GNU General Public License is included in this distribution in
- the file called "COPYING".
+ the file called "LICENSE.GPL".
Contact Information:
e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
diff --git a/lib/librte_eal/linuxapp/kni/ethtool/igb/igb_hwmon.c b/lib/librte_eal/linuxapp/kni/ethtool/igb/igb_hwmon.c
deleted file mode 100644
index 07a1ae07..00000000
--- a/lib/librte_eal/linuxapp/kni/ethtool/igb/igb_hwmon.c
+++ /dev/null
@@ -1,260 +0,0 @@
-/*******************************************************************************
-
- Intel(R) Gigabit Ethernet Linux driver
- Copyright(c) 2007-2013 Intel Corporation.
-
- This program is free software; you can redistribute it and/or modify it
- under the terms and conditions of the GNU General Public License,
- version 2, as published by the Free Software Foundation.
-
- This program is distributed in the hope it will be useful, but WITHOUT
- ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
- more details.
-
- You should have received a copy of the GNU General Public License along with
- this program; if not, write to the Free Software Foundation, Inc.,
- 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
-
- The full GNU General Public License is included in this distribution in
- the file called "COPYING".
-
- Contact Information:
- e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
- Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
-
-*******************************************************************************/
-
-#include "igb.h"
-#include "e1000_82575.h"
-#include "e1000_hw.h"
-#ifdef IGB_HWMON
-#include <linux/module.h>
-#include <linux/types.h>
-#include <linux/sysfs.h>
-#include <linux/kobject.h>
-#include <linux/device.h>
-#include <linux/netdevice.h>
-#include <linux/hwmon.h>
-#include <linux/pci.h>
-
-#ifdef HAVE_I2C_SUPPORT
-static struct i2c_board_info i350_sensor_info = {
- I2C_BOARD_INFO("i350bb", (0Xf8 >> 1)),
-};
-#endif /* HAVE_I2C_SUPPORT */
-
-/* hwmon callback functions */
-static ssize_t igb_hwmon_show_location(struct device *dev,
- struct device_attribute *attr,
- char *buf)
-{
- struct hwmon_attr *igb_attr = container_of(attr, struct hwmon_attr,
- dev_attr);
- return sprintf(buf, "loc%u\n",
- igb_attr->sensor->location);
-}
-
-static ssize_t igb_hwmon_show_temp(struct device *dev,
- struct device_attribute *attr,
- char *buf)
-{
- struct hwmon_attr *igb_attr = container_of(attr, struct hwmon_attr,
- dev_attr);
- unsigned int value;
-
- /* reset the temp field */
- igb_attr->hw->mac.ops.get_thermal_sensor_data(igb_attr->hw);
-
- value = igb_attr->sensor->temp;
-
- /* display millidegree */
- value *= 1000;
-
- return sprintf(buf, "%u\n", value);
-}
-
-static ssize_t igb_hwmon_show_cautionthresh(struct device *dev,
- struct device_attribute *attr,
- char *buf)
-{
- struct hwmon_attr *igb_attr = container_of(attr, struct hwmon_attr,
- dev_attr);
- unsigned int value = igb_attr->sensor->caution_thresh;
-
- /* display millidegree */
- value *= 1000;
-
- return sprintf(buf, "%u\n", value);
-}
-
-static ssize_t igb_hwmon_show_maxopthresh(struct device *dev,
- struct device_attribute *attr,
- char *buf)
-{
- struct hwmon_attr *igb_attr = container_of(attr, struct hwmon_attr,
- dev_attr);
- unsigned int value = igb_attr->sensor->max_op_thresh;
-
- /* display millidegree */
- value *= 1000;
-
- return sprintf(buf, "%u\n", value);
-}
-
-/* igb_add_hwmon_attr - Create hwmon attr table for a hwmon sysfs file.
- * @ adapter: pointer to the adapter structure
- * @ offset: offset in the eeprom sensor data table
- * @ type: type of sensor data to display
- *
- * For each file we want in hwmon's sysfs interface we need a device_attribute
- * This is included in our hwmon_attr struct that contains the references to
- * the data structures we need to get the data to display.
- */
-static int igb_add_hwmon_attr(struct igb_adapter *adapter,
- unsigned int offset, int type) {
- int rc;
- unsigned int n_attr;
- struct hwmon_attr *igb_attr;
-
- n_attr = adapter->igb_hwmon_buff.n_hwmon;
- igb_attr = &adapter->igb_hwmon_buff.hwmon_list[n_attr];
-
- switch (type) {
- case IGB_HWMON_TYPE_LOC:
- igb_attr->dev_attr.show = igb_hwmon_show_location;
- snprintf(igb_attr->name, sizeof(igb_attr->name),
- "temp%u_label", offset);
- break;
- case IGB_HWMON_TYPE_TEMP:
- igb_attr->dev_attr.show = igb_hwmon_show_temp;
- snprintf(igb_attr->name, sizeof(igb_attr->name),
- "temp%u_input", offset);
- break;
- case IGB_HWMON_TYPE_CAUTION:
- igb_attr->dev_attr.show = igb_hwmon_show_cautionthresh;
- snprintf(igb_attr->name, sizeof(igb_attr->name),
- "temp%u_max", offset);
- break;
- case IGB_HWMON_TYPE_MAX:
- igb_attr->dev_attr.show = igb_hwmon_show_maxopthresh;
- snprintf(igb_attr->name, sizeof(igb_attr->name),
- "temp%u_crit", offset);
- break;
- default:
- rc = -EPERM;
- return rc;
- }
-
- /* These always the same regardless of type */
- igb_attr->sensor =
- &adapter->hw.mac.thermal_sensor_data.sensor[offset];
- igb_attr->hw = &adapter->hw;
- igb_attr->dev_attr.store = NULL;
- igb_attr->dev_attr.attr.mode = S_IRUGO;
- igb_attr->dev_attr.attr.name = igb_attr->name;
- sysfs_attr_init(&igb_attr->dev_attr.attr);
- rc = device_create_file(&adapter->pdev->dev,
- &igb_attr->dev_attr);
- if (rc == 0)
- ++adapter->igb_hwmon_buff.n_hwmon;
-
- return rc;
-}
-
-static void igb_sysfs_del_adapter(struct igb_adapter *adapter)
-{
- int i;
-
- if (adapter == NULL)
- return;
-
- for (i = 0; i < adapter->igb_hwmon_buff.n_hwmon; i++) {
- device_remove_file(&adapter->pdev->dev,
- &adapter->igb_hwmon_buff.hwmon_list[i].dev_attr);
- }
-
- kfree(adapter->igb_hwmon_buff.hwmon_list);
-
- if (adapter->igb_hwmon_buff.device)
- hwmon_device_unregister(adapter->igb_hwmon_buff.device);
-}
-
-/* called from igb_main.c */
-void igb_sysfs_exit(struct igb_adapter *adapter)
-{
- igb_sysfs_del_adapter(adapter);
-}
-
-/* called from igb_main.c */
-int igb_sysfs_init(struct igb_adapter *adapter)
-{
- struct hwmon_buff *igb_hwmon = &adapter->igb_hwmon_buff;
- unsigned int i;
- int n_attrs;
- int rc = 0;
-#ifdef HAVE_I2C_SUPPORT
- struct i2c_client *client = NULL;
-#endif /* HAVE_I2C_SUPPORT */
-
- /* If this method isn't defined we don't support thermals */
- if (adapter->hw.mac.ops.init_thermal_sensor_thresh == NULL)
- goto exit;
-
- /* Don't create thermal hwmon interface if no sensors present */
- rc = (adapter->hw.mac.ops.init_thermal_sensor_thresh(&adapter->hw));
- if (rc)
- goto exit;
-#ifdef HAVE_I2C_SUPPORT
- /* init i2c_client */
- client = i2c_new_device(&adapter->i2c_adap, &i350_sensor_info);
- if (client == NULL) {
- dev_info(&adapter->pdev->dev,
- "Failed to create new i2c device..\n");
- goto exit;
- }
- adapter->i2c_client = client;
-#endif /* HAVE_I2C_SUPPORT */
-
- /* Allocation space for max attributes
- * max num sensors * values (loc, temp, max, caution)
- */
- n_attrs = E1000_MAX_SENSORS * 4;
- igb_hwmon->hwmon_list = kcalloc(n_attrs, sizeof(struct hwmon_attr),
- GFP_KERNEL);
- if (!igb_hwmon->hwmon_list) {
- rc = -ENOMEM;
- goto err;
- }
-
- igb_hwmon->device = hwmon_device_register(&adapter->pdev->dev);
- if (IS_ERR(igb_hwmon->device)) {
- rc = PTR_ERR(igb_hwmon->device);
- goto err;
- }
-
- for (i = 0; i < E1000_MAX_SENSORS; i++) {
-
- /* Only create hwmon sysfs entries for sensors that have
- * meaningful data.
- */
- if (adapter->hw.mac.thermal_sensor_data.sensor[i].location == 0)
- continue;
-
- /* Bail if any hwmon attr struct fails to initialize */
- rc = igb_add_hwmon_attr(adapter, i, IGB_HWMON_TYPE_CAUTION);
- rc |= igb_add_hwmon_attr(adapter, i, IGB_HWMON_TYPE_LOC);
- rc |= igb_add_hwmon_attr(adapter, i, IGB_HWMON_TYPE_TEMP);
- rc |= igb_add_hwmon_attr(adapter, i, IGB_HWMON_TYPE_MAX);
- if (rc)
- goto err;
- }
-
- goto exit;
-
-err:
- igb_sysfs_del_adapter(adapter);
-exit:
- return rc;
-}
-#endif /* IGB_HWMON */
diff --git a/lib/librte_eal/linuxapp/kni/ethtool/igb/igb_main.c b/lib/librte_eal/linuxapp/kni/ethtool/igb/igb_main.c
index f1dcc95b..f4dca5a3 100644
--- a/lib/librte_eal/linuxapp/kni/ethtool/igb/igb_main.c
+++ b/lib/librte_eal/linuxapp/kni/ethtool/igb/igb_main.c
@@ -17,7 +17,7 @@
51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
The full GNU General Public License is included in this distribution in
- the file called "COPYING".
+ the file called "LICENSE.GPL".
Contact Information:
e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
@@ -1562,6 +1562,7 @@ static void igb_check_swap_media(struct igb_adapter *adapter)
ctrl_ext = E1000_READ_REG(hw, E1000_CTRL_EXT);
connsw = E1000_READ_REG(hw, E1000_CONNSW);
link = igb_has_link(adapter);
+ (void) link;
/* need to live swap if current media is copper and we have fiber/serdes
* to go to.
diff --git a/lib/librte_eal/linuxapp/kni/ethtool/igb/igb_param.c b/lib/librte_eal/linuxapp/kni/ethtool/igb/igb_param.c
index f79ce7c1..c922ca2f 100644
--- a/lib/librte_eal/linuxapp/kni/ethtool/igb/igb_param.c
+++ b/lib/librte_eal/linuxapp/kni/ethtool/igb/igb_param.c
@@ -17,7 +17,7 @@
51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
The full GNU General Public License is included in this distribution in
- the file called "COPYING".
+ the file called "LICENSE.GPL".
Contact Information:
e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
diff --git a/lib/librte_eal/linuxapp/kni/ethtool/igb/igb_procfs.c b/lib/librte_eal/linuxapp/kni/ethtool/igb/igb_procfs.c
deleted file mode 100644
index 66236d29..00000000
--- a/lib/librte_eal/linuxapp/kni/ethtool/igb/igb_procfs.c
+++ /dev/null
@@ -1,363 +0,0 @@
-/*******************************************************************************
-
- Intel(R) Gigabit Ethernet Linux driver
- Copyright(c) 2007-2013 Intel Corporation.
-
- This program is free software; you can redistribute it and/or modify it
- under the terms and conditions of the GNU General Public License,
- version 2, as published by the Free Software Foundation.
-
- This program is distributed in the hope it will be useful, but WITHOUT
- ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
- more details.
-
- You should have received a copy of the GNU General Public License along with
- this program; if not, write to the Free Software Foundation, Inc.,
- 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
-
- The full GNU General Public License is included in this distribution in
- the file called "COPYING".
-
- Contact Information:
- e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
- Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
-
-*******************************************************************************/
-
-#include "igb.h"
-#include "e1000_82575.h"
-#include "e1000_hw.h"
-
-#ifdef IGB_PROCFS
-#ifndef IGB_HWMON
-
-#include <linux/module.h>
-#include <linux/types.h>
-#include <linux/proc_fs.h>
-#include <linux/device.h>
-#include <linux/netdevice.h>
-
-static struct proc_dir_entry *igb_top_dir = NULL;
-
-
-bool igb_thermal_present(struct igb_adapter *adapter)
-{
- s32 status;
- struct e1000_hw *hw;
-
- if (adapter == NULL)
- return false;
- hw = &adapter->hw;
-
- /*
- * Only set I2C bit-bang mode if an external thermal sensor is
- * supported on this device.
- */
- if (adapter->ets) {
- status = e1000_set_i2c_bb(hw);
- if (status != E1000_SUCCESS)
- return false;
- }
-
- status = hw->mac.ops.init_thermal_sensor_thresh(hw);
- if (status != E1000_SUCCESS)
- return false;
-
- return true;
-}
-
-
-static int igb_macburn(char *page, char **start, off_t off, int count,
- int *eof, void *data)
-{
- struct e1000_hw *hw;
- struct igb_adapter *adapter = (struct igb_adapter *)data;
- if (adapter == NULL)
- return snprintf(page, count, "error: no adapter\n");
-
- hw = &adapter->hw;
- if (hw == NULL)
- return snprintf(page, count, "error: no hw data\n");
-
- return snprintf(page, count, "0x%02X%02X%02X%02X%02X%02X\n",
- (unsigned int)hw->mac.perm_addr[0],
- (unsigned int)hw->mac.perm_addr[1],
- (unsigned int)hw->mac.perm_addr[2],
- (unsigned int)hw->mac.perm_addr[3],
- (unsigned int)hw->mac.perm_addr[4],
- (unsigned int)hw->mac.perm_addr[5]);
-}
-
-static int igb_macadmn(char *page, char **start, off_t off,
- int count, int *eof, void *data)
-{
- struct e1000_hw *hw;
- struct igb_adapter *adapter = (struct igb_adapter *)data;
- if (adapter == NULL)
- return snprintf(page, count, "error: no adapter\n");
-
- hw = &adapter->hw;
- if (hw == NULL)
- return snprintf(page, count, "error: no hw data\n");
-
- return snprintf(page, count, "0x%02X%02X%02X%02X%02X%02X\n",
- (unsigned int)hw->mac.addr[0],
- (unsigned int)hw->mac.addr[1],
- (unsigned int)hw->mac.addr[2],
- (unsigned int)hw->mac.addr[3],
- (unsigned int)hw->mac.addr[4],
- (unsigned int)hw->mac.addr[5]);
-}
-
-static int igb_numeports(char *page, char **start, off_t off, int count,
- int *eof, void *data)
-{
- struct e1000_hw *hw;
- int ports;
- struct igb_adapter *adapter = (struct igb_adapter *)data;
- if (adapter == NULL)
- return snprintf(page, count, "error: no adapter\n");
-
- hw = &adapter->hw;
- if (hw == NULL)
- return snprintf(page, count, "error: no hw data\n");
-
- ports = 4;
-
- return snprintf(page, count, "%d\n", ports);
-}
-
-static int igb_porttype(char *page, char **start, off_t off, int count,
- int *eof, void *data)
-{
- struct igb_adapter *adapter = (struct igb_adapter *)data;
- if (adapter == NULL)
- return snprintf(page, count, "error: no adapter\n");
-
- return snprintf(page, count, "%d\n",
- test_bit(__IGB_DOWN, &adapter->state));
-}
-
-static int igb_therm_location(char *page, char **start, off_t off,
- int count, int *eof, void *data)
-{
- struct igb_therm_proc_data *therm_data =
- (struct igb_therm_proc_data *)data;
-
- if (therm_data == NULL)
- return snprintf(page, count, "error: no therm_data\n");
-
- return snprintf(page, count, "%d\n", therm_data->sensor_data->location);
-}
-
-static int igb_therm_maxopthresh(char *page, char **start, off_t off,
- int count, int *eof, void *data)
-{
- struct igb_therm_proc_data *therm_data =
- (struct igb_therm_proc_data *)data;
-
- if (therm_data == NULL)
- return snprintf(page, count, "error: no therm_data\n");
-
- return snprintf(page, count, "%d\n",
- therm_data->sensor_data->max_op_thresh);
-}
-
-static int igb_therm_cautionthresh(char *page, char **start, off_t off,
- int count, int *eof, void *data)
-{
- struct igb_therm_proc_data *therm_data =
- (struct igb_therm_proc_data *)data;
-
- if (therm_data == NULL)
- return snprintf(page, count, "error: no therm_data\n");
-
- return snprintf(page, count, "%d\n",
- therm_data->sensor_data->caution_thresh);
-}
-
-static int igb_therm_temp(char *page, char **start, off_t off,
- int count, int *eof, void *data)
-{
- s32 status;
- struct igb_therm_proc_data *therm_data =
- (struct igb_therm_proc_data *)data;
-
- if (therm_data == NULL)
- return snprintf(page, count, "error: no therm_data\n");
-
- status = e1000_get_thermal_sensor_data(therm_data->hw);
- if (status != E1000_SUCCESS)
- snprintf(page, count, "error: status %d returned\n", status);
-
- return snprintf(page, count, "%d\n", therm_data->sensor_data->temp);
-}
-
-struct igb_proc_type{
- char name[32];
- int (*read)(char*, char**, off_t, int, int*, void*);
-};
-
-struct igb_proc_type igb_proc_entries[] = {
- {"numeports", &igb_numeports},
- {"porttype", &igb_porttype},
- {"macburn", &igb_macburn},
- {"macadmn", &igb_macadmn},
- {"", NULL}
-};
-
-struct igb_proc_type igb_internal_entries[] = {
- {"location", &igb_therm_location},
- {"temp", &igb_therm_temp},
- {"cautionthresh", &igb_therm_cautionthresh},
- {"maxopthresh", &igb_therm_maxopthresh},
- {"", NULL}
-};
-
-void igb_del_proc_entries(struct igb_adapter *adapter)
-{
- int index, i;
- char buf[16]; /* much larger than the sensor number will ever be */
-
- if (igb_top_dir == NULL)
- return;
-
- for (i = 0; i < E1000_MAX_SENSORS; i++) {
- if (adapter->therm_dir[i] == NULL)
- continue;
-
- for (index = 0; ; index++) {
- if (igb_internal_entries[index].read == NULL)
- break;
-
- remove_proc_entry(igb_internal_entries[index].name,
- adapter->therm_dir[i]);
- }
- snprintf(buf, sizeof(buf), "sensor_%d", i);
- remove_proc_entry(buf, adapter->info_dir);
- }
-
- if (adapter->info_dir != NULL) {
- for (index = 0; ; index++) {
- if (igb_proc_entries[index].read == NULL)
- break;
- remove_proc_entry(igb_proc_entries[index].name,
- adapter->info_dir);
- }
- remove_proc_entry("info", adapter->eth_dir);
- }
-
- if (adapter->eth_dir != NULL)
- remove_proc_entry(pci_name(adapter->pdev), igb_top_dir);
-}
-
-/* called from igb_main.c */
-void igb_procfs_exit(struct igb_adapter *adapter)
-{
- igb_del_proc_entries(adapter);
-}
-
-int igb_procfs_topdir_init(void)
-{
- igb_top_dir = proc_mkdir("driver/igb", NULL);
- if (igb_top_dir == NULL)
- return -ENOMEM;
-
- return 0;
-}
-
-void igb_procfs_topdir_exit(void)
-{
- remove_proc_entry("driver/igb", NULL);
-}
-
-/* called from igb_main.c */
-int igb_procfs_init(struct igb_adapter *adapter)
-{
- int rc = 0;
- int i;
- int index;
- char buf[16]; /* much larger than the sensor number will ever be */
-
- adapter->eth_dir = NULL;
- adapter->info_dir = NULL;
- for (i = 0; i < E1000_MAX_SENSORS; i++)
- adapter->therm_dir[i] = NULL;
-
- if ( igb_top_dir == NULL ) {
- rc = -ENOMEM;
- goto fail;
- }
-
- adapter->eth_dir = proc_mkdir(pci_name(adapter->pdev), igb_top_dir);
- if (adapter->eth_dir == NULL) {
- rc = -ENOMEM;
- goto fail;
- }
-
- adapter->info_dir = proc_mkdir("info", adapter->eth_dir);
- if (adapter->info_dir == NULL) {
- rc = -ENOMEM;
- goto fail;
- }
- for (index = 0; ; index++) {
- if (igb_proc_entries[index].read == NULL) {
- break;
- }
- if (!(create_proc_read_entry(igb_proc_entries[index].name,
- 0444,
- adapter->info_dir,
- igb_proc_entries[index].read,
- adapter))) {
-
- rc = -ENOMEM;
- goto fail;
- }
- }
- if (igb_thermal_present(adapter) == false)
- goto exit;
-
- for (i = 0; i < E1000_MAX_SENSORS; i++) {
-
- if (adapter->hw.mac.thermal_sensor_data.sensor[i].location== 0)
- continue;
-
- snprintf(buf, sizeof(buf), "sensor_%d", i);
- adapter->therm_dir[i] = proc_mkdir(buf, adapter->info_dir);
- if (adapter->therm_dir[i] == NULL) {
- rc = -ENOMEM;
- goto fail;
- }
- for (index = 0; ; index++) {
- if (igb_internal_entries[index].read == NULL)
- break;
- /*
- * therm_data struct contains pointer the read func
- * will be needing
- */
- adapter->therm_data[i].hw = &adapter->hw;
- adapter->therm_data[i].sensor_data =
- &adapter->hw.mac.thermal_sensor_data.sensor[i];
-
- if (!(create_proc_read_entry(
- igb_internal_entries[index].name,
- 0444,
- adapter->therm_dir[i],
- igb_internal_entries[index].read,
- &adapter->therm_data[i]))) {
- rc = -ENOMEM;
- goto fail;
- }
- }
- }
- goto exit;
-
-fail:
- igb_del_proc_entries(adapter);
-exit:
- return rc;
-}
-
-#endif /* !IGB_HWMON */
-#endif /* IGB_PROCFS */
diff --git a/lib/librte_eal/linuxapp/kni/ethtool/igb/igb_ptp.c b/lib/librte_eal/linuxapp/kni/ethtool/igb/igb_ptp.c
deleted file mode 100644
index 454b70ce..00000000
--- a/lib/librte_eal/linuxapp/kni/ethtool/igb/igb_ptp.c
+++ /dev/null
@@ -1,944 +0,0 @@
-/*******************************************************************************
-
- Intel(R) Gigabit Ethernet Linux driver
- Copyright(c) 2007-2013 Intel Corporation.
-
- This program is free software; you can redistribute it and/or modify it
- under the terms and conditions of the GNU General Public License,
- version 2, as published by the Free Software Foundation.
-
- This program is distributed in the hope it will be useful, but WITHOUT
- ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
- more details.
-
- You should have received a copy of the GNU General Public License along with
- this program; if not, write to the Free Software Foundation, Inc.,
- 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
-
- The full GNU General Public License is included in this distribution in
- the file called "COPYING".
-
- Contact Information:
- e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
- Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
-
-*******************************************************************************/
-
-/******************************************************************************
- Copyright(c) 2011 Richard Cochran <richardcochran@gmail.com> for some of the
- 82576 and 82580 code
-******************************************************************************/
-
-#include "igb.h"
-
-#include <linux/module.h>
-#include <linux/device.h>
-#include <linux/pci.h>
-#include <linux/ptp_classify.h>
-
-#define INCVALUE_MASK 0x7fffffff
-#define ISGN 0x80000000
-
-/*
- * The 82580 timesync updates the system timer every 8ns by 8ns,
- * and this update value cannot be reprogrammed.
- *
- * Neither the 82576 nor the 82580 offer registers wide enough to hold
- * nanoseconds time values for very long. For the 82580, SYSTIM always
- * counts nanoseconds, but the upper 24 bits are not available. The
- * frequency is adjusted by changing the 32 bit fractional nanoseconds
- * register, TIMINCA.
- *
- * For the 82576, the SYSTIM register time unit is affect by the
- * choice of the 24 bit TININCA:IV (incvalue) field. Five bits of this
- * field are needed to provide the nominal 16 nanosecond period,
- * leaving 19 bits for fractional nanoseconds.
- *
- * We scale the NIC clock cycle by a large factor so that relatively
- * small clock corrections can be added or subtracted at each clock
- * tick. The drawbacks of a large factor are a) that the clock
- * register overflows more quickly (not such a big deal) and b) that
- * the increment per tick has to fit into 24 bits. As a result we
- * need to use a shift of 19 so we can fit a value of 16 into the
- * TIMINCA register.
- *
- *
- * SYSTIMH SYSTIML
- * +--------------+ +---+---+------+
- * 82576 | 32 | | 8 | 5 | 19 |
- * +--------------+ +---+---+------+
- * \________ 45 bits _______/ fract
- *
- * +----------+---+ +--------------+
- * 82580 | 24 | 8 | | 32 |
- * +----------+---+ +--------------+
- * reserved \______ 40 bits _____/
- *
- *
- * The 45 bit 82576 SYSTIM overflows every
- * 2^45 * 10^-9 / 3600 = 9.77 hours.
- *
- * The 40 bit 82580 SYSTIM overflows every
- * 2^40 * 10^-9 / 60 = 18.3 minutes.
- */
-
-#define IGB_SYSTIM_OVERFLOW_PERIOD (HZ * 60 * 9)
-#define IGB_PTP_TX_TIMEOUT (HZ * 15)
-#define INCPERIOD_82576 (1 << E1000_TIMINCA_16NS_SHIFT)
-#define INCVALUE_82576_MASK ((1 << E1000_TIMINCA_16NS_SHIFT) - 1)
-#define INCVALUE_82576 (16 << IGB_82576_TSYNC_SHIFT)
-#define IGB_NBITS_82580 40
-
-/*
- * SYSTIM read access for the 82576
- */
-
-static cycle_t igb_ptp_read_82576(const struct cyclecounter *cc)
-{
- struct igb_adapter *igb = container_of(cc, struct igb_adapter, cc);
- struct e1000_hw *hw = &igb->hw;
- u64 val;
- u32 lo, hi;
-
- lo = E1000_READ_REG(hw, E1000_SYSTIML);
- hi = E1000_READ_REG(hw, E1000_SYSTIMH);
-
- val = ((u64) hi) << 32;
- val |= lo;
-
- return val;
-}
-
-/*
- * SYSTIM read access for the 82580
- */
-
-static cycle_t igb_ptp_read_82580(const struct cyclecounter *cc)
-{
- struct igb_adapter *igb = container_of(cc, struct igb_adapter, cc);
- struct e1000_hw *hw = &igb->hw;
- u64 val;
- u32 lo, hi;
-
- /* The timestamp latches on lowest register read. For the 82580
- * the lowest register is SYSTIMR instead of SYSTIML. However we only
- * need to provide nanosecond resolution, so we just ignore it.
- */
- E1000_READ_REG(hw, E1000_SYSTIMR);
- lo = E1000_READ_REG(hw, E1000_SYSTIML);
- hi = E1000_READ_REG(hw, E1000_SYSTIMH);
-
- val = ((u64) hi) << 32;
- val |= lo;
-
- return val;
-}
-
-/*
- * SYSTIM read access for I210/I211
- */
-
-static void igb_ptp_read_i210(struct igb_adapter *adapter, struct timespec *ts)
-{
- struct e1000_hw *hw = &adapter->hw;
- u32 sec, nsec;
-
- /* The timestamp latches on lowest register read. For I210/I211, the
- * lowest register is SYSTIMR. Since we only need to provide nanosecond
- * resolution, we can ignore it.
- */
- E1000_READ_REG(hw, E1000_SYSTIMR);
- nsec = E1000_READ_REG(hw, E1000_SYSTIML);
- sec = E1000_READ_REG(hw, E1000_SYSTIMH);
-
- ts->tv_sec = sec;
- ts->tv_nsec = nsec;
-}
-
-static void igb_ptp_write_i210(struct igb_adapter *adapter,
- const struct timespec *ts)
-{
- struct e1000_hw *hw = &adapter->hw;
-
- /*
- * Writing the SYSTIMR register is not necessary as it only provides
- * sub-nanosecond resolution.
- */
- E1000_WRITE_REG(hw, E1000_SYSTIML, ts->tv_nsec);
- E1000_WRITE_REG(hw, E1000_SYSTIMH, ts->tv_sec);
-}
-
-/**
- * igb_ptp_systim_to_hwtstamp - convert system time value to hw timestamp
- * @adapter: board private structure
- * @hwtstamps: timestamp structure to update
- * @systim: unsigned 64bit system time value.
- *
- * We need to convert the system time value stored in the RX/TXSTMP registers
- * into a hwtstamp which can be used by the upper level timestamping functions.
- *
- * The 'tmreg_lock' spinlock is used to protect the consistency of the
- * system time value. This is needed because reading the 64 bit time
- * value involves reading two (or three) 32 bit registers. The first
- * read latches the value. Ditto for writing.
- *
- * In addition, here have extended the system time with an overflow
- * counter in software.
- **/
-static void igb_ptp_systim_to_hwtstamp(struct igb_adapter *adapter,
- struct skb_shared_hwtstamps *hwtstamps,
- u64 systim)
-{
- unsigned long flags;
- u64 ns;
-
- switch (adapter->hw.mac.type) {
- case e1000_82576:
- case e1000_82580:
- case e1000_i350:
- case e1000_i354:
- spin_lock_irqsave(&adapter->tmreg_lock, flags);
-
- ns = timecounter_cyc2time(&adapter->tc, systim);
-
- spin_unlock_irqrestore(&adapter->tmreg_lock, flags);
-
- memset(hwtstamps, 0, sizeof(*hwtstamps));
- hwtstamps->hwtstamp = ns_to_ktime(ns);
- break;
- case e1000_i210:
- case e1000_i211:
- memset(hwtstamps, 0, sizeof(*hwtstamps));
- /* Upper 32 bits contain s, lower 32 bits contain ns. */
- hwtstamps->hwtstamp = ktime_set(systim >> 32,
- systim & 0xFFFFFFFF);
- break;
- default:
- break;
- }
-}
-
-/*
- * PTP clock operations
- */
-
-static int igb_ptp_adjfreq_82576(struct ptp_clock_info *ptp, s32 ppb)
-{
- struct igb_adapter *igb = container_of(ptp, struct igb_adapter,
- ptp_caps);
- struct e1000_hw *hw = &igb->hw;
- int neg_adj = 0;
- u64 rate;
- u32 incvalue;
-
- if (ppb < 0) {
- neg_adj = 1;
- ppb = -ppb;
- }
- rate = ppb;
- rate <<= 14;
- rate = div_u64(rate, 1953125);
-
- incvalue = 16 << IGB_82576_TSYNC_SHIFT;
-
- if (neg_adj)
- incvalue -= rate;
- else
- incvalue += rate;
-
- E1000_WRITE_REG(hw, E1000_TIMINCA, INCPERIOD_82576 | (incvalue & INCVALUE_82576_MASK));
-
- return 0;
-}
-
-static int igb_ptp_adjfreq_82580(struct ptp_clock_info *ptp, s32 ppb)
-{
- struct igb_adapter *igb = container_of(ptp, struct igb_adapter,
- ptp_caps);
- struct e1000_hw *hw = &igb->hw;
- int neg_adj = 0;
- u64 rate;
- u32 inca;
-
- if (ppb < 0) {
- neg_adj = 1;
- ppb = -ppb;
- }
- rate = ppb;
- rate <<= 26;
- rate = div_u64(rate, 1953125);
-
- /* At 2.5G speeds, the TIMINCA register on I354 updates the clock 2.5x
- * as quickly. Account for this by dividing the adjustment by 2.5.
- */
- if (hw->mac.type == e1000_i354) {
- u32 status = E1000_READ_REG(hw, E1000_STATUS);
-
- if ((status & E1000_STATUS_2P5_SKU) &&
- !(status & E1000_STATUS_2P5_SKU_OVER)) {
- rate <<= 1;
- rate = div_u64(rate, 5);
- }
- }
-
- inca = rate & INCVALUE_MASK;
- if (neg_adj)
- inca |= ISGN;
-
- E1000_WRITE_REG(hw, E1000_TIMINCA, inca);
-
- return 0;
-}
-
-static int igb_ptp_adjtime_82576(struct ptp_clock_info *ptp, s64 delta)
-{
- struct igb_adapter *igb = container_of(ptp, struct igb_adapter,
- ptp_caps);
- unsigned long flags;
- s64 now;
-
- spin_lock_irqsave(&igb->tmreg_lock, flags);
-
- now = timecounter_read(&igb->tc);
- now += delta;
- timecounter_init(&igb->tc, &igb->cc, now);
-
- spin_unlock_irqrestore(&igb->tmreg_lock, flags);
-
- return 0;
-}
-
-static int igb_ptp_adjtime_i210(struct ptp_clock_info *ptp, s64 delta)
-{
- struct igb_adapter *igb = container_of(ptp, struct igb_adapter,
- ptp_caps);
- unsigned long flags;
- struct timespec now, then = ns_to_timespec(delta);
-
- spin_lock_irqsave(&igb->tmreg_lock, flags);
-
- igb_ptp_read_i210(igb, &now);
- now = timespec_add(now, then);
- igb_ptp_write_i210(igb, (const struct timespec *)&now);
-
- spin_unlock_irqrestore(&igb->tmreg_lock, flags);
-
- return 0;
-}
-
-static int igb_ptp_gettime_82576(struct ptp_clock_info *ptp,
- struct timespec *ts)
-{
- struct igb_adapter *igb = container_of(ptp, struct igb_adapter,
- ptp_caps);
- unsigned long flags;
- u64 ns;
- u32 remainder;
-
- spin_lock_irqsave(&igb->tmreg_lock, flags);
-
- ns = timecounter_read(&igb->tc);
-
- spin_unlock_irqrestore(&igb->tmreg_lock, flags);
-
- ts->tv_sec = div_u64_rem(ns, 1000000000, &remainder);
- ts->tv_nsec = remainder;
-
- return 0;
-}
-
-static int igb_ptp_gettime_i210(struct ptp_clock_info *ptp,
- struct timespec *ts)
-{
- struct igb_adapter *igb = container_of(ptp, struct igb_adapter,
- ptp_caps);
- unsigned long flags;
-
- spin_lock_irqsave(&igb->tmreg_lock, flags);
-
- igb_ptp_read_i210(igb, ts);
-
- spin_unlock_irqrestore(&igb->tmreg_lock, flags);
-
- return 0;
-}
-
-static int igb_ptp_settime_82576(struct ptp_clock_info *ptp,
- const struct timespec *ts)
-{
- struct igb_adapter *igb = container_of(ptp, struct igb_adapter,
- ptp_caps);
- unsigned long flags;
- u64 ns;
-
- ns = ts->tv_sec * 1000000000ULL;
- ns += ts->tv_nsec;
-
- spin_lock_irqsave(&igb->tmreg_lock, flags);
-
- timecounter_init(&igb->tc, &igb->cc, ns);
-
- spin_unlock_irqrestore(&igb->tmreg_lock, flags);
-
- return 0;
-}
-
-static int igb_ptp_settime_i210(struct ptp_clock_info *ptp,
- const struct timespec *ts)
-{
- struct igb_adapter *igb = container_of(ptp, struct igb_adapter,
- ptp_caps);
- unsigned long flags;
-
- spin_lock_irqsave(&igb->tmreg_lock, flags);
-
- igb_ptp_write_i210(igb, ts);
-
- spin_unlock_irqrestore(&igb->tmreg_lock, flags);
-
- return 0;
-}
-
-static int igb_ptp_enable(struct ptp_clock_info *ptp,
- struct ptp_clock_request *rq, int on)
-{
- return -EOPNOTSUPP;
-}
-
-/**
- * igb_ptp_tx_work
- * @work: pointer to work struct
- *
- * This work function polls the TSYNCTXCTL valid bit to determine when a
- * timestamp has been taken for the current stored skb.
- */
-void igb_ptp_tx_work(struct work_struct *work)
-{
- struct igb_adapter *adapter = container_of(work, struct igb_adapter,
- ptp_tx_work);
- struct e1000_hw *hw = &adapter->hw;
- u32 tsynctxctl;
-
- if (!adapter->ptp_tx_skb)
- return;
-
- if (time_is_before_jiffies(adapter->ptp_tx_start +
- IGB_PTP_TX_TIMEOUT)) {
- dev_kfree_skb_any(adapter->ptp_tx_skb);
- adapter->ptp_tx_skb = NULL;
- adapter->tx_hwtstamp_timeouts++;
- dev_warn(&adapter->pdev->dev, "clearing Tx timestamp hang");
- return;
- }
-
- tsynctxctl = E1000_READ_REG(hw, E1000_TSYNCTXCTL);
- if (tsynctxctl & E1000_TSYNCTXCTL_VALID)
- igb_ptp_tx_hwtstamp(adapter);
- else
- /* reschedule to check later */
- schedule_work(&adapter->ptp_tx_work);
-}
-
-static void igb_ptp_overflow_check(struct work_struct *work)
-{
- struct igb_adapter *igb =
- container_of(work, struct igb_adapter, ptp_overflow_work.work);
- struct timespec ts;
-
- igb->ptp_caps.gettime(&igb->ptp_caps, &ts);
-
- pr_debug("igb overflow check at %ld.%09lu\n", ts.tv_sec, ts.tv_nsec);
-
- schedule_delayed_work(&igb->ptp_overflow_work,
- IGB_SYSTIM_OVERFLOW_PERIOD);
-}
-
-/**
- * igb_ptp_rx_hang - detect error case when Rx timestamp registers latched
- * @adapter: private network adapter structure
- *
- * This watchdog task is scheduled to detect error case where hardware has
- * dropped an Rx packet that was timestamped when the ring is full. The
- * particular error is rare but leaves the device in a state unable to timestamp
- * any future packets.
- */
-void igb_ptp_rx_hang(struct igb_adapter *adapter)
-{
- struct e1000_hw *hw = &adapter->hw;
- struct igb_ring *rx_ring;
- u32 tsyncrxctl = E1000_READ_REG(hw, E1000_TSYNCRXCTL);
- unsigned long rx_event;
- int n;
-
- if (hw->mac.type != e1000_82576)
- return;
-
- /* If we don't have a valid timestamp in the registers, just update the
- * timeout counter and exit
- */
- if (!(tsyncrxctl & E1000_TSYNCRXCTL_VALID)) {
- adapter->last_rx_ptp_check = jiffies;
- return;
- }
-
- /* Determine the most recent watchdog or rx_timestamp event */
- rx_event = adapter->last_rx_ptp_check;
- for (n = 0; n < adapter->num_rx_queues; n++) {
- rx_ring = adapter->rx_ring[n];
- if (time_after(rx_ring->last_rx_timestamp, rx_event))
- rx_event = rx_ring->last_rx_timestamp;
- }
-
- /* Only need to read the high RXSTMP register to clear the lock */
- if (time_is_before_jiffies(rx_event + 5 * HZ)) {
- E1000_READ_REG(hw, E1000_RXSTMPH);
- adapter->last_rx_ptp_check = jiffies;
- adapter->rx_hwtstamp_cleared++;
- dev_warn(&adapter->pdev->dev, "clearing Rx timestamp hang");
- }
-}
-
-/**
- * igb_ptp_tx_hwtstamp - utility function which checks for TX time stamp
- * @adapter: Board private structure.
- *
- * If we were asked to do hardware stamping and such a time stamp is
- * available, then it must have been for this skb here because we only
- * allow only one such packet into the queue.
- */
-void igb_ptp_tx_hwtstamp(struct igb_adapter *adapter)
-{
- struct e1000_hw *hw = &adapter->hw;
- struct skb_shared_hwtstamps shhwtstamps;
- u64 regval;
-
- regval = E1000_READ_REG(hw, E1000_TXSTMPL);
- regval |= (u64)E1000_READ_REG(hw, E1000_TXSTMPH) << 32;
-
- igb_ptp_systim_to_hwtstamp(adapter, &shhwtstamps, regval);
- skb_tstamp_tx(adapter->ptp_tx_skb, &shhwtstamps);
- dev_kfree_skb_any(adapter->ptp_tx_skb);
- adapter->ptp_tx_skb = NULL;
-}
-
-/**
- * igb_ptp_rx_pktstamp - retrieve Rx per packet timestamp
- * @q_vector: Pointer to interrupt specific structure
- * @va: Pointer to address containing Rx buffer
- * @skb: Buffer containing timestamp and packet
- *
- * This function is meant to retrieve a timestamp from the first buffer of an
- * incoming frame. The value is stored in little endian format starting on
- * byte 8.
- */
-void igb_ptp_rx_pktstamp(struct igb_q_vector *q_vector,
- unsigned char *va,
- struct sk_buff *skb)
-{
- __le64 *regval = (__le64 *)va;
-
- /*
- * The timestamp is recorded in little endian format.
- * DWORD: 0 1 2 3
- * Field: Reserved Reserved SYSTIML SYSTIMH
- */
- igb_ptp_systim_to_hwtstamp(q_vector->adapter, skb_hwtstamps(skb),
- le64_to_cpu(regval[1]));
-}
-
-/**
- * igb_ptp_rx_rgtstamp - retrieve Rx timestamp stored in register
- * @q_vector: Pointer to interrupt specific structure
- * @skb: Buffer containing timestamp and packet
- *
- * This function is meant to retrieve a timestamp from the internal registers
- * of the adapter and store it in the skb.
- */
-void igb_ptp_rx_rgtstamp(struct igb_q_vector *q_vector,
- struct sk_buff *skb)
-{
- struct igb_adapter *adapter = q_vector->adapter;
- struct e1000_hw *hw = &adapter->hw;
- u64 regval;
-
- /*
- * If this bit is set, then the RX registers contain the time stamp. No
- * other packet will be time stamped until we read these registers, so
- * read the registers to make them available again. Because only one
- * packet can be time stamped at a time, we know that the register
- * values must belong to this one here and therefore we don't need to
- * compare any of the additional attributes stored for it.
- *
- * If nothing went wrong, then it should have a shared tx_flags that we
- * can turn into a skb_shared_hwtstamps.
- */
- if (!(E1000_READ_REG(hw, E1000_TSYNCRXCTL) & E1000_TSYNCRXCTL_VALID))
- return;
-
- regval = E1000_READ_REG(hw, E1000_RXSTMPL);
- regval |= (u64)E1000_READ_REG(hw, E1000_RXSTMPH) << 32;
-
- igb_ptp_systim_to_hwtstamp(adapter, skb_hwtstamps(skb), regval);
-}
-
-/**
- * igb_ptp_hwtstamp_ioctl - control hardware time stamping
- * @netdev:
- * @ifreq:
- * @cmd:
- *
- * Outgoing time stamping can be enabled and disabled. Play nice and
- * disable it when requested, although it shouldn't case any overhead
- * when no packet needs it. At most one packet in the queue may be
- * marked for time stamping, otherwise it would be impossible to tell
- * for sure to which packet the hardware time stamp belongs.
- *
- * Incoming time stamping has to be configured via the hardware
- * filters. Not all combinations are supported, in particular event
- * type has to be specified. Matching the kind of event packet is
- * not supported, with the exception of "all V2 events regardless of
- * level 2 or 4".
- *
- **/
-int igb_ptp_hwtstamp_ioctl(struct net_device *netdev,
- struct ifreq *ifr, int cmd)
-{
- struct igb_adapter *adapter = netdev_priv(netdev);
- struct e1000_hw *hw = &adapter->hw;
- struct hwtstamp_config config;
- u32 tsync_tx_ctl = E1000_TSYNCTXCTL_ENABLED;
- u32 tsync_rx_ctl = E1000_TSYNCRXCTL_ENABLED;
- u32 tsync_rx_cfg = 0;
- bool is_l4 = false;
- bool is_l2 = false;
- u32 regval;
-
- if (copy_from_user(&config, ifr->ifr_data, sizeof(config)))
- return -EFAULT;
-
- /* reserved for future extensions */
- if (config.flags)
- return -EINVAL;
-
- switch (config.tx_type) {
- case HWTSTAMP_TX_OFF:
- tsync_tx_ctl = 0;
- case HWTSTAMP_TX_ON:
- break;
- default:
- return -ERANGE;
- }
-
- switch (config.rx_filter) {
- case HWTSTAMP_FILTER_NONE:
- tsync_rx_ctl = 0;
- break;
- case HWTSTAMP_FILTER_PTP_V1_L4_SYNC:
- tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_L4_V1;
- tsync_rx_cfg = E1000_TSYNCRXCFG_PTP_V1_SYNC_MESSAGE;
- is_l4 = true;
- break;
- case HWTSTAMP_FILTER_PTP_V1_L4_DELAY_REQ:
- tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_L4_V1;
- tsync_rx_cfg = E1000_TSYNCRXCFG_PTP_V1_DELAY_REQ_MESSAGE;
- is_l4 = true;
- break;
- case HWTSTAMP_FILTER_PTP_V2_EVENT:
- case HWTSTAMP_FILTER_PTP_V2_L2_EVENT:
- case HWTSTAMP_FILTER_PTP_V2_L4_EVENT:
- case HWTSTAMP_FILTER_PTP_V2_SYNC:
- case HWTSTAMP_FILTER_PTP_V2_L2_SYNC:
- case HWTSTAMP_FILTER_PTP_V2_L4_SYNC:
- case HWTSTAMP_FILTER_PTP_V2_DELAY_REQ:
- case HWTSTAMP_FILTER_PTP_V2_L2_DELAY_REQ:
- case HWTSTAMP_FILTER_PTP_V2_L4_DELAY_REQ:
- tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_EVENT_V2;
- config.rx_filter = HWTSTAMP_FILTER_PTP_V2_EVENT;
- is_l2 = true;
- is_l4 = true;
- break;
- case HWTSTAMP_FILTER_PTP_V1_L4_EVENT:
- case HWTSTAMP_FILTER_ALL:
- /*
- * 82576 cannot timestamp all packets, which it needs to do to
- * support both V1 Sync and Delay_Req messages
- */
- if (hw->mac.type != e1000_82576) {
- tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_ALL;
- config.rx_filter = HWTSTAMP_FILTER_ALL;
- break;
- }
- /* fall through */
- default:
- config.rx_filter = HWTSTAMP_FILTER_NONE;
- return -ERANGE;
- }
-
- if (hw->mac.type == e1000_82575) {
- if (tsync_rx_ctl | tsync_tx_ctl)
- return -EINVAL;
- return 0;
- }
-
- /*
- * Per-packet timestamping only works if all packets are
- * timestamped, so enable timestamping in all packets as
- * long as one rx filter was configured.
- */
- if ((hw->mac.type >= e1000_82580) && tsync_rx_ctl) {
- tsync_rx_ctl = E1000_TSYNCRXCTL_ENABLED;
- tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_ALL;
- config.rx_filter = HWTSTAMP_FILTER_ALL;
- is_l2 = true;
- is_l4 = true;
-
- if ((hw->mac.type == e1000_i210) ||
- (hw->mac.type == e1000_i211)) {
- regval = E1000_READ_REG(hw, E1000_RXPBS);
- regval |= E1000_RXPBS_CFG_TS_EN;
- E1000_WRITE_REG(hw, E1000_RXPBS, regval);
- }
- }
-
- /* enable/disable TX */
- regval = E1000_READ_REG(hw, E1000_TSYNCTXCTL);
- regval &= ~E1000_TSYNCTXCTL_ENABLED;
- regval |= tsync_tx_ctl;
- E1000_WRITE_REG(hw, E1000_TSYNCTXCTL, regval);
-
- /* enable/disable RX */
- regval = E1000_READ_REG(hw, E1000_TSYNCRXCTL);
- regval &= ~(E1000_TSYNCRXCTL_ENABLED | E1000_TSYNCRXCTL_TYPE_MASK);
- regval |= tsync_rx_ctl;
- E1000_WRITE_REG(hw, E1000_TSYNCRXCTL, regval);
-
- /* define which PTP packets are time stamped */
- E1000_WRITE_REG(hw, E1000_TSYNCRXCFG, tsync_rx_cfg);
-
- /* define ethertype filter for timestamped packets */
- if (is_l2)
- E1000_WRITE_REG(hw, E1000_ETQF(3),
- (E1000_ETQF_FILTER_ENABLE | /* enable filter */
- E1000_ETQF_1588 | /* enable timestamping */
- ETH_P_1588)); /* 1588 eth protocol type */
- else
- E1000_WRITE_REG(hw, E1000_ETQF(3), 0);
-
- /* L4 Queue Filter[3]: filter by destination port and protocol */
- if (is_l4) {
- u32 ftqf = (IPPROTO_UDP /* UDP */
- | E1000_FTQF_VF_BP /* VF not compared */
- | E1000_FTQF_1588_TIME_STAMP /* Enable Timestamping */
- | E1000_FTQF_MASK); /* mask all inputs */
- ftqf &= ~E1000_FTQF_MASK_PROTO_BP; /* enable protocol check */
-
- E1000_WRITE_REG(hw, E1000_IMIR(3), htons(PTP_EV_PORT));
- E1000_WRITE_REG(hw, E1000_IMIREXT(3),
- (E1000_IMIREXT_SIZE_BP | E1000_IMIREXT_CTRL_BP));
- if (hw->mac.type == e1000_82576) {
- /* enable source port check */
- E1000_WRITE_REG(hw, E1000_SPQF(3), htons(PTP_EV_PORT));
- ftqf &= ~E1000_FTQF_MASK_SOURCE_PORT_BP;
- }
- E1000_WRITE_REG(hw, E1000_FTQF(3), ftqf);
- } else {
- E1000_WRITE_REG(hw, E1000_FTQF(3), E1000_FTQF_MASK);
- }
- E1000_WRITE_FLUSH(hw);
-
- /* clear TX/RX time stamp registers, just to be sure */
- regval = E1000_READ_REG(hw, E1000_TXSTMPL);
- regval = E1000_READ_REG(hw, E1000_TXSTMPH);
- regval = E1000_READ_REG(hw, E1000_RXSTMPL);
- regval = E1000_READ_REG(hw, E1000_RXSTMPH);
-
- return copy_to_user(ifr->ifr_data, &config, sizeof(config)) ?
- -EFAULT : 0;
-}
-
-void igb_ptp_init(struct igb_adapter *adapter)
-{
- struct e1000_hw *hw = &adapter->hw;
- struct net_device *netdev = adapter->netdev;
-
- switch (hw->mac.type) {
- case e1000_82576:
- snprintf(adapter->ptp_caps.name, 16, "%pm", netdev->dev_addr);
- adapter->ptp_caps.owner = THIS_MODULE;
- adapter->ptp_caps.max_adj = 999999881;
- adapter->ptp_caps.n_ext_ts = 0;
- adapter->ptp_caps.pps = 0;
- adapter->ptp_caps.adjfreq = igb_ptp_adjfreq_82576;
- adapter->ptp_caps.adjtime = igb_ptp_adjtime_82576;
- adapter->ptp_caps.gettime = igb_ptp_gettime_82576;
- adapter->ptp_caps.settime = igb_ptp_settime_82576;
- adapter->ptp_caps.enable = igb_ptp_enable;
- adapter->cc.read = igb_ptp_read_82576;
- adapter->cc.mask = CLOCKSOURCE_MASK(64);
- adapter->cc.mult = 1;
- adapter->cc.shift = IGB_82576_TSYNC_SHIFT;
- /* Dial the nominal frequency. */
- E1000_WRITE_REG(hw, E1000_TIMINCA, INCPERIOD_82576 |
- INCVALUE_82576);
- break;
- case e1000_82580:
- case e1000_i350:
- case e1000_i354:
- snprintf(adapter->ptp_caps.name, 16, "%pm", netdev->dev_addr);
- adapter->ptp_caps.owner = THIS_MODULE;
- adapter->ptp_caps.max_adj = 62499999;
- adapter->ptp_caps.n_ext_ts = 0;
- adapter->ptp_caps.pps = 0;
- adapter->ptp_caps.adjfreq = igb_ptp_adjfreq_82580;
- adapter->ptp_caps.adjtime = igb_ptp_adjtime_82576;
- adapter->ptp_caps.gettime = igb_ptp_gettime_82576;
- adapter->ptp_caps.settime = igb_ptp_settime_82576;
- adapter->ptp_caps.enable = igb_ptp_enable;
- adapter->cc.read = igb_ptp_read_82580;
- adapter->cc.mask = CLOCKSOURCE_MASK(IGB_NBITS_82580);
- adapter->cc.mult = 1;
- adapter->cc.shift = 0;
- /* Enable the timer functions by clearing bit 31. */
- E1000_WRITE_REG(hw, E1000_TSAUXC, 0x0);
- break;
- case e1000_i210:
- case e1000_i211:
- snprintf(adapter->ptp_caps.name, 16, "%pm", netdev->dev_addr);
- adapter->ptp_caps.owner = THIS_MODULE;
- adapter->ptp_caps.max_adj = 62499999;
- adapter->ptp_caps.n_ext_ts = 0;
- adapter->ptp_caps.pps = 0;
- adapter->ptp_caps.adjfreq = igb_ptp_adjfreq_82580;
- adapter->ptp_caps.adjtime = igb_ptp_adjtime_i210;
- adapter->ptp_caps.gettime = igb_ptp_gettime_i210;
- adapter->ptp_caps.settime = igb_ptp_settime_i210;
- adapter->ptp_caps.enable = igb_ptp_enable;
- /* Enable the timer functions by clearing bit 31. */
- E1000_WRITE_REG(hw, E1000_TSAUXC, 0x0);
- break;
- default:
- adapter->ptp_clock = NULL;
- return;
- }
-
- E1000_WRITE_FLUSH(hw);
-
- spin_lock_init(&adapter->tmreg_lock);
- INIT_WORK(&adapter->ptp_tx_work, igb_ptp_tx_work);
-
- /* Initialize the clock and overflow work for devices that need it. */
- if ((hw->mac.type == e1000_i210) || (hw->mac.type == e1000_i211)) {
- struct timespec ts = ktime_to_timespec(ktime_get_real());
-
- igb_ptp_settime_i210(&adapter->ptp_caps, &ts);
- } else {
- timecounter_init(&adapter->tc, &adapter->cc,
- ktime_to_ns(ktime_get_real()));
-
- INIT_DELAYED_WORK(&adapter->ptp_overflow_work,
- igb_ptp_overflow_check);
-
- schedule_delayed_work(&adapter->ptp_overflow_work,
- IGB_SYSTIM_OVERFLOW_PERIOD);
- }
-
- /* Initialize the time sync interrupts for devices that support it. */
- if (hw->mac.type >= e1000_82580) {
- E1000_WRITE_REG(hw, E1000_TSIM, E1000_TSIM_TXTS);
- E1000_WRITE_REG(hw, E1000_IMS, E1000_IMS_TS);
- }
-
- adapter->ptp_clock = ptp_clock_register(&adapter->ptp_caps,
- &adapter->pdev->dev);
- if (IS_ERR(adapter->ptp_clock)) {
- adapter->ptp_clock = NULL;
- dev_err(&adapter->pdev->dev, "ptp_clock_register failed\n");
- } else {
- dev_info(&adapter->pdev->dev, "added PHC on %s\n",
- adapter->netdev->name);
- adapter->flags |= IGB_FLAG_PTP;
- }
-}
-
-/**
- * igb_ptp_stop - Disable PTP device and stop the overflow check.
- * @adapter: Board private structure.
- *
- * This function stops the PTP support and cancels the delayed work.
- **/
-void igb_ptp_stop(struct igb_adapter *adapter)
-{
- switch (adapter->hw.mac.type) {
- case e1000_82576:
- case e1000_82580:
- case e1000_i350:
- case e1000_i354:
- cancel_delayed_work_sync(&adapter->ptp_overflow_work);
- break;
- case e1000_i210:
- case e1000_i211:
- /* No delayed work to cancel. */
- break;
- default:
- return;
- }
-
- cancel_work_sync(&adapter->ptp_tx_work);
- if (adapter->ptp_tx_skb) {
- dev_kfree_skb_any(adapter->ptp_tx_skb);
- adapter->ptp_tx_skb = NULL;
- }
-
- if (adapter->ptp_clock) {
- ptp_clock_unregister(adapter->ptp_clock);
- dev_info(&adapter->pdev->dev, "removed PHC on %s\n",
- adapter->netdev->name);
- adapter->flags &= ~IGB_FLAG_PTP;
- }
-}
-
-/**
- * igb_ptp_reset - Re-enable the adapter for PTP following a reset.
- * @adapter: Board private structure.
- *
- * This function handles the reset work required to re-enable the PTP device.
- **/
-void igb_ptp_reset(struct igb_adapter *adapter)
-{
- struct e1000_hw *hw = &adapter->hw;
-
- if (!(adapter->flags & IGB_FLAG_PTP))
- return;
-
- switch (adapter->hw.mac.type) {
- case e1000_82576:
- /* Dial the nominal frequency. */
- E1000_WRITE_REG(hw, E1000_TIMINCA, INCPERIOD_82576 |
- INCVALUE_82576);
- break;
- case e1000_82580:
- case e1000_i350:
- case e1000_i354:
- case e1000_i210:
- case e1000_i211:
- /* Enable the timer functions and interrupts. */
- E1000_WRITE_REG(hw, E1000_TSAUXC, 0x0);
- E1000_WRITE_REG(hw, E1000_TSIM, E1000_TSIM_TXTS);
- E1000_WRITE_REG(hw, E1000_IMS, E1000_IMS_TS);
- break;
- default:
- /* No work to do. */
- return;
- }
-
- /* Re-initialize the timer. */
- if ((hw->mac.type == e1000_i210) || (hw->mac.type == e1000_i211)) {
- struct timespec ts = ktime_to_timespec(ktime_get_real());
-
- igb_ptp_settime_i210(&adapter->ptp_caps, &ts);
- } else {
- timecounter_init(&adapter->tc, &adapter->cc,
- ktime_to_ns(ktime_get_real()));
- }
-}
diff --git a/lib/librte_eal/linuxapp/kni/ethtool/igb/igb_regtest.h b/lib/librte_eal/linuxapp/kni/ethtool/igb/igb_regtest.h
index 18da64a3..9d49b45e 100644
--- a/lib/librte_eal/linuxapp/kni/ethtool/igb/igb_regtest.h
+++ b/lib/librte_eal/linuxapp/kni/ethtool/igb/igb_regtest.h
@@ -17,7 +17,7 @@
51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
The full GNU General Public License is included in this distribution in
- the file called "COPYING".
+ the file called "LICENSE.GPL".
Contact Information:
e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
diff --git a/lib/librte_eal/linuxapp/kni/ethtool/igb/igb_vmdq.c b/lib/librte_eal/linuxapp/kni/ethtool/igb/igb_vmdq.c
index 015c8952..205da562 100644
--- a/lib/librte_eal/linuxapp/kni/ethtool/igb/igb_vmdq.c
+++ b/lib/librte_eal/linuxapp/kni/ethtool/igb/igb_vmdq.c
@@ -17,7 +17,7 @@
51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
The full GNU General Public License is included in this distribution in
- the file called "COPYING".
+ the file called "LICENSE.GPL".
Contact Information:
e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
diff --git a/lib/librte_eal/linuxapp/kni/ethtool/igb/igb_vmdq.h b/lib/librte_eal/linuxapp/kni/ethtool/igb/igb_vmdq.h
index e51e7c4e..c6d4c568 100644
--- a/lib/librte_eal/linuxapp/kni/ethtool/igb/igb_vmdq.h
+++ b/lib/librte_eal/linuxapp/kni/ethtool/igb/igb_vmdq.h
@@ -17,7 +17,7 @@
51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
The full GNU General Public License is included in this distribution in
- the file called "COPYING".
+ the file called "LICENSE.GPL".
Contact Information:
e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
diff --git a/lib/librte_eal/linuxapp/kni/ethtool/igb/kcompat.c b/lib/librte_eal/linuxapp/kni/ethtool/igb/kcompat.c
deleted file mode 100644
index bde3a83c..00000000
--- a/lib/librte_eal/linuxapp/kni/ethtool/igb/kcompat.c
+++ /dev/null
@@ -1,1482 +0,0 @@
-/*******************************************************************************
-
- Intel(R) Gigabit Ethernet Linux driver
- Copyright(c) 2007-2013 Intel Corporation.
-
- This program is free software; you can redistribute it and/or modify it
- under the terms and conditions of the GNU General Public License,
- version 2, as published by the Free Software Foundation.
-
- This program is distributed in the hope it will be useful, but WITHOUT
- ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
- more details.
-
- You should have received a copy of the GNU General Public License along with
- this program; if not, write to the Free Software Foundation, Inc.,
- 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
-
- The full GNU General Public License is included in this distribution in
- the file called "COPYING".
-
- Contact Information:
- e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
- Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
-
-*******************************************************************************/
-
-#include "igb.h"
-#include "kcompat.h"
-
-/*****************************************************************************/
-#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,4,8) )
-/* From lib/vsprintf.c */
-#include <asm/div64.h>
-
-static int skip_atoi(const char **s)
-{
- int i=0;
-
- while (isdigit(**s))
- i = i*10 + *((*s)++) - '0';
- return i;
-}
-
-#define _kc_ZEROPAD 1 /* pad with zero */
-#define _kc_SIGN 2 /* unsigned/signed long */
-#define _kc_PLUS 4 /* show plus */
-#define _kc_SPACE 8 /* space if plus */
-#define _kc_LEFT 16 /* left justified */
-#define _kc_SPECIAL 32 /* 0x */
-#define _kc_LARGE 64 /* use 'ABCDEF' instead of 'abcdef' */
-
-static char * number(char * buf, char * end, long long num, int base, int size, int precision, int type)
-{
- char c,sign,tmp[66];
- const char *digits;
- const char small_digits[] = "0123456789abcdefghijklmnopqrstuvwxyz";
- const char large_digits[] = "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ";
- int i;
-
- digits = (type & _kc_LARGE) ? large_digits : small_digits;
- if (type & _kc_LEFT)
- type &= ~_kc_ZEROPAD;
- if (base < 2 || base > 36)
- return 0;
- c = (type & _kc_ZEROPAD) ? '0' : ' ';
- sign = 0;
- if (type & _kc_SIGN) {
- if (num < 0) {
- sign = '-';
- num = -num;
- size--;
- } else if (type & _kc_PLUS) {
- sign = '+';
- size--;
- } else if (type & _kc_SPACE) {
- sign = ' ';
- size--;
- }
- }
- if (type & _kc_SPECIAL) {
- if (base == 16)
- size -= 2;
- else if (base == 8)
- size--;
- }
- i = 0;
- if (num == 0)
- tmp[i++]='0';
- else while (num != 0)
- tmp[i++] = digits[do_div(num,base)];
- if (i > precision)
- precision = i;
- size -= precision;
- if (!(type&(_kc_ZEROPAD+_kc_LEFT))) {
- while(size-->0) {
- if (buf <= end)
- *buf = ' ';
- ++buf;
- }
- }
- if (sign) {
- if (buf <= end)
- *buf = sign;
- ++buf;
- }
- if (type & _kc_SPECIAL) {
- if (base==8) {
- if (buf <= end)
- *buf = '0';
- ++buf;
- } else if (base==16) {
- if (buf <= end)
- *buf = '0';
- ++buf;
- if (buf <= end)
- *buf = digits[33];
- ++buf;
- }
- }
- if (!(type & _kc_LEFT)) {
- while (size-- > 0) {
- if (buf <= end)
- *buf = c;
- ++buf;
- }
- }
- while (i < precision--) {
- if (buf <= end)
- *buf = '0';
- ++buf;
- }
- while (i-- > 0) {
- if (buf <= end)
- *buf = tmp[i];
- ++buf;
- }
- while (size-- > 0) {
- if (buf <= end)
- *buf = ' ';
- ++buf;
- }
- return buf;
-}
-
-int _kc_vsnprintf(char *buf, size_t size, const char *fmt, va_list args)
-{
- int len;
- unsigned long long num;
- int i, base;
- char *str, *end, c;
- const char *s;
-
- int flags; /* flags to number() */
-
- int field_width; /* width of output field */
- int precision; /* min. # of digits for integers; max
- number of chars for from string */
- int qualifier; /* 'h', 'l', or 'L' for integer fields */
- /* 'z' support added 23/7/1999 S.H. */
- /* 'z' changed to 'Z' --davidm 1/25/99 */
-
- str = buf;
- end = buf + size - 1;
-
- if (end < buf - 1) {
- end = ((void *) -1);
- size = end - buf + 1;
- }
-
- for (; *fmt ; ++fmt) {
- if (*fmt != '%') {
- if (str <= end)
- *str = *fmt;
- ++str;
- continue;
- }
-
- /* process flags */
- flags = 0;
- repeat:
- ++fmt; /* this also skips first '%' */
- switch (*fmt) {
- case '-': flags |= _kc_LEFT; goto repeat;
- case '+': flags |= _kc_PLUS; goto repeat;
- case ' ': flags |= _kc_SPACE; goto repeat;
- case '#': flags |= _kc_SPECIAL; goto repeat;
- case '0': flags |= _kc_ZEROPAD; goto repeat;
- }
-
- /* get field width */
- field_width = -1;
- if (isdigit(*fmt))
- field_width = skip_atoi(&fmt);
- else if (*fmt == '*') {
- ++fmt;
- /* it's the next argument */
- field_width = va_arg(args, int);
- if (field_width < 0) {
- field_width = -field_width;
- flags |= _kc_LEFT;
- }
- }
-
- /* get the precision */
- precision = -1;
- if (*fmt == '.') {
- ++fmt;
- if (isdigit(*fmt))
- precision = skip_atoi(&fmt);
- else if (*fmt == '*') {
- ++fmt;
- /* it's the next argument */
- precision = va_arg(args, int);
- }
- if (precision < 0)
- precision = 0;
- }
-
- /* get the conversion qualifier */
- qualifier = -1;
- if (*fmt == 'h' || *fmt == 'l' || *fmt == 'L' || *fmt =='Z') {
- qualifier = *fmt;
- ++fmt;
- }
-
- /* default base */
- base = 10;
-
- switch (*fmt) {
- case 'c':
- if (!(flags & _kc_LEFT)) {
- while (--field_width > 0) {
- if (str <= end)
- *str = ' ';
- ++str;
- }
- }
- c = (unsigned char) va_arg(args, int);
- if (str <= end)
- *str = c;
- ++str;
- while (--field_width > 0) {
- if (str <= end)
- *str = ' ';
- ++str;
- }
- continue;
-
- case 's':
- s = va_arg(args, char *);
- if (!s)
- s = "<NULL>";
-
- len = strnlen(s, precision);
-
- if (!(flags & _kc_LEFT)) {
- while (len < field_width--) {
- if (str <= end)
- *str = ' ';
- ++str;
- }
- }
- for (i = 0; i < len; ++i) {
- if (str <= end)
- *str = *s;
- ++str; ++s;
- }
- while (len < field_width--) {
- if (str <= end)
- *str = ' ';
- ++str;
- }
- continue;
-
- case 'p':
- if (field_width == -1) {
- field_width = 2*sizeof(void *);
- flags |= _kc_ZEROPAD;
- }
- str = number(str, end,
- (unsigned long) va_arg(args, void *),
- 16, field_width, precision, flags);
- continue;
-
-
- case 'n':
- /* FIXME:
- * What does C99 say about the overflow case here? */
- if (qualifier == 'l') {
- long * ip = va_arg(args, long *);
- *ip = (str - buf);
- } else if (qualifier == 'Z') {
- size_t * ip = va_arg(args, size_t *);
- *ip = (str - buf);
- } else {
- int * ip = va_arg(args, int *);
- *ip = (str - buf);
- }
- continue;
-
- case '%':
- if (str <= end)
- *str = '%';
- ++str;
- continue;
-
- /* integer number formats - set up the flags and "break" */
- case 'o':
- base = 8;
- break;
-
- case 'X':
- flags |= _kc_LARGE;
- case 'x':
- base = 16;
- break;
-
- case 'd':
- case 'i':
- flags |= _kc_SIGN;
- case 'u':
- break;
-
- default:
- if (str <= end)
- *str = '%';
- ++str;
- if (*fmt) {
- if (str <= end)
- *str = *fmt;
- ++str;
- } else {
- --fmt;
- }
- continue;
- }
- if (qualifier == 'L')
- num = va_arg(args, long long);
- else if (qualifier == 'l') {
- num = va_arg(args, unsigned long);
- if (flags & _kc_SIGN)
- num = (signed long) num;
- } else if (qualifier == 'Z') {
- num = va_arg(args, size_t);
- } else if (qualifier == 'h') {
- num = (unsigned short) va_arg(args, int);
- if (flags & _kc_SIGN)
- num = (signed short) num;
- } else {
- num = va_arg(args, unsigned int);
- if (flags & _kc_SIGN)
- num = (signed int) num;
- }
- str = number(str, end, num, base,
- field_width, precision, flags);
- }
- if (str <= end)
- *str = '\0';
- else if (size > 0)
- /* don't write out a null byte if the buf size is zero */
- *end = '\0';
- /* the trailing null byte doesn't count towards the total
- * ++str;
- */
- return str-buf;
-}
-
-int _kc_snprintf(char * buf, size_t size, const char *fmt, ...)
-{
- va_list args;
- int i;
-
- va_start(args, fmt);
- i = _kc_vsnprintf(buf,size,fmt,args);
- va_end(args);
- return i;
-}
-#endif /* < 2.4.8 */
-
-/*****************************************************************************/
-#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,4,13) )
-
-/**************************************/
-/* PCI DMA MAPPING */
-
-#if defined(CONFIG_HIGHMEM)
-
-#ifndef PCI_DRAM_OFFSET
-#define PCI_DRAM_OFFSET 0
-#endif
-
-u64
-_kc_pci_map_page(struct pci_dev *dev, struct page *page, unsigned long offset,
- size_t size, int direction)
-{
- return (((u64) (page - mem_map) << PAGE_SHIFT) + offset +
- PCI_DRAM_OFFSET);
-}
-
-#else /* CONFIG_HIGHMEM */
-
-u64
-_kc_pci_map_page(struct pci_dev *dev, struct page *page, unsigned long offset,
- size_t size, int direction)
-{
- return pci_map_single(dev, (void *)page_address(page) + offset, size,
- direction);
-}
-
-#endif /* CONFIG_HIGHMEM */
-
-void
-_kc_pci_unmap_page(struct pci_dev *dev, u64 dma_addr, size_t size,
- int direction)
-{
- return pci_unmap_single(dev, dma_addr, size, direction);
-}
-
-#endif /* 2.4.13 => 2.4.3 */
-
-/*****************************************************************************/
-#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,4,3) )
-
-/**************************************/
-/* PCI DRIVER API */
-
-int
-_kc_pci_set_dma_mask(struct pci_dev *dev, dma_addr_t mask)
-{
- if (!pci_dma_supported(dev, mask))
- return -EIO;
- dev->dma_mask = mask;
- return 0;
-}
-
-int
-_kc_pci_request_regions(struct pci_dev *dev, char *res_name)
-{
- int i;
-
- for (i = 0; i < 6; i++) {
- if (pci_resource_len(dev, i) == 0)
- continue;
-
- if (pci_resource_flags(dev, i) & IORESOURCE_IO) {
- if (!request_region(pci_resource_start(dev, i), pci_resource_len(dev, i), res_name)) {
- pci_release_regions(dev);
- return -EBUSY;
- }
- } else if (pci_resource_flags(dev, i) & IORESOURCE_MEM) {
- if (!request_mem_region(pci_resource_start(dev, i), pci_resource_len(dev, i), res_name)) {
- pci_release_regions(dev);
- return -EBUSY;
- }
- }
- }
- return 0;
-}
-
-void
-_kc_pci_release_regions(struct pci_dev *dev)
-{
- int i;
-
- for (i = 0; i < 6; i++) {
- if (pci_resource_len(dev, i) == 0)
- continue;
-
- if (pci_resource_flags(dev, i) & IORESOURCE_IO)
- release_region(pci_resource_start(dev, i), pci_resource_len(dev, i));
-
- else if (pci_resource_flags(dev, i) & IORESOURCE_MEM)
- release_mem_region(pci_resource_start(dev, i), pci_resource_len(dev, i));
- }
-}
-
-/**************************************/
-/* NETWORK DRIVER API */
-
-struct net_device *
-_kc_alloc_etherdev(int sizeof_priv)
-{
- struct net_device *dev;
- int alloc_size;
-
- alloc_size = sizeof(*dev) + sizeof_priv + IFNAMSIZ + 31;
- dev = kzalloc(alloc_size, GFP_KERNEL);
- if (!dev)
- return NULL;
-
- if (sizeof_priv)
- dev->priv = (void *) (((unsigned long)(dev + 1) + 31) & ~31);
- dev->name[0] = '\0';
- ether_setup(dev);
-
- return dev;
-}
-
-int
-_kc_is_valid_ether_addr(u8 *addr)
-{
- const char zaddr[6] = { 0, };
-
- return !(addr[0] & 1) && memcmp(addr, zaddr, 6);
-}
-
-#endif /* 2.4.3 => 2.4.0 */
-
-/*****************************************************************************/
-#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,4,6) )
-
-int
-_kc_pci_set_power_state(struct pci_dev *dev, int state)
-{
- return 0;
-}
-
-int
-_kc_pci_enable_wake(struct pci_dev *pdev, u32 state, int enable)
-{
- return 0;
-}
-
-#endif /* 2.4.6 => 2.4.3 */
-
-/*****************************************************************************/
-#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,0) )
-void _kc_skb_fill_page_desc(struct sk_buff *skb, int i, struct page *page,
- int off, int size)
-{
- skb_frag_t *frag = &skb_shinfo(skb)->frags[i];
- frag->page = page;
- frag->page_offset = off;
- frag->size = size;
- skb_shinfo(skb)->nr_frags = i + 1;
-}
-
-/*
- * Original Copyright:
- * find_next_bit.c: fallback find next bit implementation
- *
- * Copyright (C) 2004 Red Hat, Inc. All Rights Reserved.
- * Written by David Howells (dhowells@redhat.com)
- */
-
-/**
- * find_next_bit - find the next set bit in a memory region
- * @addr: The address to base the search on
- * @offset: The bitnumber to start searching at
- * @size: The maximum size to search
- */
-unsigned long find_next_bit(const unsigned long *addr, unsigned long size,
- unsigned long offset)
-{
- const unsigned long *p = addr + BITOP_WORD(offset);
- unsigned long result = offset & ~(BITS_PER_LONG-1);
- unsigned long tmp;
-
- if (offset >= size)
- return size;
- size -= result;
- offset %= BITS_PER_LONG;
- if (offset) {
- tmp = *(p++);
- tmp &= (~0UL << offset);
- if (size < BITS_PER_LONG)
- goto found_first;
- if (tmp)
- goto found_middle;
- size -= BITS_PER_LONG;
- result += BITS_PER_LONG;
- }
- while (size & ~(BITS_PER_LONG-1)) {
- if ((tmp = *(p++)))
- goto found_middle;
- result += BITS_PER_LONG;
- size -= BITS_PER_LONG;
- }
- if (!size)
- return result;
- tmp = *p;
-
-found_first:
- tmp &= (~0UL >> (BITS_PER_LONG - size));
- if (tmp == 0UL) /* Are any bits set? */
- return result + size; /* Nope. */
-found_middle:
- return result + ffs(tmp);
-}
-
-size_t _kc_strlcpy(char *dest, const char *src, size_t size)
-{
- size_t ret = strlen(src);
-
- if (size) {
- size_t len = (ret >= size) ? size - 1 : ret;
- memcpy(dest, src, len);
- dest[len] = '\0';
- }
- return ret;
-}
-
-#ifndef do_div
-#if BITS_PER_LONG == 32
-uint32_t __attribute__((weak)) _kc__div64_32(uint64_t *n, uint32_t base)
-{
- uint64_t rem = *n;
- uint64_t b = base;
- uint64_t res, d = 1;
- uint32_t high = rem >> 32;
-
- /* Reduce the thing a bit first */
- res = 0;
- if (high >= base) {
- high /= base;
- res = (uint64_t) high << 32;
- rem -= (uint64_t) (high*base) << 32;
- }
-
- while ((int64_t)b > 0 && b < rem) {
- b = b+b;
- d = d+d;
- }
-
- do {
- if (rem >= b) {
- rem -= b;
- res += d;
- }
- b >>= 1;
- d >>= 1;
- } while (d);
-
- *n = res;
- return rem;
-}
-#endif /* BITS_PER_LONG == 32 */
-#endif /* do_div */
-#endif /* 2.6.0 => 2.4.6 */
-
-/*****************************************************************************/
-#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,4) )
-int _kc_scnprintf(char * buf, size_t size, const char *fmt, ...)
-{
- va_list args;
- int i;
-
- va_start(args, fmt);
- i = vsnprintf(buf, size, fmt, args);
- va_end(args);
- return (i >= size) ? (size - 1) : i;
-}
-#endif /* < 2.6.4 */
-
-/*****************************************************************************/
-#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,10) )
-DECLARE_BITMAP(_kcompat_node_online_map, MAX_NUMNODES) = {1};
-#endif /* < 2.6.10 */
-
-/*****************************************************************************/
-#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,13) )
-char *_kc_kstrdup(const char *s, unsigned int gfp)
-{
- size_t len;
- char *buf;
-
- if (!s)
- return NULL;
-
- len = strlen(s) + 1;
- buf = kmalloc(len, gfp);
- if (buf)
- memcpy(buf, s, len);
- return buf;
-}
-#endif /* < 2.6.13 */
-
-/*****************************************************************************/
-#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,14) )
-void *_kc_kzalloc(size_t size, int flags)
-{
- void *ret = kmalloc(size, flags);
- if (ret)
- memset(ret, 0, size);
- return ret;
-}
-#endif /* <= 2.6.13 */
-
-/*****************************************************************************/
-#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,19) )
-int _kc_skb_pad(struct sk_buff *skb, int pad)
-{
- int ntail;
-
- /* If the skbuff is non linear tailroom is always zero.. */
- if(!skb_cloned(skb) && skb_tailroom(skb) >= pad) {
- memset(skb->data+skb->len, 0, pad);
- return 0;
- }
-
- ntail = skb->data_len + pad - (skb->end - skb->tail);
- if (likely(skb_cloned(skb) || ntail > 0)) {
- if (pskb_expand_head(skb, 0, ntail, GFP_ATOMIC));
- goto free_skb;
- }
-
-#ifdef MAX_SKB_FRAGS
- if (skb_is_nonlinear(skb) &&
- !__pskb_pull_tail(skb, skb->data_len))
- goto free_skb;
-
-#endif
- memset(skb->data + skb->len, 0, pad);
- return 0;
-
-free_skb:
- kfree_skb(skb);
- return -ENOMEM;
-}
-
-#if (!(RHEL_RELEASE_CODE && RHEL_RELEASE_CODE >= RHEL_RELEASE_VERSION(5,4)))
-int _kc_pci_save_state(struct pci_dev *pdev)
-{
- struct net_device *netdev = pci_get_drvdata(pdev);
- struct adapter_struct *adapter = netdev_priv(netdev);
- int size = PCI_CONFIG_SPACE_LEN, i;
- u16 pcie_cap_offset, pcie_link_status;
-
-#if ( LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,0) )
- /* no ->dev for 2.4 kernels */
- WARN_ON(pdev->dev.driver_data == NULL);
-#endif
- pcie_cap_offset = pci_find_capability(pdev, PCI_CAP_ID_EXP);
- if (pcie_cap_offset) {
- if (!pci_read_config_word(pdev,
- pcie_cap_offset + PCIE_LINK_STATUS,
- &pcie_link_status))
- size = PCIE_CONFIG_SPACE_LEN;
- }
- pci_config_space_ich8lan();
-#ifdef HAVE_PCI_ERS
- if (adapter->config_space == NULL)
-#else
- WARN_ON(adapter->config_space != NULL);
-#endif
- adapter->config_space = kmalloc(size, GFP_KERNEL);
- if (!adapter->config_space) {
- printk(KERN_ERR "Out of memory in pci_save_state\n");
- return -ENOMEM;
- }
- for (i = 0; i < (size / 4); i++)
- pci_read_config_dword(pdev, i * 4, &adapter->config_space[i]);
- return 0;
-}
-
-void _kc_pci_restore_state(struct pci_dev *pdev)
-{
- struct net_device *netdev = pci_get_drvdata(pdev);
- struct adapter_struct *adapter = netdev_priv(netdev);
- int size = PCI_CONFIG_SPACE_LEN, i;
- u16 pcie_cap_offset;
- u16 pcie_link_status;
-
- if (adapter->config_space != NULL) {
- pcie_cap_offset = pci_find_capability(pdev, PCI_CAP_ID_EXP);
- if (pcie_cap_offset &&
- !pci_read_config_word(pdev,
- pcie_cap_offset + PCIE_LINK_STATUS,
- &pcie_link_status))
- size = PCIE_CONFIG_SPACE_LEN;
-
- pci_config_space_ich8lan();
- for (i = 0; i < (size / 4); i++)
- pci_write_config_dword(pdev, i * 4, adapter->config_space[i]);
-#ifndef HAVE_PCI_ERS
- kfree(adapter->config_space);
- adapter->config_space = NULL;
-#endif
- }
-}
-#endif /* !(RHEL_RELEASE_CODE >= RHEL 5.4) */
-
-#ifdef HAVE_PCI_ERS
-void _kc_free_netdev(struct net_device *netdev)
-{
- struct adapter_struct *adapter = netdev_priv(netdev);
-
- if (adapter->config_space != NULL)
- kfree(adapter->config_space);
-#ifdef CONFIG_SYSFS
- if (netdev->reg_state == NETREG_UNINITIALIZED) {
- kfree((char *)netdev - netdev->padded);
- } else {
- BUG_ON(netdev->reg_state != NETREG_UNREGISTERED);
- netdev->reg_state = NETREG_RELEASED;
- class_device_put(&netdev->class_dev);
- }
-#else
- kfree((char *)netdev - netdev->padded);
-#endif
-}
-#endif
-
-void *_kc_kmemdup(const void *src, size_t len, unsigned gfp)
-{
- void *p;
-
- p = kzalloc(len, gfp);
- if (p)
- memcpy(p, src, len);
- return p;
-}
-#endif /* <= 2.6.19 */
-
-/*****************************************************************************/
-#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,21) )
-struct pci_dev *_kc_netdev_to_pdev(struct net_device *netdev)
-{
- return ((struct adapter_struct *)netdev_priv(netdev))->pdev;
-}
-#endif /* < 2.6.21 */
-
-/*****************************************************************************/
-#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,22) )
-/* hexdump code taken from lib/hexdump.c */
-static void _kc_hex_dump_to_buffer(const void *buf, size_t len, int rowsize,
- int groupsize, unsigned char *linebuf,
- size_t linebuflen, bool ascii)
-{
- const u8 *ptr = buf;
- u8 ch;
- int j, lx = 0;
- int ascii_column;
-
- if (rowsize != 16 && rowsize != 32)
- rowsize = 16;
-
- if (!len)
- goto nil;
- if (len > rowsize) /* limit to one line at a time */
- len = rowsize;
- if ((len % groupsize) != 0) /* no mixed size output */
- groupsize = 1;
-
- switch (groupsize) {
- case 8: {
- const u64 *ptr8 = buf;
- int ngroups = len / groupsize;
-
- for (j = 0; j < ngroups; j++)
- lx += scnprintf((char *)(linebuf + lx), linebuflen - lx,
- "%s%16.16llx", j ? " " : "",
- (unsigned long long)*(ptr8 + j));
- ascii_column = 17 * ngroups + 2;
- break;
- }
-
- case 4: {
- const u32 *ptr4 = buf;
- int ngroups = len / groupsize;
-
- for (j = 0; j < ngroups; j++)
- lx += scnprintf((char *)(linebuf + lx), linebuflen - lx,
- "%s%8.8x", j ? " " : "", *(ptr4 + j));
- ascii_column = 9 * ngroups + 2;
- break;
- }
-
- case 2: {
- const u16 *ptr2 = buf;
- int ngroups = len / groupsize;
-
- for (j = 0; j < ngroups; j++)
- lx += scnprintf((char *)(linebuf + lx), linebuflen - lx,
- "%s%4.4x", j ? " " : "", *(ptr2 + j));
- ascii_column = 5 * ngroups + 2;
- break;
- }
-
- default:
- for (j = 0; (j < len) && (lx + 3) <= linebuflen; j++) {
- ch = ptr[j];
- linebuf[lx++] = hex_asc(ch >> 4);
- linebuf[lx++] = hex_asc(ch & 0x0f);
- linebuf[lx++] = ' ';
- }
- if (j)
- lx--;
-
- ascii_column = 3 * rowsize + 2;
- break;
- }
- if (!ascii)
- goto nil;
-
- while (lx < (linebuflen - 1) && lx < (ascii_column - 1))
- linebuf[lx++] = ' ';
- for (j = 0; (j < len) && (lx + 2) < linebuflen; j++)
- linebuf[lx++] = (isascii(ptr[j]) && isprint(ptr[j])) ? ptr[j]
- : '.';
-nil:
- linebuf[lx++] = '\0';
-}
-
-void _kc_print_hex_dump(const char *level,
- const char *prefix_str, int prefix_type,
- int rowsize, int groupsize,
- const void *buf, size_t len, bool ascii)
-{
- const u8 *ptr = buf;
- int i, linelen, remaining = len;
- unsigned char linebuf[200];
-
- if (rowsize != 16 && rowsize != 32)
- rowsize = 16;
-
- for (i = 0; i < len; i += rowsize) {
- linelen = min(remaining, rowsize);
- remaining -= rowsize;
- _kc_hex_dump_to_buffer(ptr + i, linelen, rowsize, groupsize,
- linebuf, sizeof(linebuf), ascii);
-
- switch (prefix_type) {
- case DUMP_PREFIX_ADDRESS:
- printk("%s%s%*p: %s\n", level, prefix_str,
- (int)(2 * sizeof(void *)), ptr + i, linebuf);
- break;
- case DUMP_PREFIX_OFFSET:
- printk("%s%s%.8x: %s\n", level, prefix_str, i, linebuf);
- break;
- default:
- printk("%s%s%s\n", level, prefix_str, linebuf);
- break;
- }
- }
-}
-
-#ifdef HAVE_I2C_SUPPORT
-struct i2c_client *
-_kc_i2c_new_device(struct i2c_adapter *adap, struct i2c_board_info const *info)
-{
- struct i2c_client *client;
- int status;
-
- client = kzalloc(sizeof *client, GFP_KERNEL);
- if (!client)
- return NULL;
-
- client->adapter = adap;
-
- client->dev.platform_data = info->platform_data;
-
- client->flags = info->flags;
- client->addr = info->addr;
-
- strlcpy(client->name, info->type, sizeof(client->name));
-
- /* Check for address business */
- status = i2c_check_addr(adap, client->addr);
- if (status)
- goto out_err;
-
- client->dev.parent = &client->adapter->dev;
- client->dev.bus = &i2c_bus_type;
-
- status = i2c_attach_client(client);
- if (status)
- goto out_err;
-
- dev_dbg(&adap->dev, "client [%s] registered with bus id %s\n",
- client->name, dev_name(&client->dev));
-
- return client;
-
-out_err:
- dev_err(&adap->dev, "Failed to register i2c client %s at 0x%02x "
- "(%d)\n", client->name, client->addr, status);
- kfree(client);
- return NULL;
-}
-#endif /* HAVE_I2C_SUPPORT */
-#endif /* < 2.6.22 */
-
-/*****************************************************************************/
-#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,24) )
-#ifdef NAPI
-struct net_device *napi_to_poll_dev(const struct napi_struct *napi)
-{
- struct adapter_q_vector *q_vector = container_of(napi,
- struct adapter_q_vector,
- napi);
- return &q_vector->poll_dev;
-}
-
-int __kc_adapter_clean(struct net_device *netdev, int *budget)
-{
- int work_done;
- int work_to_do = min(*budget, netdev->quota);
- /* kcompat.h netif_napi_add puts napi struct in "fake netdev->priv" */
- struct napi_struct *napi = netdev->priv;
- work_done = napi->poll(napi, work_to_do);
- *budget -= work_done;
- netdev->quota -= work_done;
- return (work_done >= work_to_do) ? 1 : 0;
-}
-#endif /* NAPI */
-#endif /* <= 2.6.24 */
-
-/*****************************************************************************/
-#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,26) )
-void _kc_pci_disable_link_state(struct pci_dev *pdev, int state)
-{
- struct pci_dev *parent = pdev->bus->self;
- u16 link_state;
- int pos;
-
- if (!parent)
- return;
-
- pos = pci_find_capability(parent, PCI_CAP_ID_EXP);
- if (pos) {
- pci_read_config_word(parent, pos + PCI_EXP_LNKCTL, &link_state);
- link_state &= ~state;
- pci_write_config_word(parent, pos + PCI_EXP_LNKCTL, link_state);
- }
-}
-#endif /* < 2.6.26 */
-
-/*****************************************************************************/
-#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,27) )
-#ifdef HAVE_TX_MQ
-void _kc_netif_tx_stop_all_queues(struct net_device *netdev)
-{
- struct adapter_struct *adapter = netdev_priv(netdev);
- int i;
-
- netif_stop_queue(netdev);
- if (netif_is_multiqueue(netdev))
- for (i = 0; i < adapter->num_tx_queues; i++)
- netif_stop_subqueue(netdev, i);
-}
-void _kc_netif_tx_wake_all_queues(struct net_device *netdev)
-{
- struct adapter_struct *adapter = netdev_priv(netdev);
- int i;
-
- netif_wake_queue(netdev);
- if (netif_is_multiqueue(netdev))
- for (i = 0; i < adapter->num_tx_queues; i++)
- netif_wake_subqueue(netdev, i);
-}
-void _kc_netif_tx_start_all_queues(struct net_device *netdev)
-{
- struct adapter_struct *adapter = netdev_priv(netdev);
- int i;
-
- netif_start_queue(netdev);
- if (netif_is_multiqueue(netdev))
- for (i = 0; i < adapter->num_tx_queues; i++)
- netif_start_subqueue(netdev, i);
-}
-#endif /* HAVE_TX_MQ */
-
-#ifndef __WARN_printf
-void __kc_warn_slowpath(const char *file, int line, const char *fmt, ...)
-{
- va_list args;
-
- printk(KERN_WARNING "------------[ cut here ]------------\n");
- printk(KERN_WARNING "WARNING: at %s:%d %s()\n", file, line);
- va_start(args, fmt);
- vprintk(fmt, args);
- va_end(args);
-
- dump_stack();
-}
-#endif /* __WARN_printf */
-#endif /* < 2.6.27 */
-
-/*****************************************************************************/
-#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,28) )
-
-int
-_kc_pci_prepare_to_sleep(struct pci_dev *dev)
-{
- pci_power_t target_state;
- int error;
-
- target_state = pci_choose_state(dev, PMSG_SUSPEND);
-
- pci_enable_wake(dev, target_state, true);
-
- error = pci_set_power_state(dev, target_state);
-
- if (error)
- pci_enable_wake(dev, target_state, false);
-
- return error;
-}
-
-int
-_kc_pci_wake_from_d3(struct pci_dev *dev, bool enable)
-{
- int err;
-
- err = pci_enable_wake(dev, PCI_D3cold, enable);
- if (err)
- goto out;
-
- err = pci_enable_wake(dev, PCI_D3hot, enable);
-
-out:
- return err;
-}
-#endif /* < 2.6.28 */
-
-/*****************************************************************************/
-#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,29) )
-static void __kc_pci_set_master(struct pci_dev *pdev, bool enable)
-{
- u16 old_cmd, cmd;
-
- pci_read_config_word(pdev, PCI_COMMAND, &old_cmd);
- if (enable)
- cmd = old_cmd | PCI_COMMAND_MASTER;
- else
- cmd = old_cmd & ~PCI_COMMAND_MASTER;
- if (cmd != old_cmd) {
- dev_dbg(pci_dev_to_dev(pdev), "%s bus mastering\n",
- enable ? "enabling" : "disabling");
- pci_write_config_word(pdev, PCI_COMMAND, cmd);
- }
-#if ( LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,7) )
- pdev->is_busmaster = enable;
-#endif
-}
-
-void _kc_pci_clear_master(struct pci_dev *dev)
-{
- __kc_pci_set_master(dev, false);
-}
-#endif /* < 2.6.29 */
-
-#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,34) )
-#if (RHEL_RELEASE_CODE < RHEL_RELEASE_VERSION(6,0))
-int _kc_pci_num_vf(struct pci_dev *dev)
-{
- int num_vf = 0;
-#ifdef CONFIG_PCI_IOV
- struct pci_dev *vfdev;
-
- /* loop through all ethernet devices starting at PF dev */
- vfdev = pci_get_class(PCI_CLASS_NETWORK_ETHERNET << 8, NULL);
- while (vfdev) {
- if (vfdev->is_virtfn && vfdev->physfn == dev)
- num_vf++;
-
- vfdev = pci_get_class(PCI_CLASS_NETWORK_ETHERNET << 8, vfdev);
- }
-
-#endif
- return num_vf;
-}
-#endif /* RHEL_RELEASE_CODE */
-#endif /* < 2.6.34 */
-
-#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,35) )
-#ifdef HAVE_TX_MQ
-#if (!(RHEL_RELEASE_CODE && RHEL_RELEASE_CODE >= RHEL_RELEASE_VERSION(6,0)))
-#ifndef CONFIG_NETDEVICES_MULTIQUEUE
-void _kc_netif_set_real_num_tx_queues(struct net_device *dev, unsigned int txq)
-{
- unsigned int real_num = dev->real_num_tx_queues;
- struct Qdisc *qdisc;
- int i;
-
- if (unlikely(txq > dev->num_tx_queues))
- ;
- else if (txq > real_num)
- dev->real_num_tx_queues = txq;
- else if ( txq < real_num) {
- dev->real_num_tx_queues = txq;
- for (i = txq; i < dev->num_tx_queues; i++) {
- qdisc = netdev_get_tx_queue(dev, i)->qdisc;
- if (qdisc) {
- spin_lock_bh(qdisc_lock(qdisc));
- qdisc_reset(qdisc);
- spin_unlock_bh(qdisc_lock(qdisc));
- }
- }
- }
-}
-#endif /* CONFIG_NETDEVICES_MULTIQUEUE */
-#endif /* !(RHEL_RELEASE_CODE >= RHEL_RELEASE_VERSION(6,0)) */
-#endif /* HAVE_TX_MQ */
-
-ssize_t _kc_simple_write_to_buffer(void *to, size_t available, loff_t *ppos,
- const void __user *from, size_t count)
-{
- loff_t pos = *ppos;
- size_t res;
-
- if (pos < 0)
- return -EINVAL;
- if (pos >= available || !count)
- return 0;
- if (count > available - pos)
- count = available - pos;
- res = copy_from_user(to + pos, from, count);
- if (res == count)
- return -EFAULT;
- count -= res;
- *ppos = pos + count;
- return count;
-}
-
-#endif /* < 2.6.35 */
-
-/*****************************************************************************/
-#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,36) )
-static const u32 _kc_flags_dup_features =
- (ETH_FLAG_LRO | ETH_FLAG_NTUPLE | ETH_FLAG_RXHASH);
-
-u32 _kc_ethtool_op_get_flags(struct net_device *dev)
-{
- return dev->features & _kc_flags_dup_features;
-}
-
-int _kc_ethtool_op_set_flags(struct net_device *dev, u32 data, u32 supported)
-{
- if (data & ~supported)
- return -EINVAL;
-
- dev->features = ((dev->features & ~_kc_flags_dup_features) |
- (data & _kc_flags_dup_features));
- return 0;
-}
-#endif /* < 2.6.36 */
-
-/******************************************************************************/
-#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,39) )
-#if (!(RHEL_RELEASE_CODE && RHEL_RELEASE_CODE > RHEL_RELEASE_VERSION(6,0)))
-
-
-
-#endif /* !(RHEL_RELEASE_CODE > RHEL_RELEASE_VERSION(6,0)) */
-#endif /* < 2.6.39 */
-
-/******************************************************************************/
-#if ( LINUX_VERSION_CODE < KERNEL_VERSION(3,4,0) )
-void _kc_skb_add_rx_frag(struct sk_buff *skb, int i, struct page *page,
- int off, int size, unsigned int truesize)
-{
- skb_fill_page_desc(skb, i, page, off, size);
- skb->len += size;
- skb->data_len += size;
- skb->truesize += truesize;
-}
-
-int _kc_simple_open(struct inode *inode, struct file *file)
-{
- if (inode->i_private)
- file->private_data = inode->i_private;
-
- return 0;
-}
-
-#endif /* < 3.4.0 */
-
-/******************************************************************************/
-#if ( LINUX_VERSION_CODE < KERNEL_VERSION(3,7,0) )
-#if !(SLE_VERSION_CODE && SLE_VERSION_CODE >= SLE_VERSION(11,3,0)) && \
- !(RHEL_RELEASE_CODE && RHEL_RELEASE_CODE >= RHEL_RELEASE_VERSION(6,5))
-static inline int __kc_pcie_cap_version(struct pci_dev *dev)
-{
- int pos;
- u16 reg16;
-
- pos = pci_find_capability(dev, PCI_CAP_ID_EXP);
- if (!pos)
- return 0;
- pci_read_config_word(dev, pos + PCI_EXP_FLAGS, &reg16);
- return reg16 & PCI_EXP_FLAGS_VERS;
-}
-
-static inline bool __kc_pcie_cap_has_devctl(const struct pci_dev __always_unused *dev)
-{
- return true;
-}
-
-static inline bool __kc_pcie_cap_has_lnkctl(struct pci_dev *dev)
-{
- int type = pci_pcie_type(dev);
-
- return __kc_pcie_cap_version(dev) > 1 ||
- type == PCI_EXP_TYPE_ROOT_PORT ||
- type == PCI_EXP_TYPE_ENDPOINT ||
- type == PCI_EXP_TYPE_LEG_END;
-}
-
-static inline bool __kc_pcie_cap_has_sltctl(struct pci_dev *dev)
-{
- int type = pci_pcie_type(dev);
- int pos;
- u16 pcie_flags_reg;
-
- pos = pci_find_capability(dev, PCI_CAP_ID_EXP);
- if (!pos)
- return 0;
- pci_read_config_word(dev, pos + PCI_EXP_FLAGS, &pcie_flags_reg);
-
- return __kc_pcie_cap_version(dev) > 1 ||
- type == PCI_EXP_TYPE_ROOT_PORT ||
- (type == PCI_EXP_TYPE_DOWNSTREAM &&
- pcie_flags_reg & PCI_EXP_FLAGS_SLOT);
-}
-
-static inline bool __kc_pcie_cap_has_rtctl(struct pci_dev *dev)
-{
- int type = pci_pcie_type(dev);
-
- return __kc_pcie_cap_version(dev) > 1 ||
- type == PCI_EXP_TYPE_ROOT_PORT ||
- type == PCI_EXP_TYPE_RC_EC;
-}
-
-static bool __kc_pcie_capability_reg_implemented(struct pci_dev *dev, int pos)
-{
- if (!pci_is_pcie(dev))
- return false;
-
- switch (pos) {
- case PCI_EXP_FLAGS_TYPE:
- return true;
- case PCI_EXP_DEVCAP:
- case PCI_EXP_DEVCTL:
- case PCI_EXP_DEVSTA:
- return __kc_pcie_cap_has_devctl(dev);
- case PCI_EXP_LNKCAP:
- case PCI_EXP_LNKCTL:
- case PCI_EXP_LNKSTA:
- return __kc_pcie_cap_has_lnkctl(dev);
- case PCI_EXP_SLTCAP:
- case PCI_EXP_SLTCTL:
- case PCI_EXP_SLTSTA:
- return __kc_pcie_cap_has_sltctl(dev);
- case PCI_EXP_RTCTL:
- case PCI_EXP_RTCAP:
- case PCI_EXP_RTSTA:
- return __kc_pcie_cap_has_rtctl(dev);
- case PCI_EXP_DEVCAP2:
- case PCI_EXP_DEVCTL2:
- case PCI_EXP_LNKCAP2:
- case PCI_EXP_LNKCTL2:
- case PCI_EXP_LNKSTA2:
- return __kc_pcie_cap_version(dev) > 1;
- default:
- return false;
- }
-}
-
-/*
- * Note that these accessor functions are only for the "PCI Express
- * Capability" (see PCIe spec r3.0, sec 7.8). They do not apply to the
- * other "PCI Express Extended Capabilities" (AER, VC, ACS, MFVC, etc.)
- */
-int __kc_pcie_capability_read_word(struct pci_dev *dev, int pos, u16 *val)
-{
- int ret;
-
- *val = 0;
- if (pos & 1)
- return -EINVAL;
-
- if (__kc_pcie_capability_reg_implemented(dev, pos)) {
- ret = pci_read_config_word(dev, pci_pcie_cap(dev) + pos, val);
- /*
- * Reset *val to 0 if pci_read_config_word() fails, it may
- * have been written as 0xFFFF if hardware error happens
- * during pci_read_config_word().
- */
- if (ret)
- *val = 0;
- return ret;
- }
-
- /*
- * For Functions that do not implement the Slot Capabilities,
- * Slot Status, and Slot Control registers, these spaces must
- * be hardwired to 0b, with the exception of the Presence Detect
- * State bit in the Slot Status register of Downstream Ports,
- * which must be hardwired to 1b. (PCIe Base Spec 3.0, sec 7.8)
- */
- if (pci_is_pcie(dev) && pos == PCI_EXP_SLTSTA &&
- pci_pcie_type(dev) == PCI_EXP_TYPE_DOWNSTREAM) {
- *val = PCI_EXP_SLTSTA_PDS;
- }
-
- return 0;
-}
-
-int __kc_pcie_capability_write_word(struct pci_dev *dev, int pos, u16 val)
-{
- if (pos & 1)
- return -EINVAL;
-
- if (!__kc_pcie_capability_reg_implemented(dev, pos))
- return 0;
-
- return pci_write_config_word(dev, pci_pcie_cap(dev) + pos, val);
-}
-
-int __kc_pcie_capability_clear_and_set_word(struct pci_dev *dev, int pos,
- u16 clear, u16 set)
-{
- int ret;
- u16 val;
-
- ret = __kc_pcie_capability_read_word(dev, pos, &val);
- if (!ret) {
- val &= ~clear;
- val |= set;
- ret = __kc_pcie_capability_write_word(dev, pos, val);
- }
-
- return ret;
-}
-#endif /* !(SLE_VERSION_CODE && SLE_VERSION_CODE >= SLE_VERSION(11,3,0)) && \
- !(RHEL_RELEASE_CODE && RHEL_RELEASE_CODE >= RHEL_RELEASE_VERSION(6,5)) */
-#endif /* < 3.7.0 */
-
-/******************************************************************************/
-#if ( LINUX_VERSION_CODE < KERNEL_VERSION(3,9,0) )
-#endif /* 3.9.0 */
-
-/*****************************************************************************/
-#if ( LINUX_VERSION_CODE < KERNEL_VERSION(3,10,0) )
-#ifdef CONFIG_PCI_IOV
-int __kc_pci_vfs_assigned(struct pci_dev *dev)
-{
- unsigned int vfs_assigned = 0;
-#ifdef HAVE_PCI_DEV_FLAGS_ASSIGNED
- int pos;
- struct pci_dev *vfdev;
- unsigned short dev_id;
-
- /* only search if we are a PF */
- if (!dev->is_physfn)
- return 0;
-
- /* find SR-IOV capability */
- pos = pci_find_ext_capability(dev, PCI_EXT_CAP_ID_SRIOV);
- if (!pos)
- return 0;
-
- /*
- * determine the device ID for the VFs, the vendor ID will be the
- * same as the PF so there is no need to check for that one
- */
- pci_read_config_word(dev, pos + PCI_SRIOV_VF_DID, &dev_id);
-
- /* loop through all the VFs to see if we own any that are assigned */
- vfdev = pci_get_device(dev->vendor, dev_id, NULL);
- while (vfdev) {
- /*
- * It is considered assigned if it is a virtual function with
- * our dev as the physical function and the assigned bit is set
- */
- if (vfdev->is_virtfn && (vfdev->physfn == dev) &&
- (vfdev->dev_flags & PCI_DEV_FLAGS_ASSIGNED))
- vfs_assigned++;
-
- vfdev = pci_get_device(dev->vendor, dev_id, vfdev);
- }
-
-#endif /* HAVE_PCI_DEV_FLAGS_ASSIGNED */
- return vfs_assigned;
-}
-
-#endif /* CONFIG_PCI_IOV */
-#endif /* 3.10.0 */
diff --git a/lib/librte_eal/linuxapp/kni/ethtool/igb/kcompat.h b/lib/librte_eal/linuxapp/kni/ethtool/igb/kcompat.h
index de3b8dc9..84826b26 100644
--- a/lib/librte_eal/linuxapp/kni/ethtool/igb/kcompat.h
+++ b/lib/librte_eal/linuxapp/kni/ethtool/igb/kcompat.h
@@ -17,7 +17,7 @@
51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
The full GNU General Public License is included in this distribution in
- the file called "COPYING".
+ the file called "LICENSE.GPL".
Contact Information:
e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
@@ -3891,7 +3891,7 @@ skb_set_hash(struct sk_buff *skb, __u32 hash, __always_unused int type)
#if (( LINUX_VERSION_CODE >= KERNEL_VERSION(3,19,0) ) \
|| ( RHEL_RELEASE_CODE >= RHEL_RELEASE_VERSION(7,2) ))
#define HAVE_NDO_DFLT_BRIDGE_ADD_MASK
-#if (!( RHEL_RELEASE_CODE >= RHEL_RELEASE_VERSION(7,2) ))
+#if ( RHEL_RELEASE_CODE != RHEL_RELEASE_VERSION(7,2) )
#define HAVE_NDO_FDB_ADD_VID
#endif /* !RHEL 7.2 */
#endif /* >= 3.19.0 */
@@ -3901,12 +3901,13 @@ skb_set_hash(struct sk_buff *skb, __u32 hash, __always_unused int type)
/* vlan_tx_xx functions got renamed to skb_vlan */
#define vlan_tx_tag_get skb_vlan_tag_get
#define vlan_tx_tag_present skb_vlan_tag_present
-#if (!( RHEL_RELEASE_CODE >= RHEL_RELEASE_VERSION(7,2) ))
+#if ( RHEL_RELEASE_CODE != RHEL_RELEASE_VERSION(7,2) )
#define HAVE_NDO_BRIDGE_SET_DEL_LINK_FLAGS
#endif /* !RHEL 7.2 */
#endif /* 4.0.0 */
-#if ( LINUX_VERSION_CODE >= KERNEL_VERSION(4,1,0) )
+#if (( LINUX_VERSION_CODE >= KERNEL_VERSION(4,1,0) ) \
+ || ( RHEL_RELEASE_CODE >= RHEL_RELEASE_VERSION(7,3) ))
/* ndo_bridge_getlink adds new nlflags parameter */
#define HAVE_NDO_BRIDGE_GETLINK_NLFLAGS
#endif /* >= 4.1.0 */
@@ -3916,6 +3917,18 @@ skb_set_hash(struct sk_buff *skb, __u32 hash, __always_unused int type)
#define HAVE_NDO_BRIDGE_GETLINK_FILTER_MASK_VLAN_FILL
#endif /* >= 4.2.0 */
+/*
+ * vlan_tx_tag_* macros renamed to skb_vlan_tag_* (Linux commit: df8a39defad4)
+ * For older kernels backported this commit, need to use renamed functions.
+ * This fix is specific to RedHat/CentOS kernels.
+ */
+#if (defined(RHEL_RELEASE_CODE) && \
+ (RHEL_RELEASE_CODE >= RHEL_RELEASE_VERSION(6, 8)) && \
+ (LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 34)))
+#define vlan_tx_tag_get skb_vlan_tag_get
+#define vlan_tx_tag_present skb_vlan_tag_present
+#endif
+
#if ( LINUX_VERSION_CODE >= KERNEL_VERSION(4,9,0) )
#define HAVE_VF_VLAN_PROTO
#endif /* >= 4.9.0 */
diff --git a/lib/librte_eal/linuxapp/kni/ethtool/igb/kcompat_ethtool.c b/lib/librte_eal/linuxapp/kni/ethtool/igb/kcompat_ethtool.c
deleted file mode 100644
index e1a89388..00000000
--- a/lib/librte_eal/linuxapp/kni/ethtool/igb/kcompat_ethtool.c
+++ /dev/null
@@ -1,1171 +0,0 @@
-/*******************************************************************************
-
- Intel(R) Gigabit Ethernet Linux driver
- Copyright(c) 2007-2013 Intel Corporation.
-
- This program is free software; you can redistribute it and/or modify it
- under the terms and conditions of the GNU General Public License,
- version 2, as published by the Free Software Foundation.
-
- This program is distributed in the hope it will be useful, but WITHOUT
- ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
- more details.
-
- You should have received a copy of the GNU General Public License along with
- this program; if not, write to the Free Software Foundation, Inc.,
- 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
-
- The full GNU General Public License is included in this distribution in
- the file called "COPYING".
-
- Contact Information:
- e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
- Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
-
-*******************************************************************************/
-
-/*
- * net/core/ethtool.c - Ethtool ioctl handler
- * Copyright (c) 2003 Matthew Wilcox <matthew@wil.cx>
- *
- * This file is where we call all the ethtool_ops commands to get
- * the information ethtool needs. We fall back to calling do_ioctl()
- * for drivers which haven't been converted to ethtool_ops yet.
- *
- * It's GPL, stupid.
- *
- * Modification by sfeldma@pobox.com to work as backward compat
- * solution for pre-ethtool_ops kernels.
- * - copied struct ethtool_ops from ethtool.h
- * - defined SET_ETHTOOL_OPS
- * - put in some #ifndef NETIF_F_xxx wrappers
- * - changes refs to dev->ethtool_ops to ethtool_ops
- * - changed dev_ethtool to ethtool_ioctl
- * - remove EXPORT_SYMBOL()s
- * - added _kc_ prefix in built-in ethtool_op_xxx ops.
- */
-
-#include <linux/module.h>
-#include <linux/types.h>
-#include <linux/errno.h>
-#include <linux/mii.h>
-#include <linux/ethtool.h>
-#include <linux/netdevice.h>
-#include <asm/uaccess.h>
-
-#include "kcompat.h"
-
-#undef SUPPORTED_10000baseT_Full
-#define SUPPORTED_10000baseT_Full (1 << 12)
-#undef ADVERTISED_10000baseT_Full
-#define ADVERTISED_10000baseT_Full (1 << 12)
-#undef SPEED_10000
-#define SPEED_10000 10000
-
-#undef ethtool_ops
-#define ethtool_ops _kc_ethtool_ops
-
-struct _kc_ethtool_ops {
- int (*get_settings)(struct net_device *, struct ethtool_cmd *);
- int (*set_settings)(struct net_device *, struct ethtool_cmd *);
- void (*get_drvinfo)(struct net_device *, struct ethtool_drvinfo *);
- int (*get_regs_len)(struct net_device *);
- void (*get_regs)(struct net_device *, struct ethtool_regs *, void *);
- void (*get_wol)(struct net_device *, struct ethtool_wolinfo *);
- int (*set_wol)(struct net_device *, struct ethtool_wolinfo *);
- u32 (*get_msglevel)(struct net_device *);
- void (*set_msglevel)(struct net_device *, u32);
- int (*nway_reset)(struct net_device *);
- u32 (*get_link)(struct net_device *);
- int (*get_eeprom_len)(struct net_device *);
- int (*get_eeprom)(struct net_device *, struct ethtool_eeprom *, u8 *);
- int (*set_eeprom)(struct net_device *, struct ethtool_eeprom *, u8 *);
- int (*get_coalesce)(struct net_device *, struct ethtool_coalesce *);
- int (*set_coalesce)(struct net_device *, struct ethtool_coalesce *);
- void (*get_ringparam)(struct net_device *, struct ethtool_ringparam *);
- int (*set_ringparam)(struct net_device *, struct ethtool_ringparam *);
- void (*get_pauseparam)(struct net_device *,
- struct ethtool_pauseparam*);
- int (*set_pauseparam)(struct net_device *,
- struct ethtool_pauseparam*);
- u32 (*get_rx_csum)(struct net_device *);
- int (*set_rx_csum)(struct net_device *, u32);
- u32 (*get_tx_csum)(struct net_device *);
- int (*set_tx_csum)(struct net_device *, u32);
- u32 (*get_sg)(struct net_device *);
- int (*set_sg)(struct net_device *, u32);
- u32 (*get_tso)(struct net_device *);
- int (*set_tso)(struct net_device *, u32);
- int (*self_test_count)(struct net_device *);
- void (*self_test)(struct net_device *, struct ethtool_test *, u64 *);
- void (*get_strings)(struct net_device *, u32 stringset, u8 *);
- int (*phys_id)(struct net_device *, u32);
- int (*get_stats_count)(struct net_device *);
- void (*get_ethtool_stats)(struct net_device *, struct ethtool_stats *,
- u64 *);
-} *ethtool_ops = NULL;
-
-#undef SET_ETHTOOL_OPS
-#define SET_ETHTOOL_OPS(netdev, ops) (ethtool_ops = (ops))
-
-/*
- * Some useful ethtool_ops methods that are device independent. If we find that
- * all drivers want to do the same thing here, we can turn these into dev_()
- * function calls.
- */
-
-#undef ethtool_op_get_link
-#define ethtool_op_get_link _kc_ethtool_op_get_link
-u32 _kc_ethtool_op_get_link(struct net_device *dev)
-{
- return netif_carrier_ok(dev) ? 1 : 0;
-}
-
-#undef ethtool_op_get_tx_csum
-#define ethtool_op_get_tx_csum _kc_ethtool_op_get_tx_csum
-u32 _kc_ethtool_op_get_tx_csum(struct net_device *dev)
-{
-#ifdef NETIF_F_IP_CSUM
- return (dev->features & NETIF_F_IP_CSUM) != 0;
-#else
- return 0;
-#endif
-}
-
-#undef ethtool_op_set_tx_csum
-#define ethtool_op_set_tx_csum _kc_ethtool_op_set_tx_csum
-int _kc_ethtool_op_set_tx_csum(struct net_device *dev, u32 data)
-{
-#ifdef NETIF_F_IP_CSUM
- if (data)
-#ifdef NETIF_F_IPV6_CSUM
- dev->features |= (NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM);
- else
- dev->features &= ~(NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM);
-#else
- dev->features |= NETIF_F_IP_CSUM;
- else
- dev->features &= ~NETIF_F_IP_CSUM;
-#endif
-#endif
-
- return 0;
-}
-
-#undef ethtool_op_get_sg
-#define ethtool_op_get_sg _kc_ethtool_op_get_sg
-u32 _kc_ethtool_op_get_sg(struct net_device *dev)
-{
-#ifdef NETIF_F_SG
- return (dev->features & NETIF_F_SG) != 0;
-#else
- return 0;
-#endif
-}
-
-#undef ethtool_op_set_sg
-#define ethtool_op_set_sg _kc_ethtool_op_set_sg
-int _kc_ethtool_op_set_sg(struct net_device *dev, u32 data)
-{
-#ifdef NETIF_F_SG
- if (data)
- dev->features |= NETIF_F_SG;
- else
- dev->features &= ~NETIF_F_SG;
-#endif
-
- return 0;
-}
-
-#undef ethtool_op_get_tso
-#define ethtool_op_get_tso _kc_ethtool_op_get_tso
-u32 _kc_ethtool_op_get_tso(struct net_device *dev)
-{
-#ifdef NETIF_F_TSO
- return (dev->features & NETIF_F_TSO) != 0;
-#else
- return 0;
-#endif
-}
-
-#undef ethtool_op_set_tso
-#define ethtool_op_set_tso _kc_ethtool_op_set_tso
-int _kc_ethtool_op_set_tso(struct net_device *dev, u32 data)
-{
-#ifdef NETIF_F_TSO
- if (data)
- dev->features |= NETIF_F_TSO;
- else
- dev->features &= ~NETIF_F_TSO;
-#endif
-
- return 0;
-}
-
-/* Handlers for each ethtool command */
-
-static int ethtool_get_settings(struct net_device *dev, void *useraddr)
-{
- struct ethtool_cmd cmd = { ETHTOOL_GSET };
- int err;
-
- if (!ethtool_ops->get_settings)
- return -EOPNOTSUPP;
-
- err = ethtool_ops->get_settings(dev, &cmd);
- if (err < 0)
- return err;
-
- if (copy_to_user(useraddr, &cmd, sizeof(cmd)))
- return -EFAULT;
- return 0;
-}
-
-static int ethtool_set_settings(struct net_device *dev, void *useraddr)
-{
- struct ethtool_cmd cmd;
-
- if (!ethtool_ops->set_settings)
- return -EOPNOTSUPP;
-
- if (copy_from_user(&cmd, useraddr, sizeof(cmd)))
- return -EFAULT;
-
- return ethtool_ops->set_settings(dev, &cmd);
-}
-
-static int ethtool_get_drvinfo(struct net_device *dev, void *useraddr)
-{
- struct ethtool_drvinfo info;
- struct ethtool_ops *ops = ethtool_ops;
-
- if (!ops->get_drvinfo)
- return -EOPNOTSUPP;
-
- memset(&info, 0, sizeof(info));
- info.cmd = ETHTOOL_GDRVINFO;
- ops->get_drvinfo(dev, &info);
-
- if (ops->self_test_count)
- info.testinfo_len = ops->self_test_count(dev);
- if (ops->get_stats_count)
- info.n_stats = ops->get_stats_count(dev);
- if (ops->get_regs_len)
- info.regdump_len = ops->get_regs_len(dev);
- if (ops->get_eeprom_len)
- info.eedump_len = ops->get_eeprom_len(dev);
-
- if (copy_to_user(useraddr, &info, sizeof(info)))
- return -EFAULT;
- return 0;
-}
-
-static int ethtool_get_regs(struct net_device *dev, char *useraddr)
-{
- struct ethtool_regs regs;
- struct ethtool_ops *ops = ethtool_ops;
- void *regbuf;
- int reglen, ret;
-
- if (!ops->get_regs || !ops->get_regs_len)
- return -EOPNOTSUPP;
-
- if (copy_from_user(&regs, useraddr, sizeof(regs)))
- return -EFAULT;
-
- reglen = ops->get_regs_len(dev);
- if (regs.len > reglen)
- regs.len = reglen;
-
- regbuf = kmalloc(reglen, GFP_USER);
- if (!regbuf)
- return -ENOMEM;
-
- ops->get_regs(dev, &regs, regbuf);
-
- ret = -EFAULT;
- if (copy_to_user(useraddr, &regs, sizeof(regs)))
- goto out;
- useraddr += offsetof(struct ethtool_regs, data);
- if (copy_to_user(useraddr, regbuf, reglen))
- goto out;
- ret = 0;
-
-out:
- kfree(regbuf);
- return ret;
-}
-
-static int ethtool_get_wol(struct net_device *dev, char *useraddr)
-{
- struct ethtool_wolinfo wol = { ETHTOOL_GWOL };
-
- if (!ethtool_ops->get_wol)
- return -EOPNOTSUPP;
-
- ethtool_ops->get_wol(dev, &wol);
-
- if (copy_to_user(useraddr, &wol, sizeof(wol)))
- return -EFAULT;
- return 0;
-}
-
-static int ethtool_set_wol(struct net_device *dev, char *useraddr)
-{
- struct ethtool_wolinfo wol;
-
- if (!ethtool_ops->set_wol)
- return -EOPNOTSUPP;
-
- if (copy_from_user(&wol, useraddr, sizeof(wol)))
- return -EFAULT;
-
- return ethtool_ops->set_wol(dev, &wol);
-}
-
-static int ethtool_get_msglevel(struct net_device *dev, char *useraddr)
-{
- struct ethtool_value edata = { ETHTOOL_GMSGLVL };
-
- if (!ethtool_ops->get_msglevel)
- return -EOPNOTSUPP;
-
- edata.data = ethtool_ops->get_msglevel(dev);
-
- if (copy_to_user(useraddr, &edata, sizeof(edata)))
- return -EFAULT;
- return 0;
-}
-
-static int ethtool_set_msglevel(struct net_device *dev, char *useraddr)
-{
- struct ethtool_value edata;
-
- if (!ethtool_ops->set_msglevel)
- return -EOPNOTSUPP;
-
- if (copy_from_user(&edata, useraddr, sizeof(edata)))
- return -EFAULT;
-
- ethtool_ops->set_msglevel(dev, edata.data);
- return 0;
-}
-
-static int ethtool_nway_reset(struct net_device *dev)
-{
- if (!ethtool_ops->nway_reset)
- return -EOPNOTSUPP;
-
- return ethtool_ops->nway_reset(dev);
-}
-
-static int ethtool_get_link(struct net_device *dev, void *useraddr)
-{
- struct ethtool_value edata = { ETHTOOL_GLINK };
-
- if (!ethtool_ops->get_link)
- return -EOPNOTSUPP;
-
- edata.data = ethtool_ops->get_link(dev);
-
- if (copy_to_user(useraddr, &edata, sizeof(edata)))
- return -EFAULT;
- return 0;
-}
-
-static int ethtool_get_eeprom(struct net_device *dev, void *useraddr)
-{
- struct ethtool_eeprom eeprom;
- struct ethtool_ops *ops = ethtool_ops;
- u8 *data;
- int ret;
-
- if (!ops->get_eeprom || !ops->get_eeprom_len)
- return -EOPNOTSUPP;
-
- if (copy_from_user(&eeprom, useraddr, sizeof(eeprom)))
- return -EFAULT;
-
- /* Check for wrap and zero */
- if (eeprom.offset + eeprom.len <= eeprom.offset)
- return -EINVAL;
-
- /* Check for exceeding total eeprom len */
- if (eeprom.offset + eeprom.len > ops->get_eeprom_len(dev))
- return -EINVAL;
-
- data = kmalloc(eeprom.len, GFP_USER);
- if (!data)
- return -ENOMEM;
-
- ret = -EFAULT;
- if (copy_from_user(data, useraddr + sizeof(eeprom), eeprom.len))
- goto out;
-
- ret = ops->get_eeprom(dev, &eeprom, data);
- if (ret)
- goto out;
-
- ret = -EFAULT;
- if (copy_to_user(useraddr, &eeprom, sizeof(eeprom)))
- goto out;
- if (copy_to_user(useraddr + sizeof(eeprom), data, eeprom.len))
- goto out;
- ret = 0;
-
-out:
- kfree(data);
- return ret;
-}
-
-static int ethtool_set_eeprom(struct net_device *dev, void *useraddr)
-{
- struct ethtool_eeprom eeprom;
- struct ethtool_ops *ops = ethtool_ops;
- u8 *data;
- int ret;
-
- if (!ops->set_eeprom || !ops->get_eeprom_len)
- return -EOPNOTSUPP;
-
- if (copy_from_user(&eeprom, useraddr, sizeof(eeprom)))
- return -EFAULT;
-
- /* Check for wrap and zero */
- if (eeprom.offset + eeprom.len <= eeprom.offset)
- return -EINVAL;
-
- /* Check for exceeding total eeprom len */
- if (eeprom.offset + eeprom.len > ops->get_eeprom_len(dev))
- return -EINVAL;
-
- data = kmalloc(eeprom.len, GFP_USER);
- if (!data)
- return -ENOMEM;
-
- ret = -EFAULT;
- if (copy_from_user(data, useraddr + sizeof(eeprom), eeprom.len))
- goto out;
-
- ret = ops->set_eeprom(dev, &eeprom, data);
- if (ret)
- goto out;
-
- if (copy_to_user(useraddr + sizeof(eeprom), data, eeprom.len))
- ret = -EFAULT;
-
-out:
- kfree(data);
- return ret;
-}
-
-static int ethtool_get_coalesce(struct net_device *dev, void *useraddr)
-{
- struct ethtool_coalesce coalesce = { ETHTOOL_GCOALESCE };
-
- if (!ethtool_ops->get_coalesce)
- return -EOPNOTSUPP;
-
- ethtool_ops->get_coalesce(dev, &coalesce);
-
- if (copy_to_user(useraddr, &coalesce, sizeof(coalesce)))
- return -EFAULT;
- return 0;
-}
-
-static int ethtool_set_coalesce(struct net_device *dev, void *useraddr)
-{
- struct ethtool_coalesce coalesce;
-
- if (!ethtool_ops->get_coalesce)
- return -EOPNOTSUPP;
-
- if (copy_from_user(&coalesce, useraddr, sizeof(coalesce)))
- return -EFAULT;
-
- return ethtool_ops->set_coalesce(dev, &coalesce);
-}
-
-static int ethtool_get_ringparam(struct net_device *dev, void *useraddr)
-{
- struct ethtool_ringparam ringparam = { ETHTOOL_GRINGPARAM };
-
- if (!ethtool_ops->get_ringparam)
- return -EOPNOTSUPP;
-
- ethtool_ops->get_ringparam(dev, &ringparam);
-
- if (copy_to_user(useraddr, &ringparam, sizeof(ringparam)))
- return -EFAULT;
- return 0;
-}
-
-static int ethtool_set_ringparam(struct net_device *dev, void *useraddr)
-{
- struct ethtool_ringparam ringparam;
-
- if (!ethtool_ops->get_ringparam)
- return -EOPNOTSUPP;
-
- if (copy_from_user(&ringparam, useraddr, sizeof(ringparam)))
- return -EFAULT;
-
- return ethtool_ops->set_ringparam(dev, &ringparam);
-}
-
-static int ethtool_get_pauseparam(struct net_device *dev, void *useraddr)
-{
- struct ethtool_pauseparam pauseparam = { ETHTOOL_GPAUSEPARAM };
-
- if (!ethtool_ops->get_pauseparam)
- return -EOPNOTSUPP;
-
- ethtool_ops->get_pauseparam(dev, &pauseparam);
-
- if (copy_to_user(useraddr, &pauseparam, sizeof(pauseparam)))
- return -EFAULT;
- return 0;
-}
-
-static int ethtool_set_pauseparam(struct net_device *dev, void *useraddr)
-{
- struct ethtool_pauseparam pauseparam;
-
- if (!ethtool_ops->get_pauseparam)
- return -EOPNOTSUPP;
-
- if (copy_from_user(&pauseparam, useraddr, sizeof(pauseparam)))
- return -EFAULT;
-
- return ethtool_ops->set_pauseparam(dev, &pauseparam);
-}
-
-static int ethtool_get_rx_csum(struct net_device *dev, char *useraddr)
-{
- struct ethtool_value edata = { ETHTOOL_GRXCSUM };
-
- if (!ethtool_ops->get_rx_csum)
- return -EOPNOTSUPP;
-
- edata.data = ethtool_ops->get_rx_csum(dev);
-
- if (copy_to_user(useraddr, &edata, sizeof(edata)))
- return -EFAULT;
- return 0;
-}
-
-static int ethtool_set_rx_csum(struct net_device *dev, char *useraddr)
-{
- struct ethtool_value edata;
-
- if (!ethtool_ops->set_rx_csum)
- return -EOPNOTSUPP;
-
- if (copy_from_user(&edata, useraddr, sizeof(edata)))
- return -EFAULT;
-
- ethtool_ops->set_rx_csum(dev, edata.data);
- return 0;
-}
-
-static int ethtool_get_tx_csum(struct net_device *dev, char *useraddr)
-{
- struct ethtool_value edata = { ETHTOOL_GTXCSUM };
-
- if (!ethtool_ops->get_tx_csum)
- return -EOPNOTSUPP;
-
- edata.data = ethtool_ops->get_tx_csum(dev);
-
- if (copy_to_user(useraddr, &edata, sizeof(edata)))
- return -EFAULT;
- return 0;
-}
-
-static int ethtool_set_tx_csum(struct net_device *dev, char *useraddr)
-{
- struct ethtool_value edata;
-
- if (!ethtool_ops->set_tx_csum)
- return -EOPNOTSUPP;
-
- if (copy_from_user(&edata, useraddr, sizeof(edata)))
- return -EFAULT;
-
- return ethtool_ops->set_tx_csum(dev, edata.data);
-}
-
-static int ethtool_get_sg(struct net_device *dev, char *useraddr)
-{
- struct ethtool_value edata = { ETHTOOL_GSG };
-
- if (!ethtool_ops->get_sg)
- return -EOPNOTSUPP;
-
- edata.data = ethtool_ops->get_sg(dev);
-
- if (copy_to_user(useraddr, &edata, sizeof(edata)))
- return -EFAULT;
- return 0;
-}
-
-static int ethtool_set_sg(struct net_device *dev, char *useraddr)
-{
- struct ethtool_value edata;
-
- if (!ethtool_ops->set_sg)
- return -EOPNOTSUPP;
-
- if (copy_from_user(&edata, useraddr, sizeof(edata)))
- return -EFAULT;
-
- return ethtool_ops->set_sg(dev, edata.data);
-}
-
-static int ethtool_get_tso(struct net_device *dev, char *useraddr)
-{
- struct ethtool_value edata = { ETHTOOL_GTSO };
-
- if (!ethtool_ops->get_tso)
- return -EOPNOTSUPP;
-
- edata.data = ethtool_ops->get_tso(dev);
-
- if (copy_to_user(useraddr, &edata, sizeof(edata)))
- return -EFAULT;
- return 0;
-}
-
-static int ethtool_set_tso(struct net_device *dev, char *useraddr)
-{
- struct ethtool_value edata;
-
- if (!ethtool_ops->set_tso)
- return -EOPNOTSUPP;
-
- if (copy_from_user(&edata, useraddr, sizeof(edata)))
- return -EFAULT;
-
- return ethtool_ops->set_tso(dev, edata.data);
-}
-
-static int ethtool_self_test(struct net_device *dev, char *useraddr)
-{
- struct ethtool_test test;
- struct ethtool_ops *ops = ethtool_ops;
- u64 *data;
- int ret;
-
- if (!ops->self_test || !ops->self_test_count)
- return -EOPNOTSUPP;
-
- if (copy_from_user(&test, useraddr, sizeof(test)))
- return -EFAULT;
-
- test.len = ops->self_test_count(dev);
- data = kmalloc(test.len * sizeof(u64), GFP_USER);
- if (!data)
- return -ENOMEM;
-
- ops->self_test(dev, &test, data);
-
- ret = -EFAULT;
- if (copy_to_user(useraddr, &test, sizeof(test)))
- goto out;
- useraddr += sizeof(test);
- if (copy_to_user(useraddr, data, test.len * sizeof(u64)))
- goto out;
- ret = 0;
-
-out:
- kfree(data);
- return ret;
-}
-
-static int ethtool_get_strings(struct net_device *dev, void *useraddr)
-{
- struct ethtool_gstrings gstrings;
- struct ethtool_ops *ops = ethtool_ops;
- u8 *data;
- int ret;
-
- if (!ops->get_strings)
- return -EOPNOTSUPP;
-
- if (copy_from_user(&gstrings, useraddr, sizeof(gstrings)))
- return -EFAULT;
-
- switch (gstrings.string_set) {
- case ETH_SS_TEST:
- if (!ops->self_test_count)
- return -EOPNOTSUPP;
- gstrings.len = ops->self_test_count(dev);
- break;
- case ETH_SS_STATS:
- if (!ops->get_stats_count)
- return -EOPNOTSUPP;
- gstrings.len = ops->get_stats_count(dev);
- break;
- default:
- return -EINVAL;
- }
-
- data = kmalloc(gstrings.len * ETH_GSTRING_LEN, GFP_USER);
- if (!data)
- return -ENOMEM;
-
- ops->get_strings(dev, gstrings.string_set, data);
-
- ret = -EFAULT;
- if (copy_to_user(useraddr, &gstrings, sizeof(gstrings)))
- goto out;
- useraddr += sizeof(gstrings);
- if (copy_to_user(useraddr, data, gstrings.len * ETH_GSTRING_LEN))
- goto out;
- ret = 0;
-
-out:
- kfree(data);
- return ret;
-}
-
-static int ethtool_phys_id(struct net_device *dev, void *useraddr)
-{
- struct ethtool_value id;
-
- if (!ethtool_ops->phys_id)
- return -EOPNOTSUPP;
-
- if (copy_from_user(&id, useraddr, sizeof(id)))
- return -EFAULT;
-
- return ethtool_ops->phys_id(dev, id.data);
-}
-
-static int ethtool_get_stats(struct net_device *dev, void *useraddr)
-{
- struct ethtool_stats stats;
- struct ethtool_ops *ops = ethtool_ops;
- u64 *data;
- int ret;
-
- if (!ops->get_ethtool_stats || !ops->get_stats_count)
- return -EOPNOTSUPP;
-
- if (copy_from_user(&stats, useraddr, sizeof(stats)))
- return -EFAULT;
-
- stats.n_stats = ops->get_stats_count(dev);
- data = kmalloc(stats.n_stats * sizeof(u64), GFP_USER);
- if (!data)
- return -ENOMEM;
-
- ops->get_ethtool_stats(dev, &stats, data);
-
- ret = -EFAULT;
- if (copy_to_user(useraddr, &stats, sizeof(stats)))
- goto out;
- useraddr += sizeof(stats);
- if (copy_to_user(useraddr, data, stats.n_stats * sizeof(u64)))
- goto out;
- ret = 0;
-
-out:
- kfree(data);
- return ret;
-}
-
-/* The main entry point in this file. Called from net/core/dev.c */
-
-#define ETHTOOL_OPS_COMPAT
-int ethtool_ioctl(struct ifreq *ifr)
-{
- struct net_device *dev = __dev_get_by_name(ifr->ifr_name);
- void *useraddr = (void *) ifr->ifr_data;
- u32 ethcmd;
-
- /*
- * XXX: This can be pushed down into the ethtool_* handlers that
- * need it. Keep existing behavior for the moment.
- */
- if (!capable(CAP_NET_ADMIN))
- return -EPERM;
-
- if (!dev || !netif_device_present(dev))
- return -ENODEV;
-
- if (copy_from_user(&ethcmd, useraddr, sizeof (ethcmd)))
- return -EFAULT;
-
- switch (ethcmd) {
- case ETHTOOL_GSET:
- return ethtool_get_settings(dev, useraddr);
- case ETHTOOL_SSET:
- return ethtool_set_settings(dev, useraddr);
- case ETHTOOL_GDRVINFO:
- return ethtool_get_drvinfo(dev, useraddr);
- case ETHTOOL_GREGS:
- return ethtool_get_regs(dev, useraddr);
- case ETHTOOL_GWOL:
- return ethtool_get_wol(dev, useraddr);
- case ETHTOOL_SWOL:
- return ethtool_set_wol(dev, useraddr);
- case ETHTOOL_GMSGLVL:
- return ethtool_get_msglevel(dev, useraddr);
- case ETHTOOL_SMSGLVL:
- return ethtool_set_msglevel(dev, useraddr);
- case ETHTOOL_NWAY_RST:
- return ethtool_nway_reset(dev);
- case ETHTOOL_GLINK:
- return ethtool_get_link(dev, useraddr);
- case ETHTOOL_GEEPROM:
- return ethtool_get_eeprom(dev, useraddr);
- case ETHTOOL_SEEPROM:
- return ethtool_set_eeprom(dev, useraddr);
- case ETHTOOL_GCOALESCE:
- return ethtool_get_coalesce(dev, useraddr);
- case ETHTOOL_SCOALESCE:
- return ethtool_set_coalesce(dev, useraddr);
- case ETHTOOL_GRINGPARAM:
- return ethtool_get_ringparam(dev, useraddr);
- case ETHTOOL_SRINGPARAM:
- return ethtool_set_ringparam(dev, useraddr);
- case ETHTOOL_GPAUSEPARAM:
- return ethtool_get_pauseparam(dev, useraddr);
- case ETHTOOL_SPAUSEPARAM:
- return ethtool_set_pauseparam(dev, useraddr);
- case ETHTOOL_GRXCSUM:
- return ethtool_get_rx_csum(dev, useraddr);
- case ETHTOOL_SRXCSUM:
- return ethtool_set_rx_csum(dev, useraddr);
- case ETHTOOL_GTXCSUM:
- return ethtool_get_tx_csum(dev, useraddr);
- case ETHTOOL_STXCSUM:
- return ethtool_set_tx_csum(dev, useraddr);
- case ETHTOOL_GSG:
- return ethtool_get_sg(dev, useraddr);
- case ETHTOOL_SSG:
- return ethtool_set_sg(dev, useraddr);
- case ETHTOOL_GTSO:
- return ethtool_get_tso(dev, useraddr);
- case ETHTOOL_STSO:
- return ethtool_set_tso(dev, useraddr);
- case ETHTOOL_TEST:
- return ethtool_self_test(dev, useraddr);
- case ETHTOOL_GSTRINGS:
- return ethtool_get_strings(dev, useraddr);
- case ETHTOOL_PHYS_ID:
- return ethtool_phys_id(dev, useraddr);
- case ETHTOOL_GSTATS:
- return ethtool_get_stats(dev, useraddr);
- default:
- return -EOPNOTSUPP;
- }
-
- return -EOPNOTSUPP;
-}
-
-#define mii_if_info _kc_mii_if_info
-struct _kc_mii_if_info {
- int phy_id;
- int advertising;
- int phy_id_mask;
- int reg_num_mask;
-
- unsigned int full_duplex : 1; /* is full duplex? */
- unsigned int force_media : 1; /* is autoneg. disabled? */
-
- struct net_device *dev;
- int (*mdio_read) (struct net_device *dev, int phy_id, int location);
- void (*mdio_write) (struct net_device *dev, int phy_id, int location, int val);
-};
-
-struct ethtool_cmd;
-struct mii_ioctl_data;
-
-#undef mii_link_ok
-#define mii_link_ok _kc_mii_link_ok
-#undef mii_nway_restart
-#define mii_nway_restart _kc_mii_nway_restart
-#undef mii_ethtool_gset
-#define mii_ethtool_gset _kc_mii_ethtool_gset
-#undef mii_ethtool_sset
-#define mii_ethtool_sset _kc_mii_ethtool_sset
-#undef mii_check_link
-#define mii_check_link _kc_mii_check_link
-extern int _kc_mii_link_ok (struct mii_if_info *mii);
-extern int _kc_mii_nway_restart (struct mii_if_info *mii);
-extern int _kc_mii_ethtool_gset(struct mii_if_info *mii,
- struct ethtool_cmd *ecmd);
-extern int _kc_mii_ethtool_sset(struct mii_if_info *mii,
- struct ethtool_cmd *ecmd);
-extern void _kc_mii_check_link (struct mii_if_info *mii);
-#if ( LINUX_VERSION_CODE > KERNEL_VERSION(2,4,6) )
-#undef generic_mii_ioctl
-#define generic_mii_ioctl _kc_generic_mii_ioctl
-extern int _kc_generic_mii_ioctl(struct mii_if_info *mii_if,
- struct mii_ioctl_data *mii_data, int cmd,
- unsigned int *duplex_changed);
-#endif /* > 2.4.6 */
-
-
-struct _kc_pci_dev_ext {
- struct pci_dev *dev;
- void *pci_drvdata;
- struct pci_driver *driver;
-};
-
-struct _kc_net_dev_ext {
- struct net_device *dev;
- unsigned int carrier;
-};
-
-
-/**************************************/
-/* mii support */
-
-int _kc_mii_ethtool_gset(struct mii_if_info *mii, struct ethtool_cmd *ecmd)
-{
- struct net_device *dev = mii->dev;
- u32 advert, bmcr, lpa, nego;
-
- ecmd->supported =
- (SUPPORTED_10baseT_Half | SUPPORTED_10baseT_Full |
- SUPPORTED_100baseT_Half | SUPPORTED_100baseT_Full |
- SUPPORTED_Autoneg | SUPPORTED_TP | SUPPORTED_MII);
-
- /* only supports twisted-pair */
- ecmd->port = PORT_MII;
-
- /* only supports internal transceiver */
- ecmd->transceiver = XCVR_INTERNAL;
-
- /* this isn't fully supported at higher layers */
- ecmd->phy_address = mii->phy_id;
-
- ecmd->advertising = ADVERTISED_TP | ADVERTISED_MII;
- advert = mii->mdio_read(dev, mii->phy_id, MII_ADVERTISE);
- if (advert & ADVERTISE_10HALF)
- ecmd->advertising |= ADVERTISED_10baseT_Half;
- if (advert & ADVERTISE_10FULL)
- ecmd->advertising |= ADVERTISED_10baseT_Full;
- if (advert & ADVERTISE_100HALF)
- ecmd->advertising |= ADVERTISED_100baseT_Half;
- if (advert & ADVERTISE_100FULL)
- ecmd->advertising |= ADVERTISED_100baseT_Full;
-
- bmcr = mii->mdio_read(dev, mii->phy_id, MII_BMCR);
- lpa = mii->mdio_read(dev, mii->phy_id, MII_LPA);
- if (bmcr & BMCR_ANENABLE) {
- ecmd->advertising |= ADVERTISED_Autoneg;
- ecmd->autoneg = AUTONEG_ENABLE;
-
- nego = mii_nway_result(advert & lpa);
- if (nego == LPA_100FULL || nego == LPA_100HALF)
- ecmd->speed = SPEED_100;
- else
- ecmd->speed = SPEED_10;
- if (nego == LPA_100FULL || nego == LPA_10FULL) {
- ecmd->duplex = DUPLEX_FULL;
- mii->full_duplex = 1;
- } else {
- ecmd->duplex = DUPLEX_HALF;
- mii->full_duplex = 0;
- }
- } else {
- ecmd->autoneg = AUTONEG_DISABLE;
-
- ecmd->speed = (bmcr & BMCR_SPEED100) ? SPEED_100 : SPEED_10;
- ecmd->duplex = (bmcr & BMCR_FULLDPLX) ? DUPLEX_FULL : DUPLEX_HALF;
- }
-
- /* ignore maxtxpkt, maxrxpkt for now */
-
- return 0;
-}
-
-int _kc_mii_ethtool_sset(struct mii_if_info *mii, struct ethtool_cmd *ecmd)
-{
- struct net_device *dev = mii->dev;
-
- if (ecmd->speed != SPEED_10 && ecmd->speed != SPEED_100)
- return -EINVAL;
- if (ecmd->duplex != DUPLEX_HALF && ecmd->duplex != DUPLEX_FULL)
- return -EINVAL;
- if (ecmd->port != PORT_MII)
- return -EINVAL;
- if (ecmd->transceiver != XCVR_INTERNAL)
- return -EINVAL;
- if (ecmd->phy_address != mii->phy_id)
- return -EINVAL;
- if (ecmd->autoneg != AUTONEG_DISABLE && ecmd->autoneg != AUTONEG_ENABLE)
- return -EINVAL;
-
- /* ignore supported, maxtxpkt, maxrxpkt */
-
- if (ecmd->autoneg == AUTONEG_ENABLE) {
- u32 bmcr, advert, tmp;
-
- if ((ecmd->advertising & (ADVERTISED_10baseT_Half |
- ADVERTISED_10baseT_Full |
- ADVERTISED_100baseT_Half |
- ADVERTISED_100baseT_Full)) == 0)
- return -EINVAL;
-
- /* advertise only what has been requested */
- advert = mii->mdio_read(dev, mii->phy_id, MII_ADVERTISE);
- tmp = advert & ~(ADVERTISE_ALL | ADVERTISE_100BASE4);
- if (ADVERTISED_10baseT_Half)
- tmp |= ADVERTISE_10HALF;
- if (ADVERTISED_10baseT_Full)
- tmp |= ADVERTISE_10FULL;
- if (ADVERTISED_100baseT_Half)
- tmp |= ADVERTISE_100HALF;
- if (ADVERTISED_100baseT_Full)
- tmp |= ADVERTISE_100FULL;
- if (advert != tmp) {
- mii->mdio_write(dev, mii->phy_id, MII_ADVERTISE, tmp);
- mii->advertising = tmp;
- }
-
- /* turn on autonegotiation, and force a renegotiate */
- bmcr = mii->mdio_read(dev, mii->phy_id, MII_BMCR);
- bmcr |= (BMCR_ANENABLE | BMCR_ANRESTART);
- mii->mdio_write(dev, mii->phy_id, MII_BMCR, bmcr);
-
- mii->force_media = 0;
- } else {
- u32 bmcr, tmp;
-
- /* turn off auto negotiation, set speed and duplexity */
- bmcr = mii->mdio_read(dev, mii->phy_id, MII_BMCR);
- tmp = bmcr & ~(BMCR_ANENABLE | BMCR_SPEED100 | BMCR_FULLDPLX);
- if (ecmd->speed == SPEED_100)
- tmp |= BMCR_SPEED100;
- if (ecmd->duplex == DUPLEX_FULL) {
- tmp |= BMCR_FULLDPLX;
- mii->full_duplex = 1;
- } else
- mii->full_duplex = 0;
- if (bmcr != tmp)
- mii->mdio_write(dev, mii->phy_id, MII_BMCR, tmp);
-
- mii->force_media = 1;
- }
- return 0;
-}
-
-int _kc_mii_link_ok (struct mii_if_info *mii)
-{
- /* first, a dummy read, needed to latch some MII phys */
- mii->mdio_read(mii->dev, mii->phy_id, MII_BMSR);
- if (mii->mdio_read(mii->dev, mii->phy_id, MII_BMSR) & BMSR_LSTATUS)
- return 1;
- return 0;
-}
-
-int _kc_mii_nway_restart (struct mii_if_info *mii)
-{
- int bmcr;
- int r = -EINVAL;
-
- /* if autoneg is off, it's an error */
- bmcr = mii->mdio_read(mii->dev, mii->phy_id, MII_BMCR);
-
- if (bmcr & BMCR_ANENABLE) {
- bmcr |= BMCR_ANRESTART;
- mii->mdio_write(mii->dev, mii->phy_id, MII_BMCR, bmcr);
- r = 0;
- }
-
- return r;
-}
-
-void _kc_mii_check_link (struct mii_if_info *mii)
-{
- int cur_link = mii_link_ok(mii);
- int prev_link = netif_carrier_ok(mii->dev);
-
- if (cur_link && !prev_link)
- netif_carrier_on(mii->dev);
- else if (prev_link && !cur_link)
- netif_carrier_off(mii->dev);
-}
-
-#if ( LINUX_VERSION_CODE > KERNEL_VERSION(2,4,6) )
-int _kc_generic_mii_ioctl(struct mii_if_info *mii_if,
- struct mii_ioctl_data *mii_data, int cmd,
- unsigned int *duplex_chg_out)
-{
- int rc = 0;
- unsigned int duplex_changed = 0;
-
- if (duplex_chg_out)
- *duplex_chg_out = 0;
-
- mii_data->phy_id &= mii_if->phy_id_mask;
- mii_data->reg_num &= mii_if->reg_num_mask;
-
- switch(cmd) {
- case SIOCDEVPRIVATE: /* binary compat, remove in 2.5 */
- case SIOCGMIIPHY:
- mii_data->phy_id = mii_if->phy_id;
- /* fall through */
-
- case SIOCDEVPRIVATE + 1:/* binary compat, remove in 2.5 */
- case SIOCGMIIREG:
- mii_data->val_out =
- mii_if->mdio_read(mii_if->dev, mii_data->phy_id,
- mii_data->reg_num);
- break;
-
- case SIOCDEVPRIVATE + 2:/* binary compat, remove in 2.5 */
- case SIOCSMIIREG: {
- u16 val = mii_data->val_in;
-
- if (!capable(CAP_NET_ADMIN))
- return -EPERM;
-
- if (mii_data->phy_id == mii_if->phy_id) {
- switch(mii_data->reg_num) {
- case MII_BMCR: {
- unsigned int new_duplex = 0;
- if (val & (BMCR_RESET|BMCR_ANENABLE))
- mii_if->force_media = 0;
- else
- mii_if->force_media = 1;
- if (mii_if->force_media &&
- (val & BMCR_FULLDPLX))
- new_duplex = 1;
- if (mii_if->full_duplex != new_duplex) {
- duplex_changed = 1;
- mii_if->full_duplex = new_duplex;
- }
- break;
- }
- case MII_ADVERTISE:
- mii_if->advertising = val;
- break;
- default:
- /* do nothing */
- break;
- }
- }
-
- mii_if->mdio_write(mii_if->dev, mii_data->phy_id,
- mii_data->reg_num, val);
- break;
- }
-
- default:
- rc = -EOPNOTSUPP;
- break;
- }
-
- if ((rc == 0) && (duplex_chg_out) && (duplex_changed))
- *duplex_chg_out = 1;
-
- return rc;
-}
-#endif /* > 2.4.6 */
diff --git a/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/COPYING b/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/COPYING
deleted file mode 100644
index 5f297e5b..00000000
--- a/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/COPYING
+++ /dev/null
@@ -1,339 +0,0 @@
-
-"This software program is licensed subject to the GNU General Public License
-(GPL). Version 2, June 1991, available at
-<http://www.fsf.org/copyleft/gpl.html>"
-
-GNU General Public License
-
-Version 2, June 1991
-
-Copyright (C) 1989, 1991 Free Software Foundation, Inc.
-59 Temple Place - Suite 330, Boston, MA 02111-1307, USA
-
-Everyone is permitted to copy and distribute verbatim copies of this license
-document, but changing it is not allowed.
-
-Preamble
-
-The licenses for most software are designed to take away your freedom to
-share and change it. By contrast, the GNU General Public License is intended
-to guarantee your freedom to share and change free software--to make sure
-the software is free for all its users. This General Public License applies
-to most of the Free Software Foundation's software and to any other program
-whose authors commit to using it. (Some other Free Software Foundation
-software is covered by the GNU Library General Public License instead.) You
-can apply it to your programs, too.
-
-When we speak of free software, we are referring to freedom, not price. Our
-General Public Licenses are designed to make sure that you have the freedom
-to distribute copies of free software (and charge for this service if you
-wish), that you receive source code or can get it if you want it, that you
-can change the software or use pieces of it in new free programs; and that
-you know you can do these things.
-
-To protect your rights, we need to make restrictions that forbid anyone to
-deny you these rights or to ask you to surrender the rights. These
-restrictions translate to certain responsibilities for you if you distribute
-copies of the software, or if you modify it.
-
-For example, if you distribute copies of such a program, whether gratis or
-for a fee, you must give the recipients all the rights that you have. You
-must make sure that they, too, receive or can get the source code. And you
-must show them these terms so they know their rights.
-
-We protect your rights with two steps: (1) copyright the software, and (2)
-offer you this license which gives you legal permission to copy, distribute
-and/or modify the software.
-
-Also, for each author's protection and ours, we want to make certain that
-everyone understands that there is no warranty for this free software. If
-the software is modified by someone else and passed on, we want its
-recipients to know that what they have is not the original, so that any
-problems introduced by others will not reflect on the original authors'
-reputations.
-
-Finally, any free program is threatened constantly by software patents. We
-wish to avoid the danger that redistributors of a free program will
-individually obtain patent licenses, in effect making the program
-proprietary. To prevent this, we have made it clear that any patent must be
-licensed for everyone's free use or not licensed at all.
-
-The precise terms and conditions for copying, distribution and modification
-follow.
-
-TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION
-
-0. This License applies to any program or other work which contains a notice
- placed by the copyright holder saying it may be distributed under the
- terms of this General Public License. The "Program", below, refers to any
- such program or work, and a "work based on the Program" means either the
- Program or any derivative work under copyright law: that is to say, a
- work containing the Program or a portion of it, either verbatim or with
- modifications and/or translated into another language. (Hereinafter,
- translation is included without limitation in the term "modification".)
- Each licensee is addressed as "you".
-
- Activities other than copying, distribution and modification are not
- covered by this License; they are outside its scope. The act of running
- the Program is not restricted, and the output from the Program is covered
- only if its contents constitute a work based on the Program (independent
- of having been made by running the Program). Whether that is true depends
- on what the Program does.
-
-1. You may copy and distribute verbatim copies of the Program's source code
- as you receive it, in any medium, provided that you conspicuously and
- appropriately publish on each copy an appropriate copyright notice and
- disclaimer of warranty; keep intact all the notices that refer to this
- License and to the absence of any warranty; and give any other recipients
- of the Program a copy of this License along with the Program.
-
- You may charge a fee for the physical act of transferring a copy, and you
- may at your option offer warranty protection in exchange for a fee.
-
-2. You may modify your copy or copies of the Program or any portion of it,
- thus forming a work based on the Program, and copy and distribute such
- modifications or work under the terms of Section 1 above, provided that
- you also meet all of these conditions:
-
- * a) You must cause the modified files to carry prominent notices stating
- that you changed the files and the date of any change.
-
- * b) You must cause any work that you distribute or publish, that in
- whole or in part contains or is derived from the Program or any part
- thereof, to be licensed as a whole at no charge to all third parties
- under the terms of this License.
-
- * c) If the modified program normally reads commands interactively when
- run, you must cause it, when started running for such interactive
- use in the most ordinary way, to print or display an announcement
- including an appropriate copyright notice and a notice that there is
- no warranty (or else, saying that you provide a warranty) and that
- users may redistribute the program under these conditions, and
- telling the user how to view a copy of this License. (Exception: if
- the Program itself is interactive but does not normally print such
- an announcement, your work based on the Program is not required to
- print an announcement.)
-
- These requirements apply to the modified work as a whole. If identifiable
- sections of that work are not derived from the Program, and can be
- reasonably considered independent and separate works in themselves, then
- this License, and its terms, do not apply to those sections when you
- distribute them as separate works. But when you distribute the same
- sections as part of a whole which is a work based on the Program, the
- distribution of the whole must be on the terms of this License, whose
- permissions for other licensees extend to the entire whole, and thus to
- each and every part regardless of who wrote it.
-
- Thus, it is not the intent of this section to claim rights or contest
- your rights to work written entirely by you; rather, the intent is to
- exercise the right to control the distribution of derivative or
- collective works based on the Program.
-
- In addition, mere aggregation of another work not based on the Program
- with the Program (or with a work based on the Program) on a volume of a
- storage or distribution medium does not bring the other work under the
- scope of this License.
-
-3. You may copy and distribute the Program (or a work based on it, under
- Section 2) in object code or executable form under the terms of Sections
- 1 and 2 above provided that you also do one of the following:
-
- * a) Accompany it with the complete corresponding machine-readable source
- code, which must be distributed under the terms of Sections 1 and 2
- above on a medium customarily used for software interchange; or,
-
- * b) Accompany it with a written offer, valid for at least three years,
- to give any third party, for a charge no more than your cost of
- physically performing source distribution, a complete machine-
- readable copy of the corresponding source code, to be distributed
- under the terms of Sections 1 and 2 above on a medium customarily
- used for software interchange; or,
-
- * c) Accompany it with the information you received as to the offer to
- distribute corresponding source code. (This alternative is allowed
- only for noncommercial distribution and only if you received the
- program in object code or executable form with such an offer, in
- accord with Subsection b above.)
-
- The source code for a work means the preferred form of the work for
- making modifications to it. For an executable work, complete source code
- means all the source code for all modules it contains, plus any
- associated interface definition files, plus the scripts used to control
- compilation and installation of the executable. However, as a special
- exception, the source code distributed need not include anything that is
- normally distributed (in either source or binary form) with the major
- components (compiler, kernel, and so on) of the operating system on which
- the executable runs, unless that component itself accompanies the
- executable.
-
- If distribution of executable or object code is made by offering access
- to copy from a designated place, then offering equivalent access to copy
- the source code from the same place counts as distribution of the source
- code, even though third parties are not compelled to copy the source
- along with the object code.
-
-4. You may not copy, modify, sublicense, or distribute the Program except as
- expressly provided under this License. Any attempt otherwise to copy,
- modify, sublicense or distribute the Program is void, and will
- automatically terminate your rights under this License. However, parties
- who have received copies, or rights, from you under this License will not
- have their licenses terminated so long as such parties remain in full
- compliance.
-
-5. You are not required to accept this License, since you have not signed
- it. However, nothing else grants you permission to modify or distribute
- the Program or its derivative works. These actions are prohibited by law
- if you do not accept this License. Therefore, by modifying or
- distributing the Program (or any work based on the Program), you
- indicate your acceptance of this License to do so, and all its terms and
- conditions for copying, distributing or modifying the Program or works
- based on it.
-
-6. Each time you redistribute the Program (or any work based on the
- Program), the recipient automatically receives a license from the
- original licensor to copy, distribute or modify the Program subject to
- these terms and conditions. You may not impose any further restrictions
- on the recipients' exercise of the rights granted herein. You are not
- responsible for enforcing compliance by third parties to this License.
-
-7. If, as a consequence of a court judgment or allegation of patent
- infringement or for any other reason (not limited to patent issues),
- conditions are imposed on you (whether by court order, agreement or
- otherwise) that contradict the conditions of this License, they do not
- excuse you from the conditions of this License. If you cannot distribute
- so as to satisfy simultaneously your obligations under this License and
- any other pertinent obligations, then as a consequence you may not
- distribute the Program at all. For example, if a patent license would
- not permit royalty-free redistribution of the Program by all those who
- receive copies directly or indirectly through you, then the only way you
- could satisfy both it and this License would be to refrain entirely from
- distribution of the Program.
-
- If any portion of this section is held invalid or unenforceable under any
- particular circumstance, the balance of the section is intended to apply
- and the section as a whole is intended to apply in other circumstances.
-
- It is not the purpose of this section to induce you to infringe any
- patents or other property right claims or to contest validity of any
- such claims; this section has the sole purpose of protecting the
- integrity of the free software distribution system, which is implemented
- by public license practices. Many people have made generous contributions
- to the wide range of software distributed through that system in
- reliance on consistent application of that system; it is up to the
- author/donor to decide if he or she is willing to distribute software
- through any other system and a licensee cannot impose that choice.
-
- This section is intended to make thoroughly clear what is believed to be
- a consequence of the rest of this License.
-
-8. If the distribution and/or use of the Program is restricted in certain
- countries either by patents or by copyrighted interfaces, the original
- copyright holder who places the Program under this License may add an
- explicit geographical distribution limitation excluding those countries,
- so that distribution is permitted only in or among countries not thus
- excluded. In such case, this License incorporates the limitation as if
- written in the body of this License.
-
-9. The Free Software Foundation may publish revised and/or new versions of
- the General Public License from time to time. Such new versions will be
- similar in spirit to the present version, but may differ in detail to
- address new problems or concerns.
-
- Each version is given a distinguishing version number. If the Program
- specifies a version number of this License which applies to it and "any
- later version", you have the option of following the terms and
- conditions either of that version or of any later version published by
- the Free Software Foundation. If the Program does not specify a version
- number of this License, you may choose any version ever published by the
- Free Software Foundation.
-
-10. If you wish to incorporate parts of the Program into other free programs
- whose distribution conditions are different, write to the author to ask
- for permission. For software which is copyrighted by the Free Software
- Foundation, write to the Free Software Foundation; we sometimes make
- exceptions for this. Our decision will be guided by the two goals of
- preserving the free status of all derivatives of our free software and
- of promoting the sharing and reuse of software generally.
-
- NO WARRANTY
-
-11. BECAUSE THE PROGRAM IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY
- FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW. EXCEPT WHEN
- OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES
- PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER
- EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
- WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE
- ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH
- YOU. SHOULD THE PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL
- NECESSARY SERVICING, REPAIR OR CORRECTION.
-
-12. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING
- WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR
- REDISTRIBUTE THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR
- DAMAGES, INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL
- DAMAGES ARISING OUT OF THE USE OR INABILITY TO USE THE PROGRAM
- (INCLUDING BUT NOT LIMITED TO LOSS OF DATA OR DATA BEING RENDERED
- INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD PARTIES OR A FAILURE OF
- THE PROGRAM TO OPERATE WITH ANY OTHER PROGRAMS), EVEN IF SUCH HOLDER OR
- OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH DAMAGES.
-
-END OF TERMS AND CONDITIONS
-
-How to Apply These Terms to Your New Programs
-
-If you develop a new program, and you want it to be of the greatest
-possible use to the public, the best way to achieve this is to make it free
-software which everyone can redistribute and change under these terms.
-
-To do so, attach the following notices to the program. It is safest to
-attach them to the start of each source file to most effectively convey the
-exclusion of warranty; and each file should have at least the "copyright"
-line and a pointer to where the full notice is found.
-
-one line to give the program's name and an idea of what it does.
-Copyright (C) yyyy name of author
-
-This program is free software; you can redistribute it and/or modify it
-under the terms of the GNU General Public License as published by the Free
-Software Foundation; either version 2 of the License, or (at your option)
-any later version.
-
-This program is distributed in the hope that it will be useful, but WITHOUT
-ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
-FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
-more details.
-
-You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc., 59
-Temple Place - Suite 330, Boston, MA 02111-1307, USA.
-
-Also add information on how to contact you by electronic and paper mail.
-
-If the program is interactive, make it output a short notice like this when
-it starts in an interactive mode:
-
-Gnomovision version 69, Copyright (C) year name of author Gnomovision comes
-with ABSOLUTELY NO WARRANTY; for details type 'show w'. This is free
-software, and you are welcome to redistribute it under certain conditions;
-type 'show c' for details.
-
-The hypothetical commands 'show w' and 'show c' should show the appropriate
-parts of the General Public License. Of course, the commands you use may be
-called something other than 'show w' and 'show c'; they could even be
-mouse-clicks or menu items--whatever suits your program.
-
-You should also get your employer (if you work as a programmer) or your
-school, if any, to sign a "copyright disclaimer" for the program, if
-necessary. Here is a sample; alter the names:
-
-Yoyodyne, Inc., hereby disclaims all copyright interest in the program
-'Gnomovision' (which makes passes at compilers) written by James Hacker.
-
-signature of Ty Coon, 1 April 1989
-Ty Coon, President of Vice
-
-This General Public License does not permit incorporating your program into
-proprietary programs. If your program is a subroutine library, you may
-consider it more useful to permit linking proprietary applications with the
-library. If this is what you want to do, use the GNU Library General Public
-License instead of this License.
diff --git a/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe.h b/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe.h
index 222c2c71..59415469 100644
--- a/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe.h
+++ b/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe.h
@@ -17,7 +17,7 @@
51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
The full GNU General Public License is included in this distribution in
- the file called "COPYING".
+ the file called "LICENSE.GPL".
Contact Information:
e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
diff --git a/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_82598.c b/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_82598.c
index 24015844..e17b7f18 100644
--- a/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_82598.c
+++ b/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_82598.c
@@ -17,7 +17,7 @@
51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
The full GNU General Public License is included in this distribution in
- the file called "COPYING".
+ the file called "LICENSE.GPL".
Contact Information:
e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
diff --git a/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_82598.h b/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_82598.h
index c6abb020..00a584f4 100644
--- a/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_82598.h
+++ b/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_82598.h
@@ -17,7 +17,7 @@
51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
The full GNU General Public License is included in this distribution in
- the file called "COPYING".
+ the file called "LICENSE.GPL".
Contact Information:
e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
diff --git a/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_82599.c b/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_82599.c
index c6f4130d..30de47eb 100644
--- a/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_82599.c
+++ b/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_82599.c
@@ -17,7 +17,7 @@
51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
The full GNU General Public License is included in this distribution in
- the file called "COPYING".
+ the file called "LICENSE.GPL".
Contact Information:
e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
diff --git a/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_82599.h b/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_82599.h
index 02be92ab..41024400 100644
--- a/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_82599.h
+++ b/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_82599.h
@@ -17,7 +17,7 @@
51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
The full GNU General Public License is included in this distribution in
- the file called "COPYING".
+ the file called "LICENSE.GPL".
Contact Information:
e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
diff --git a/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_api.c b/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_api.c
index ef7ce629..f00fe796 100644
--- a/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_api.c
+++ b/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_api.c
@@ -17,7 +17,7 @@
51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
The full GNU General Public License is included in this distribution in
- the file called "COPYING".
+ the file called "LICENSE.GPL".
Contact Information:
e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
diff --git a/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_api.h b/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_api.h
index a6ab30d2..98b74000 100644
--- a/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_api.h
+++ b/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_api.h
@@ -17,7 +17,7 @@
51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
The full GNU General Public License is included in this distribution in
- the file called "COPYING".
+ the file called "LICENSE.GPL".
Contact Information:
e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
diff --git a/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_common.c b/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_common.c
index 93659ca0..88b33fa0 100644
--- a/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_common.c
+++ b/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_common.c
@@ -17,7 +17,7 @@
51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
The full GNU General Public License is included in this distribution in
- the file called "COPYING".
+ the file called "LICENSE.GPL".
Contact Information:
e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
diff --git a/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_common.h b/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_common.h
index 9bd6f534..6ae5926f 100644
--- a/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_common.h
+++ b/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_common.h
@@ -17,7 +17,7 @@
51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
The full GNU General Public License is included in this distribution in
- the file called "COPYING".
+ the file called "LICENSE.GPL".
Contact Information:
e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
diff --git a/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_dcb.h b/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_dcb.h
index a6690451..5e6f9ac9 100644
--- a/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_dcb.h
+++ b/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_dcb.h
@@ -17,7 +17,7 @@
51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
The full GNU General Public License is included in this distribution in
- the file called "COPYING".
+ the file called "LICENSE.GPL".
Contact Information:
e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
diff --git a/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_ethtool.c b/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_ethtool.c
index 11472bd3..bc3cb2f4 100644
--- a/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_ethtool.c
+++ b/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_ethtool.c
@@ -17,7 +17,7 @@
51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
The full GNU General Public License is included in this distribution in
- the file called "COPYING".
+ the file called "LICENSE.GPL".
Contact Information:
e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
diff --git a/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_fcoe.h b/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_fcoe.h
index cad28622..48f7dcfc 100644
--- a/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_fcoe.h
+++ b/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_fcoe.h
@@ -17,7 +17,7 @@
51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
The full GNU General Public License is included in this distribution in
- the file called "COPYING".
+ the file called "LICENSE.GPL".
Contact Information:
e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
diff --git a/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_main.c b/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_main.c
index 238028d0..d26016c9 100644
--- a/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_main.c
+++ b/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_main.c
@@ -17,7 +17,7 @@
51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
The full GNU General Public License is included in this distribution in
- the file called "COPYING".
+ the file called "LICENSE.GPL".
Contact Information:
e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
diff --git a/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_mbx.h b/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_mbx.h
index 124f00de..5ced84f8 100644
--- a/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_mbx.h
+++ b/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_mbx.h
@@ -17,7 +17,7 @@
51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
The full GNU General Public License is included in this distribution in
- the file called "COPYING".
+ the file called "LICENSE.GPL".
Contact Information:
e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
diff --git a/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_osdep.h b/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_osdep.h
index d161600b..c6f8e21f 100644
--- a/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_osdep.h
+++ b/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_osdep.h
@@ -17,7 +17,7 @@
51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
The full GNU General Public License is included in this distribution in
- the file called "COPYING".
+ the file called "LICENSE.GPL".
Contact Information:
e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
diff --git a/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_phy.c b/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_phy.c
index e3f5275e..234fa632 100644
--- a/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_phy.c
+++ b/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_phy.c
@@ -17,7 +17,7 @@
51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
The full GNU General Public License is included in this distribution in
- the file called "COPYING".
+ the file called "LICENSE.GPL".
Contact Information:
e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
diff --git a/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_phy.h b/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_phy.h
index bbe5a9e3..5ae171ac 100644
--- a/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_phy.h
+++ b/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_phy.h
@@ -17,7 +17,7 @@
51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
The full GNU General Public License is included in this distribution in
- the file called "COPYING".
+ the file called "LICENSE.GPL".
Contact Information:
e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
diff --git a/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_sriov.h b/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_sriov.h
deleted file mode 100644
index 5e3559fd..00000000
--- a/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_sriov.h
+++ /dev/null
@@ -1,73 +0,0 @@
-/*******************************************************************************
-
- Intel 10 Gigabit PCI Express Linux driver
- Copyright(c) 1999 - 2012 Intel Corporation.
-
- This program is free software; you can redistribute it and/or modify it
- under the terms and conditions of the GNU General Public License,
- version 2, as published by the Free Software Foundation.
-
- This program is distributed in the hope it will be useful, but WITHOUT
- ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
- more details.
-
- You should have received a copy of the GNU General Public License along with
- this program; if not, write to the Free Software Foundation, Inc.,
- 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
-
- The full GNU General Public License is included in this distribution in
- the file called "COPYING".
-
- Contact Information:
- e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
- Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
-
-*******************************************************************************/
-
-
-#ifndef _IXGBE_SRIOV_H_
-#define _IXGBE_SRIOV_H_
-
-int ixgbe_set_vf_multicasts(struct ixgbe_adapter *adapter,
- int entries, u16 *hash_list, u32 vf);
-void ixgbe_restore_vf_multicasts(struct ixgbe_adapter *adapter);
-int ixgbe_set_vf_vlan(struct ixgbe_adapter *adapter, int add, int vid, u32 vf);
-void ixgbe_set_vmolr(struct ixgbe_hw *hw, u32 vf, bool aupe);
-void ixgbe_vf_reset_event(struct ixgbe_adapter *adapter, u32 vf);
-void ixgbe_vf_reset_msg(struct ixgbe_adapter *adapter, u32 vf);
-void ixgbe_msg_task(struct ixgbe_adapter *adapter);
-int ixgbe_set_vf_mac(struct ixgbe_adapter *adapter,
- int vf, unsigned char *mac_addr);
-void ixgbe_disable_tx_rx(struct ixgbe_adapter *adapter);
-void ixgbe_ping_all_vfs(struct ixgbe_adapter *adapter);
-#ifdef IFLA_VF_MAX
-int ixgbe_ndo_set_vf_mac(struct net_device *netdev, int queue, u8 *mac);
-int ixgbe_ndo_set_vf_vlan(struct net_device *netdev, int queue, u16 vlan,
- u8 qos);
-int ixgbe_ndo_set_vf_bw(struct net_device *netdev, int vf, int tx_rate);
-#ifdef HAVE_VF_SPOOFCHK_CONFIGURE
-int ixgbe_ndo_set_vf_spoofchk(struct net_device *netdev, int vf, bool setting);
-#endif
-int ixgbe_ndo_get_vf_config(struct net_device *netdev,
- int vf, struct ifla_vf_info *ivi);
-#endif
-void ixgbe_disable_sriov(struct ixgbe_adapter *adapter);
-#ifdef CONFIG_PCI_IOV
-int ixgbe_vf_configuration(struct pci_dev *pdev, unsigned int event_mask);
-void ixgbe_enable_sriov(struct ixgbe_adapter *adapter);
-#endif
-int ixgbe_check_vf_assignment(struct ixgbe_adapter *adapter);
-#ifdef IFLA_VF_MAX
-void ixgbe_check_vf_rate_limit(struct ixgbe_adapter *adapter);
-#endif /* IFLA_VF_MAX */
-void ixgbe_dump_registers(struct ixgbe_adapter *adapter);
-
-/*
- * These are defined in ixgbe_type.h on behalf of the VF driver
- * but we need them here unwrapped for the PF driver.
- */
-#define IXGBE_DEV_ID_82599_VF 0x10ED
-#define IXGBE_DEV_ID_X540_VF 0x1515
-
-#endif /* _IXGBE_SRIOV_H_ */
diff --git a/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_type.h b/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_type.h
index 6b21c879..bda61fa4 100644
--- a/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_type.h
+++ b/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_type.h
@@ -17,7 +17,7 @@
51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
The full GNU General Public License is included in this distribution in
- the file called "COPYING".
+ the file called "LICENSE.GPL".
Contact Information:
e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
diff --git a/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_x540.c b/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_x540.c
index b99d9e84..2affe242 100644
--- a/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_x540.c
+++ b/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_x540.c
@@ -17,7 +17,7 @@
51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
The full GNU General Public License is included in this distribution in
- the file called "COPYING".
+ the file called "LICENSE.GPL".
Contact Information:
e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
diff --git a/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_x540.h b/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_x540.h
index 77e8952d..38bcc87b 100644
--- a/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_x540.h
+++ b/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_x540.h
@@ -17,7 +17,7 @@
51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
The full GNU General Public License is included in this distribution in
- the file called "COPYING".
+ the file called "LICENSE.GPL".
Contact Information:
e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
diff --git a/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/kcompat.c b/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/kcompat.c
index 5f2523ed..d84c7ccb 100644
--- a/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/kcompat.c
+++ b/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/kcompat.c
@@ -17,7 +17,7 @@
51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
The full GNU General Public License is included in this distribution in
- the file called "COPYING".
+ the file called "LICENSE.GPL".
Contact Information:
e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
diff --git a/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/kcompat.h b/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/kcompat.h
index bf27579b..4c7a6408 100644
--- a/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/kcompat.h
+++ b/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/kcompat.h
@@ -17,7 +17,7 @@
51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
The full GNU General Public License is included in this distribution in
- the file called "COPYING".
+ the file called "LICENSE.GPL".
Contact Information:
e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
@@ -3140,4 +3140,16 @@ static inline int __kc_pci_vfs_assigned(struct pci_dev *dev)
#define SET_ETHTOOL_OPS(netdev, ops) ((netdev)->ethtool_ops = (ops))
#endif /* >= 3.16.0 */
+/*
+ * vlan_tx_tag_* macros renamed to skb_vlan_tag_* (Linux commit: df8a39defad4)
+ * Older kernels that backported this commit need to use the renamed macros.
+ * This fix is specific to RedHat/CentOS kernels.
+ */
+#if (defined(RHEL_RELEASE_CODE) && \
+ RHEL_RELEASE_CODE >= RHEL_RELEASE_VERSION(6, 8) && \
+ LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 34))
+#define vlan_tx_tag_get skb_vlan_tag_get
+#define vlan_tx_tag_present skb_vlan_tag_present
+#endif
+
#endif /* _KCOMPAT_H_ */
diff --git a/lib/librte_eal/linuxapp/kni/kni_dev.h b/lib/librte_eal/linuxapp/kni/kni_dev.h
index a0e5cb6b..58cbadd3 100644
--- a/lib/librte_eal/linuxapp/kni/kni_dev.h
+++ b/lib/librte_eal/linuxapp/kni/kni_dev.h
@@ -25,6 +25,11 @@
#ifndef _KNI_DEV_H_
#define _KNI_DEV_H_
+#ifdef pr_fmt
+#undef pr_fmt
+#endif
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
#include <linux/if.h>
#include <linux/wait.h>
#include <linux/sched.h>
@@ -39,10 +44,11 @@
#include <exec-env/rte_kni_common.h>
#define KNI_KTHREAD_RESCHEDULE_INTERVAL 5 /* us */
+#define MBUF_BURST_SZ 32
+
/**
* A structure describing the private information for a kni device.
*/
-
struct kni_dev {
/* kni list */
struct list_head list;
@@ -50,7 +56,7 @@ struct kni_dev {
struct net_device_stats stats;
int status;
uint16_t group_id; /* Group ID of a group of KNI devices */
- unsigned core_id; /* Core ID to bind */
+ uint32_t core_id; /* Core ID to bind */
char name[RTE_KNI_NAMESIZE]; /* Network device name */
struct task_struct *pthread;
@@ -84,38 +90,36 @@ struct kni_dev {
/* response queue */
void *resp_q;
- void * sync_kva;
+ void *sync_kva;
void *sync_va;
void *mbuf_kva;
void *mbuf_va;
/* mbuf size */
- unsigned mbuf_size;
+ uint32_t mbuf_size;
/* synchro for request processing */
unsigned long synchro;
#ifdef RTE_KNI_VHOST
- struct kni_vhost_queue* vhost_queue;
+ struct kni_vhost_queue *vhost_queue;
+
volatile enum {
BE_STOP = 0x1,
BE_START = 0x2,
BE_FINISH = 0x4,
- }vq_status;
+ } vq_status;
#endif
+ /* buffers */
+ void *pa[MBUF_BURST_SZ];
+ void *va[MBUF_BURST_SZ];
+ void *alloc_pa[MBUF_BURST_SZ];
+ void *alloc_va[MBUF_BURST_SZ];
};
-#define KNI_ERR(args...) printk(KERN_DEBUG "KNI: Error: " args)
-#define KNI_PRINT(args...) printk(KERN_DEBUG "KNI: " args)
-#ifdef RTE_KNI_KO_DEBUG
- #define KNI_DBG(args...) printk(KERN_DEBUG "KNI: " args)
-#else
- #define KNI_DBG(args...)
-#endif
-
#ifdef RTE_KNI_VHOST
-unsigned int
+uint32_t
kni_poll(struct file *file, struct socket *sock, poll_table * wait);
int kni_chk_vhost_rx(struct kni_dev *kni);
int kni_vhost_init(struct kni_dev *kni);
@@ -127,23 +131,22 @@ struct kni_vhost_queue {
int vnet_hdr_sz;
struct kni_dev *kni;
int sockfd;
- unsigned int flags;
- struct sk_buff* cache;
- struct rte_kni_fifo* fifo;
+ uint32_t flags;
+ struct sk_buff *cache;
+ struct rte_kni_fifo *fifo;
};
#endif
-#ifdef RTE_KNI_VHOST_DEBUG_RX
- #define KNI_DBG_RX(args...) printk(KERN_DEBUG "KNI RX: " args)
-#else
- #define KNI_DBG_RX(args...)
-#endif
+void kni_net_rx(struct kni_dev *kni);
+void kni_net_init(struct net_device *dev);
+void kni_net_config_lo_mode(char *lo_str);
+void kni_net_poll_resp(struct kni_dev *kni);
+void kni_set_ethtool_ops(struct net_device *netdev);
-#ifdef RTE_KNI_VHOST_DEBUG_TX
- #define KNI_DBG_TX(args...) printk(KERN_DEBUG "KNI TX: " args)
-#else
- #define KNI_DBG_TX(args...)
-#endif
+int ixgbe_kni_probe(struct pci_dev *pdev, struct net_device **lad_dev);
+void ixgbe_kni_remove(struct pci_dev *pdev);
+int igb_kni_probe(struct pci_dev *pdev, struct net_device **lad_dev);
+void igb_kni_remove(struct pci_dev *pdev);
#endif
diff --git a/lib/librte_eal/linuxapp/kni/kni_ethtool.c b/lib/librte_eal/linuxapp/kni/kni_ethtool.c
index 06b6d463..0c88589c 100644
--- a/lib/librte_eal/linuxapp/kni/kni_ethtool.c
+++ b/lib/librte_eal/linuxapp/kni/kni_ethtool.c
@@ -31,6 +31,7 @@ static int
kni_check_if_running(struct net_device *dev)
{
struct kni_dev *priv = netdev_priv(dev);
+
if (priv->lad_dev)
return 0;
else
@@ -41,6 +42,7 @@ static void
kni_get_drvinfo(struct net_device *dev, struct ethtool_drvinfo *info)
{
struct kni_dev *priv = netdev_priv(dev);
+
priv->lad_dev->ethtool_ops->get_drvinfo(priv->lad_dev, info);
}
@@ -48,6 +50,7 @@ static int
kni_get_settings(struct net_device *dev, struct ethtool_cmd *ecmd)
{
struct kni_dev *priv = netdev_priv(dev);
+
return priv->lad_dev->ethtool_ops->get_settings(priv->lad_dev, ecmd);
}
@@ -55,6 +58,7 @@ static int
kni_set_settings(struct net_device *dev, struct ethtool_cmd *ecmd)
{
struct kni_dev *priv = netdev_priv(dev);
+
return priv->lad_dev->ethtool_ops->set_settings(priv->lad_dev, ecmd);
}
@@ -62,6 +66,7 @@ static void
kni_get_wol(struct net_device *dev, struct ethtool_wolinfo *wol)
{
struct kni_dev *priv = netdev_priv(dev);
+
priv->lad_dev->ethtool_ops->get_wol(priv->lad_dev, wol);
}
@@ -69,6 +74,7 @@ static int
kni_set_wol(struct net_device *dev, struct ethtool_wolinfo *wol)
{
struct kni_dev *priv = netdev_priv(dev);
+
return priv->lad_dev->ethtool_ops->set_wol(priv->lad_dev, wol);
}
@@ -76,6 +82,7 @@ static int
kni_nway_reset(struct net_device *dev)
{
struct kni_dev *priv = netdev_priv(dev);
+
return priv->lad_dev->ethtool_ops->nway_reset(priv->lad_dev);
}
@@ -83,6 +90,7 @@ static int
kni_get_eeprom_len(struct net_device *dev)
{
struct kni_dev *priv = netdev_priv(dev);
+
return priv->lad_dev->ethtool_ops->get_eeprom_len(priv->lad_dev);
}
@@ -91,6 +99,7 @@ kni_get_eeprom(struct net_device *dev, struct ethtool_eeprom *eeprom,
u8 *bytes)
{
struct kni_dev *priv = netdev_priv(dev);
+
return priv->lad_dev->ethtool_ops->get_eeprom(priv->lad_dev, eeprom,
bytes);
}
@@ -100,6 +109,7 @@ kni_set_eeprom(struct net_device *dev, struct ethtool_eeprom *eeprom,
u8 *bytes)
{
struct kni_dev *priv = netdev_priv(dev);
+
return priv->lad_dev->ethtool_ops->set_eeprom(priv->lad_dev, eeprom,
bytes);
}
@@ -108,6 +118,7 @@ static void
kni_get_ringparam(struct net_device *dev, struct ethtool_ringparam *ring)
{
struct kni_dev *priv = netdev_priv(dev);
+
priv->lad_dev->ethtool_ops->get_ringparam(priv->lad_dev, ring);
}
@@ -115,6 +126,7 @@ static int
kni_set_ringparam(struct net_device *dev, struct ethtool_ringparam *ring)
{
struct kni_dev *priv = netdev_priv(dev);
+
return priv->lad_dev->ethtool_ops->set_ringparam(priv->lad_dev, ring);
}
@@ -122,6 +134,7 @@ static void
kni_get_pauseparam(struct net_device *dev, struct ethtool_pauseparam *pause)
{
struct kni_dev *priv = netdev_priv(dev);
+
priv->lad_dev->ethtool_ops->get_pauseparam(priv->lad_dev, pause);
}
@@ -129,6 +142,7 @@ static int
kni_set_pauseparam(struct net_device *dev, struct ethtool_pauseparam *pause)
{
struct kni_dev *priv = netdev_priv(dev);
+
return priv->lad_dev->ethtool_ops->set_pauseparam(priv->lad_dev,
pause);
}
@@ -137,6 +151,7 @@ static u32
kni_get_msglevel(struct net_device *dev)
{
struct kni_dev *priv = netdev_priv(dev);
+
return priv->lad_dev->ethtool_ops->get_msglevel(priv->lad_dev);
}
@@ -144,6 +159,7 @@ static void
kni_set_msglevel(struct net_device *dev, u32 data)
{
struct kni_dev *priv = netdev_priv(dev);
+
priv->lad_dev->ethtool_ops->set_msglevel(priv->lad_dev, data);
}
@@ -151,6 +167,7 @@ static int
kni_get_regs_len(struct net_device *dev)
{
struct kni_dev *priv = netdev_priv(dev);
+
return priv->lad_dev->ethtool_ops->get_regs_len(priv->lad_dev);
}
@@ -158,6 +175,7 @@ static void
kni_get_regs(struct net_device *dev, struct ethtool_regs *regs, void *p)
{
struct kni_dev *priv = netdev_priv(dev);
+
priv->lad_dev->ethtool_ops->get_regs(priv->lad_dev, regs, p);
}
@@ -165,6 +183,7 @@ static void
kni_get_strings(struct net_device *dev, u32 stringset, u8 *data)
{
struct kni_dev *priv = netdev_priv(dev);
+
priv->lad_dev->ethtool_ops->get_strings(priv->lad_dev, stringset,
data);
}
@@ -173,6 +192,7 @@ static int
kni_get_sset_count(struct net_device *dev, int sset)
{
struct kni_dev *priv = netdev_priv(dev);
+
return priv->lad_dev->ethtool_ops->get_sset_count(priv->lad_dev, sset);
}
@@ -181,24 +201,25 @@ kni_get_ethtool_stats(struct net_device *dev, struct ethtool_stats *stats,
u64 *data)
{
struct kni_dev *priv = netdev_priv(dev);
+
priv->lad_dev->ethtool_ops->get_ethtool_stats(priv->lad_dev, stats,
data);
}
struct ethtool_ops kni_ethtool_ops = {
- .begin = kni_check_if_running,
+ .begin = kni_check_if_running,
.get_drvinfo = kni_get_drvinfo,
.get_settings = kni_get_settings,
.set_settings = kni_set_settings,
.get_regs_len = kni_get_regs_len,
- .get_regs = kni_get_regs,
- .get_wol = kni_get_wol,
- .set_wol = kni_set_wol,
- .nway_reset = kni_nway_reset,
- .get_link = ethtool_op_get_link,
+ .get_regs = kni_get_regs,
+ .get_wol = kni_get_wol,
+ .set_wol = kni_set_wol,
+ .nway_reset = kni_nway_reset,
+ .get_link = ethtool_op_get_link,
.get_eeprom_len = kni_get_eeprom_len,
- .get_eeprom = kni_get_eeprom,
- .set_eeprom = kni_set_eeprom,
+ .get_eeprom = kni_get_eeprom,
+ .set_eeprom = kni_set_eeprom,
.get_ringparam = kni_get_ringparam,
.set_ringparam = kni_set_ringparam,
.get_pauseparam = kni_get_pauseparam,
@@ -207,7 +228,7 @@ struct ethtool_ops kni_ethtool_ops = {
.set_msglevel = kni_set_msglevel,
.get_strings = kni_get_strings,
.get_sset_count = kni_get_sset_count,
- .get_ethtool_stats = kni_get_ethtool_stats,
+ .get_ethtool_stats = kni_get_ethtool_stats,
};
void
diff --git a/lib/librte_eal/linuxapp/kni/kni_fifo.h b/lib/librte_eal/linuxapp/kni/kni_fifo.h
index 3ea750e2..025ec1c9 100644
--- a/lib/librte_eal/linuxapp/kni/kni_fifo.h
+++ b/lib/librte_eal/linuxapp/kni/kni_fifo.h
@@ -30,13 +30,13 @@
/**
* Adds num elements into the fifo. Return the number actually written
*/
-static inline unsigned
-kni_fifo_put(struct rte_kni_fifo *fifo, void **data, unsigned num)
+static inline uint32_t
+kni_fifo_put(struct rte_kni_fifo *fifo, void **data, uint32_t num)
{
- unsigned i = 0;
- unsigned fifo_write = fifo->write;
- unsigned fifo_read = fifo->read;
- unsigned new_write = fifo_write;
+ uint32_t i = 0;
+ uint32_t fifo_write = fifo->write;
+ uint32_t fifo_read = fifo->read;
+ uint32_t new_write = fifo_write;
for (i = 0; i < num; i++) {
new_write = (new_write + 1) & (fifo->len - 1);
@@ -54,12 +54,12 @@ kni_fifo_put(struct rte_kni_fifo *fifo, void **data, unsigned num)
/**
* Get up to num elements from the fifo. Return the number actually read
*/
-static inline unsigned
-kni_fifo_get(struct rte_kni_fifo *fifo, void **data, unsigned num)
+static inline uint32_t
+kni_fifo_get(struct rte_kni_fifo *fifo, void **data, uint32_t num)
{
- unsigned i = 0;
- unsigned new_read = fifo->read;
- unsigned fifo_write = fifo->write;
+ uint32_t i = 0;
+ uint32_t new_read = fifo->read;
+ uint32_t fifo_write = fifo->write;
for (i = 0; i < num; i++) {
if (new_read == fifo_write)
@@ -76,16 +76,16 @@ kni_fifo_get(struct rte_kni_fifo *fifo, void **data, unsigned num)
/**
* Get the num of elements in the fifo
*/
-static inline unsigned
+static inline uint32_t
kni_fifo_count(struct rte_kni_fifo *fifo)
{
- return (fifo->len + fifo->write - fifo->read) & ( fifo->len - 1);
+ return (fifo->len + fifo->write - fifo->read) & (fifo->len - 1);
}
/**
* Get the num of available elements in the fifo
*/
-static inline unsigned
+static inline uint32_t
kni_fifo_free_count(struct rte_kni_fifo *fifo)
{
return (fifo->read - fifo->write - 1) & (fifo->len - 1);
@@ -96,7 +96,7 @@ kni_fifo_free_count(struct rte_kni_fifo *fifo)
* Initializes the kni fifo structure
*/
static inline void
-kni_fifo_init(struct rte_kni_fifo *fifo, unsigned size)
+kni_fifo_init(struct rte_kni_fifo *fifo, uint32_t size)
{
fifo->write = 0;
fifo->read = 0;
diff --git a/lib/librte_eal/linuxapp/kni/kni_misc.c b/lib/librte_eal/linuxapp/kni/kni_misc.c
index 59d15ca6..497db9bd 100644
--- a/lib/librte_eal/linuxapp/kni/kni_misc.c
+++ b/lib/librte_eal/linuxapp/kni/kni_misc.c
@@ -30,6 +30,7 @@
#include <linux/pci.h>
#include <linux/kthread.h>
#include <linux/rwsem.h>
+#include <linux/mutex.h>
#include <linux/nsproxy.h>
#include <net/net_namespace.h>
#include <net/netns/generic.h>
@@ -47,52 +48,15 @@ MODULE_DESCRIPTION("Kernel Module for managing kni devices");
#define KNI_MAX_DEVICES 32
-extern void kni_net_rx(struct kni_dev *kni);
-extern void kni_net_init(struct net_device *dev);
-extern void kni_net_config_lo_mode(char *lo_str);
-extern void kni_net_poll_resp(struct kni_dev *kni);
-extern void kni_set_ethtool_ops(struct net_device *netdev);
-
-extern int ixgbe_kni_probe(struct pci_dev *pdev, struct net_device **lad_dev);
-extern void ixgbe_kni_remove(struct pci_dev *pdev);
-extern int igb_kni_probe(struct pci_dev *pdev, struct net_device **lad_dev);
-extern void igb_kni_remove(struct pci_dev *pdev);
-
-static int kni_open(struct inode *inode, struct file *file);
-static int kni_release(struct inode *inode, struct file *file);
-static int kni_ioctl(struct inode *inode, unsigned int ioctl_num,
- unsigned long ioctl_param);
-static int kni_compat_ioctl(struct inode *inode, unsigned int ioctl_num,
- unsigned long ioctl_param);
-static int kni_dev_remove(struct kni_dev *dev);
-
-static int __init kni_parse_kthread_mode(void);
-
-/* KNI processing for single kernel thread mode */
-static int kni_thread_single(void *unused);
-/* KNI processing for multiple kernel thread mode */
-static int kni_thread_multiple(void *param);
-
-static struct file_operations kni_fops = {
- .owner = THIS_MODULE,
- .open = kni_open,
- .release = kni_release,
- .unlocked_ioctl = (void *)kni_ioctl,
- .compat_ioctl = (void *)kni_compat_ioctl,
-};
-
-static struct miscdevice kni_misc = {
- .minor = MISC_DYNAMIC_MINOR,
- .name = KNI_DEVICE,
- .fops = &kni_fops,
-};
+extern const struct pci_device_id ixgbe_pci_tbl[];
+extern const struct pci_device_id igb_pci_tbl[];
/* loopback mode */
-static char *lo_mode = NULL;
+static char *lo_mode;
/* Kernel thread mode */
-static char *kthread_mode = NULL;
-static unsigned multiple_kthread_on = 0;
+static char *kthread_mode;
+static uint32_t multiple_kthread_on;
#define KNI_DEV_IN_USE_BIT_NUM 0 /* Bit number for device in use */
@@ -100,20 +64,24 @@ static int kni_net_id;
struct kni_net {
unsigned long device_in_use; /* device in use flag */
+ struct mutex kni_kthread_lock;
struct task_struct *kni_kthread;
struct rw_semaphore kni_list_lock;
struct list_head kni_list_head;
};
-static int __net_init kni_init_net(struct net *net)
+static int __net_init
+kni_init_net(struct net *net)
{
#ifdef HAVE_SIMPLIFIED_PERNET_OPERATIONS
struct kni_net *knet = net_generic(net, kni_net_id);
+
+ memset(knet, 0, sizeof(*knet));
#else
struct kni_net *knet;
int ret;
- knet = kmalloc(sizeof(struct kni_net), GFP_KERNEL);
+ knet = kzalloc(sizeof(struct kni_net), GFP_KERNEL);
if (!knet) {
ret = -ENOMEM;
return ret;
@@ -123,6 +91,8 @@ static int __net_init kni_init_net(struct net *net)
/* Clear the bit of device in use */
clear_bit(KNI_DEV_IN_USE_BIT_NUM, &knet->device_in_use);
+ mutex_init(&knet->kni_kthread_lock);
+
init_rwsem(&knet->kni_list_lock);
INIT_LIST_HEAD(&knet->kni_list_head);
@@ -137,11 +107,15 @@ static int __net_init kni_init_net(struct net *net)
#endif
}
-static void __net_exit kni_exit_net(struct net *net)
+static void __net_exit
+kni_exit_net(struct net *net)
{
-#ifndef HAVE_SIMPLIFIED_PERNET_OPERATIONS
- struct kni_net *knet = net_generic(net, kni_net_id);
+ struct kni_net *knet __maybe_unused;
+
+ knet = net_generic(net, kni_net_id);
+ mutex_destroy(&knet->kni_kthread_lock);
+#ifndef HAVE_SIMPLIFIED_PERNET_OPERATIONS
kfree(knet);
#endif
}
@@ -155,72 +129,56 @@ static struct pernet_operations kni_net_ops = {
#endif
};
-static int __init
-kni_init(void)
+static int
+kni_thread_single(void *data)
{
- int rc;
-
- KNI_PRINT("######## DPDK kni module loading ########\n");
-
- if (kni_parse_kthread_mode() < 0) {
- KNI_ERR("Invalid parameter for kthread_mode\n");
- return -EINVAL;
- }
+ struct kni_net *knet = data;
+ int j;
+ struct kni_dev *dev;
-#ifdef HAVE_SIMPLIFIED_PERNET_OPERATIONS
- rc = register_pernet_subsys(&kni_net_ops);
+ while (!kthread_should_stop()) {
+ down_read(&knet->kni_list_lock);
+ for (j = 0; j < KNI_RX_LOOP_NUM; j++) {
+ list_for_each_entry(dev, &knet->kni_list_head, list) {
+#ifdef RTE_KNI_VHOST
+ kni_chk_vhost_rx(dev);
#else
- rc = register_pernet_gen_subsys(&kni_net_id, &kni_net_ops);
+ kni_net_rx(dev);
+#endif
+ kni_net_poll_resp(dev);
+ }
+ }
+ up_read(&knet->kni_list_lock);
+#ifdef RTE_KNI_PREEMPT_DEFAULT
+ /* reschedule out for a while */
+ schedule_timeout_interruptible(
+ usecs_to_jiffies(KNI_KTHREAD_RESCHEDULE_INTERVAL));
#endif
- if (rc)
- return -EPERM;
-
- rc = misc_register(&kni_misc);
- if (rc != 0) {
- KNI_ERR("Misc registration failed\n");
- goto out;
}
- /* Configure the lo mode according to the input parameter */
- kni_net_config_lo_mode(lo_mode);
-
- KNI_PRINT("######## DPDK kni module loaded ########\n");
-
return 0;
-
-out:
-#ifdef HAVE_SIMPLIFIED_PERNET_OPERATIONS
- unregister_pernet_subsys(&kni_net_ops);
-#else
- register_pernet_gen_subsys(&kni_net_id, &kni_net_ops);
-#endif
- return rc;
}
-static void __exit
-kni_exit(void)
+static int
+kni_thread_multiple(void *param)
{
- misc_deregister(&kni_misc);
-#ifdef HAVE_SIMPLIFIED_PERNET_OPERATIONS
- unregister_pernet_subsys(&kni_net_ops);
+ int j;
+ struct kni_dev *dev = (struct kni_dev *)param;
+
+ while (!kthread_should_stop()) {
+ for (j = 0; j < KNI_RX_LOOP_NUM; j++) {
+#ifdef RTE_KNI_VHOST
+ kni_chk_vhost_rx(dev);
#else
- register_pernet_gen_subsys(&kni_net_id, &kni_net_ops);
+ kni_net_rx(dev);
#endif
- KNI_PRINT("####### DPDK kni module unloaded #######\n");
-}
-
-static int __init
-kni_parse_kthread_mode(void)
-{
- if (!kthread_mode)
- return 0;
-
- if (strcmp(kthread_mode, "single") == 0)
- return 0;
- else if (strcmp(kthread_mode, "multiple") == 0)
- multiple_kthread_on = 1;
- else
- return -1;
+ kni_net_poll_resp(dev);
+ }
+#ifdef RTE_KNI_PREEMPT_DEFAULT
+ schedule_timeout_interruptible(
+ usecs_to_jiffies(KNI_KTHREAD_RESCHEDULE_INTERVAL));
+#endif
+ }
return 0;
}
@@ -235,21 +193,29 @@ kni_open(struct inode *inode, struct file *file)
if (test_and_set_bit(KNI_DEV_IN_USE_BIT_NUM, &knet->device_in_use))
return -EBUSY;
- /* Create kernel thread for single mode */
- if (multiple_kthread_on == 0) {
- KNI_PRINT("Single kernel thread for all KNI devices\n");
- /* Create kernel thread for RX */
- knet->kni_kthread = kthread_run(kni_thread_single, (void *)knet,
- "kni_single");
- if (IS_ERR(knet->kni_kthread)) {
- KNI_ERR("Unable to create kernel threaed\n");
- return PTR_ERR(knet->kni_kthread);
- }
- } else
- KNI_PRINT("Multiple kernel thread mode enabled\n");
-
file->private_data = get_net(net);
- KNI_PRINT("/dev/kni opened\n");
+ pr_debug("/dev/kni opened\n");
+
+ return 0;
+}
+
+static int
+kni_dev_remove(struct kni_dev *dev)
+{
+ if (!dev)
+ return -ENODEV;
+
+ if (dev->pci_dev) {
+ if (pci_match_id(ixgbe_pci_tbl, dev->pci_dev))
+ ixgbe_kni_remove(dev->pci_dev);
+ else if (pci_match_id(igb_pci_tbl, dev->pci_dev))
+ igb_kni_remove(dev->pci_dev);
+ }
+
+ if (dev->net_dev) {
+ unregister_netdev(dev->net_dev);
+ free_netdev(dev->net_dev);
+ }
return 0;
}
@@ -263,9 +229,13 @@ kni_release(struct inode *inode, struct file *file)
/* Stop kernel thread for single mode */
if (multiple_kthread_on == 0) {
+ mutex_lock(&knet->kni_kthread_lock);
/* Stop kernel thread */
- kthread_stop(knet->kni_kthread);
- knet->kni_kthread = NULL;
+ if (knet->kni_kthread != NULL) {
+ kthread_stop(knet->kni_kthread);
+ knet->kni_kthread = NULL;
+ }
+ mutex_unlock(&knet->kni_kthread_lock);
}
down_write(&knet->kni_list_lock);
@@ -288,110 +258,70 @@ kni_release(struct inode *inode, struct file *file)
clear_bit(KNI_DEV_IN_USE_BIT_NUM, &knet->device_in_use);
put_net(net);
- KNI_PRINT("/dev/kni closed\n");
+ pr_debug("/dev/kni closed\n");
return 0;
}
static int
-kni_thread_single(void *data)
+kni_check_param(struct kni_dev *kni, struct rte_kni_device_info *dev)
{
- struct kni_net *knet = data;
- int j;
- struct kni_dev *dev;
+ if (!kni || !dev)
+ return -1;
- while (!kthread_should_stop()) {
- down_read(&knet->kni_list_lock);
- for (j = 0; j < KNI_RX_LOOP_NUM; j++) {
- list_for_each_entry(dev, &knet->kni_list_head, list) {
-#ifdef RTE_KNI_VHOST
- kni_chk_vhost_rx(dev);
-#else
- kni_net_rx(dev);
-#endif
- kni_net_poll_resp(dev);
- }
- }
- up_read(&knet->kni_list_lock);
-#ifdef RTE_KNI_PREEMPT_DEFAULT
- /* reschedule out for a while */
- schedule_timeout_interruptible(usecs_to_jiffies( \
- KNI_KTHREAD_RESCHEDULE_INTERVAL));
-#endif
+ /* Check if network name has been used */
+ if (!strncmp(kni->name, dev->name, RTE_KNI_NAMESIZE)) {
+ pr_err("KNI name %s duplicated\n", dev->name);
+ return -1;
}
return 0;
}
static int
-kni_thread_multiple(void *param)
+kni_run_thread(struct kni_net *knet, struct kni_dev *kni, uint8_t force_bind)
{
- int j;
- struct kni_dev *dev = (struct kni_dev *)param;
-
- while (!kthread_should_stop()) {
- for (j = 0; j < KNI_RX_LOOP_NUM; j++) {
-#ifdef RTE_KNI_VHOST
- kni_chk_vhost_rx(dev);
-#else
- kni_net_rx(dev);
-#endif
- kni_net_poll_resp(dev);
+ /**
+ * Create a new kernel thread for multiple mode, set its core affinity,
+ * and finally wake it up.
+ */
+ if (multiple_kthread_on) {
+ kni->pthread = kthread_create(kni_thread_multiple,
+ (void *)kni, "kni_%s", kni->name);
+ if (IS_ERR(kni->pthread)) {
+ kni_dev_remove(kni);
+ return -ECANCELED;
}
-#ifdef RTE_KNI_PREEMPT_DEFAULT
- schedule_timeout_interruptible(usecs_to_jiffies( \
- KNI_KTHREAD_RESCHEDULE_INTERVAL));
-#endif
- }
-
- return 0;
-}
-
-static int
-kni_dev_remove(struct kni_dev *dev)
-{
- if (!dev)
- return -ENODEV;
-
- switch (dev->device_id) {
- #define RTE_PCI_DEV_ID_DECL_IGB(vend, dev) case (dev):
- #include <rte_pci_dev_ids.h>
- igb_kni_remove(dev->pci_dev);
- break;
- #define RTE_PCI_DEV_ID_DECL_IXGBE(vend, dev) case (dev):
- #include <rte_pci_dev_ids.h>
- ixgbe_kni_remove(dev->pci_dev);
- break;
- default:
- break;
- }
-
- if (dev->net_dev) {
- unregister_netdev(dev->net_dev);
- free_netdev(dev->net_dev);
- }
- return 0;
-}
+ if (force_bind)
+ kthread_bind(kni->pthread, kni->core_id);
+ wake_up_process(kni->pthread);
+ } else {
+ mutex_lock(&knet->kni_kthread_lock);
+
+ if (knet->kni_kthread == NULL) {
+ knet->kni_kthread = kthread_create(kni_thread_single,
+ (void *)knet, "kni_single");
+ if (IS_ERR(knet->kni_kthread)) {
+ mutex_unlock(&knet->kni_kthread_lock);
+ kni_dev_remove(kni);
+ return -ECANCELED;
+ }
-static int
-kni_check_param(struct kni_dev *kni, struct rte_kni_device_info *dev)
-{
- if (!kni || !dev)
- return -1;
+ if (force_bind)
+ kthread_bind(knet->kni_kthread, kni->core_id);
+ wake_up_process(knet->kni_kthread);
+ }
- /* Check if network name has been used */
- if (!strncmp(kni->name, dev->name, RTE_KNI_NAMESIZE)) {
- KNI_ERR("KNI name %s duplicated\n", dev->name);
- return -1;
+ mutex_unlock(&knet->kni_kthread_lock);
}
return 0;
}
static int
-kni_ioctl_create(struct net *net,
- unsigned int ioctl_num, unsigned long ioctl_param)
+kni_ioctl_create(struct net *net, uint32_t ioctl_num,
+ unsigned long ioctl_param)
{
struct kni_net *knet = net_generic(net, kni_net_id);
int ret;
@@ -402,7 +332,7 @@ kni_ioctl_create(struct net *net,
struct net_device *lad_dev = NULL;
struct kni_dev *kni, *dev, *n;
- printk(KERN_INFO "KNI: Creating kni...\n");
+ pr_info("Creating kni...\n");
/* Check the buffer size, to avoid warning */
if (_IOC_SIZE(ioctl_num) > sizeof(dev_info))
return -EINVAL;
@@ -410,17 +340,15 @@ kni_ioctl_create(struct net *net,
/* Copy kni info from user space */
ret = copy_from_user(&dev_info, (void *)ioctl_param, sizeof(dev_info));
if (ret) {
- KNI_ERR("copy_from_user in kni_ioctl_create");
+ pr_err("copy_from_user in kni_ioctl_create");
return -EIO;
}
/**
- * Check if the cpu core id is valid for binding,
- * for multiple kernel thread mode.
+ * Check if the cpu core id is valid for binding.
*/
- if (multiple_kthread_on && dev_info.force_bind &&
- !cpu_online(dev_info.core_id)) {
- KNI_ERR("cpu %u is not online\n", dev_info.core_id);
+ if (dev_info.force_bind && !cpu_online(dev_info.core_id)) {
+ pr_err("cpu %u is not online\n", dev_info.core_id);
return -EINVAL;
}
@@ -440,7 +368,7 @@ kni_ioctl_create(struct net *net,
#endif
kni_net_init);
if (net_dev == NULL) {
- KNI_ERR("error allocating device \"%s\"\n", dev_info.name);
+ pr_err("error allocating device \"%s\"\n", dev_info.name);
return -EBUSY;
}
@@ -464,33 +392,27 @@ kni_ioctl_create(struct net *net,
kni->sync_va = dev_info.sync_va;
kni->sync_kva = phys_to_virt(dev_info.sync_phys);
- kni->mbuf_kva = phys_to_virt(dev_info.mbuf_phys);
- kni->mbuf_va = dev_info.mbuf_va;
-
#ifdef RTE_KNI_VHOST
kni->vhost_queue = NULL;
kni->vq_status = BE_STOP;
#endif
kni->mbuf_size = dev_info.mbuf_size;
- KNI_PRINT("tx_phys: 0x%016llx, tx_q addr: 0x%p\n",
+ pr_debug("tx_phys: 0x%016llx, tx_q addr: 0x%p\n",
(unsigned long long) dev_info.tx_phys, kni->tx_q);
- KNI_PRINT("rx_phys: 0x%016llx, rx_q addr: 0x%p\n",
+ pr_debug("rx_phys: 0x%016llx, rx_q addr: 0x%p\n",
(unsigned long long) dev_info.rx_phys, kni->rx_q);
- KNI_PRINT("alloc_phys: 0x%016llx, alloc_q addr: 0x%p\n",
+ pr_debug("alloc_phys: 0x%016llx, alloc_q addr: 0x%p\n",
(unsigned long long) dev_info.alloc_phys, kni->alloc_q);
- KNI_PRINT("free_phys: 0x%016llx, free_q addr: 0x%p\n",
+ pr_debug("free_phys: 0x%016llx, free_q addr: 0x%p\n",
(unsigned long long) dev_info.free_phys, kni->free_q);
- KNI_PRINT("req_phys: 0x%016llx, req_q addr: 0x%p\n",
+ pr_debug("req_phys: 0x%016llx, req_q addr: 0x%p\n",
(unsigned long long) dev_info.req_phys, kni->req_q);
- KNI_PRINT("resp_phys: 0x%016llx, resp_q addr: 0x%p\n",
+ pr_debug("resp_phys: 0x%016llx, resp_q addr: 0x%p\n",
(unsigned long long) dev_info.resp_phys, kni->resp_q);
- KNI_PRINT("mbuf_phys: 0x%016llx, mbuf_kva: 0x%p\n",
- (unsigned long long) dev_info.mbuf_phys, kni->mbuf_kva);
- KNI_PRINT("mbuf_va: 0x%p\n", dev_info.mbuf_va);
- KNI_PRINT("mbuf_size: %u\n", kni->mbuf_size);
+ pr_debug("mbuf_size: %u\n", kni->mbuf_size);
- KNI_DBG("PCI: %02x:%02x.%02x %04x:%04x\n",
+ pr_debug("PCI: %02x:%02x.%02x %04x:%04x\n",
dev_info.bus,
dev_info.devid,
dev_info.function,
@@ -501,7 +423,7 @@ kni_ioctl_create(struct net *net,
/* Support Ethtool */
while (pci) {
- KNI_PRINT("pci_bus: %02x:%02x:%02x \n",
+ pr_debug("pci_bus: %02x:%02x:%02x\n",
pci->bus->number,
PCI_SLOT(pci->devfn),
PCI_FUNC(pci->devfn));
@@ -510,28 +432,21 @@ kni_ioctl_create(struct net *net,
(PCI_SLOT(pci->devfn) == dev_info.devid) &&
(PCI_FUNC(pci->devfn) == dev_info.function)) {
found_pci = pci;
- switch (dev_info.device_id) {
- #define RTE_PCI_DEV_ID_DECL_IGB(vend, dev) case (dev):
- #include <rte_pci_dev_ids.h>
- ret = igb_kni_probe(found_pci, &lad_dev);
- break;
- #define RTE_PCI_DEV_ID_DECL_IXGBE(vend, dev) \
- case (dev):
- #include <rte_pci_dev_ids.h>
+
+ if (pci_match_id(ixgbe_pci_tbl, found_pci))
ret = ixgbe_kni_probe(found_pci, &lad_dev);
- break;
- default:
+ else if (pci_match_id(igb_pci_tbl, found_pci))
+ ret = igb_kni_probe(found_pci, &lad_dev);
+ else
ret = -1;
- break;
- }
- KNI_DBG("PCI found: pci=0x%p, lad_dev=0x%p\n",
+ pr_debug("PCI found: pci=0x%p, lad_dev=0x%p\n",
pci, lad_dev);
if (ret == 0) {
kni->lad_dev = lad_dev;
kni_set_ethtool_ops(kni->net_dev);
} else {
- KNI_ERR("Device not supported by ethtool");
+ pr_err("Device not supported by ethtool");
kni->lad_dev = NULL;
}
@@ -546,7 +461,7 @@ kni_ioctl_create(struct net *net,
pci_dev_put(pci);
if (kni->lad_dev)
- memcpy(net_dev->dev_addr, kni->lad_dev->dev_addr, ETH_ALEN);
+ ether_addr_copy(net_dev->dev_addr, kni->lad_dev->dev_addr);
else
/*
* Generate random mac address. eth_random_addr() is the newer
@@ -556,9 +471,11 @@ kni_ioctl_create(struct net *net,
ret = register_netdev(net_dev);
if (ret) {
- KNI_ERR("error %i registering device \"%s\"\n",
+ pr_err("error %i registering device \"%s\"\n",
ret, dev_info.name);
+ kni->net_dev = NULL;
kni_dev_remove(kni);
+ free_netdev(net_dev);
return -ENODEV;
}
@@ -566,22 +483,9 @@ kni_ioctl_create(struct net *net,
kni_vhost_init(kni);
#endif
- /**
- * Create a new kernel thread for multiple mode, set its core affinity,
- * and finally wake it up.
- */
- if (multiple_kthread_on) {
- kni->pthread = kthread_create(kni_thread_multiple,
- (void *)kni,
- "kni_%s", kni->name);
- if (IS_ERR(kni->pthread)) {
- kni_dev_remove(kni);
- return -ECANCELED;
- }
- if (dev_info.force_bind)
- kthread_bind(kni->pthread, kni->core_id);
- wake_up_process(kni->pthread);
- }
+ ret = kni_run_thread(knet, kni, dev_info.force_bind);
+ if (ret != 0)
+ return ret;
down_write(&knet->kni_list_lock);
list_add(&kni->list, &knet->kni_list_head);
@@ -591,8 +495,8 @@ kni_ioctl_create(struct net *net,
}
static int
-kni_ioctl_release(struct net *net,
- unsigned int ioctl_num, unsigned long ioctl_param)
+kni_ioctl_release(struct net *net, uint32_t ioctl_num,
+ unsigned long ioctl_param)
{
struct kni_net *knet = net_generic(net, kni_net_id);
int ret = -EINVAL;
@@ -600,11 +504,11 @@ kni_ioctl_release(struct net *net,
struct rte_kni_device_info dev_info;
if (_IOC_SIZE(ioctl_num) > sizeof(dev_info))
- return -EINVAL;
+ return -EINVAL;
ret = copy_from_user(&dev_info, (void *)ioctl_param, sizeof(dev_info));
if (ret) {
- KNI_ERR("copy_from_user in kni_ioctl_release");
+ pr_err("copy_from_user in kni_ioctl_release");
return -EIO;
}
@@ -631,21 +535,19 @@ kni_ioctl_release(struct net *net,
break;
}
up_write(&knet->kni_list_lock);
- printk(KERN_INFO "KNI: %s release kni named %s\n",
+ pr_info("%s release kni named %s\n",
(ret == 0 ? "Successfully" : "Unsuccessfully"), dev_info.name);
return ret;
}
static int
-kni_ioctl(struct inode *inode,
- unsigned int ioctl_num,
- unsigned long ioctl_param)
+kni_ioctl(struct inode *inode, uint32_t ioctl_num, unsigned long ioctl_param)
{
int ret = -EINVAL;
struct net *net = current->nsproxy->net_ns;
- KNI_DBG("IOCTL num=0x%0x param=0x%0lx\n", ioctl_num, ioctl_param);
+ pr_debug("IOCTL num=0x%0x param=0x%0lx\n", ioctl_num, ioctl_param);
/*
* Switch according to the ioctl called
@@ -661,7 +563,7 @@ kni_ioctl(struct inode *inode,
ret = kni_ioctl_release(net, ioctl_num, ioctl_param);
break;
default:
- KNI_DBG("IOCTL default\n");
+ pr_debug("IOCTL default\n");
break;
}
@@ -669,16 +571,99 @@ kni_ioctl(struct inode *inode,
}
static int
-kni_compat_ioctl(struct inode *inode,
- unsigned int ioctl_num,
+kni_compat_ioctl(struct inode *inode, uint32_t ioctl_num,
unsigned long ioctl_param)
{
/* 32 bits app on 64 bits OS to be supported later */
- KNI_PRINT("Not implemented.\n");
+ pr_debug("Not implemented.\n");
return -EINVAL;
}
+/*
+ * File operations installed on the KNI misc device.
+ *
+ * NOTE(review): kni_ioctl() and kni_compat_ioctl() are defined in this
+ * file as taking a struct inode * first argument and returning int; the
+ * (void *) casts below silence any prototype mismatch. Confirm that the
+ * handlers agree with what struct file_operations expects for
+ * .unlocked_ioctl and .compat_ioctl.
+ */
+static const struct file_operations kni_fops = {
+ .owner = THIS_MODULE,
+ .open = kni_open,
+ .release = kni_release,
+ .unlocked_ioctl = (void *)kni_ioctl,
+ .compat_ioctl = (void *)kni_compat_ioctl,
+};
+
+/*
+ * Misc device descriptor for KNI: requests MISC_DYNAMIC_MINOR, is named
+ * KNI_DEVICE and dispatches to kni_fops. It is registered in kni_init()
+ * and deregistered in kni_exit().
+ */
+static struct miscdevice kni_misc = {
+ .minor = MISC_DYNAMIC_MINOR,
+ .name = KNI_DEVICE,
+ .fops = &kni_fops,
+};
+
+/*
+ * Interpret the kthread_mode module parameter.
+ *
+ * An unset parameter or "single" leaves multiple_kthread_on untouched,
+ * "multiple" sets it to 1. Returns 0 for those cases and -1 for any
+ * other string.
+ */
+static int __init
+kni_parse_kthread_mode(void)
+{
+	/* Nothing to change when the parameter is absent or asks for "single" */
+	if (!kthread_mode || strcmp(kthread_mode, "single") == 0)
+		return 0;
+
+	/* Anything other than "multiple" is rejected */
+	if (strcmp(kthread_mode, "multiple") != 0)
+		return -1;
+
+	multiple_kthread_on = 1;
+
+	return 0;
+}
+
+/*
+ * Module entry point.
+ *
+ * Validates the kthread_mode parameter, registers the per-network-namespace
+ * operations, registers the misc device and finally applies the lo_mode
+ * parameter. If the misc device cannot be registered, the pernet
+ * registration is rolled back before returning.
+ *
+ * Returns 0 on success, -EINVAL for an unrecognised kthread_mode, or the
+ * error code reported by the registration call that failed.
+ */
+static int __init
+kni_init(void)
+{
+	int rc;
+
+	if (kni_parse_kthread_mode() < 0) {
+		pr_err("Invalid parameter for kthread_mode\n");
+		return -EINVAL;
+	}
+
+	if (multiple_kthread_on == 0)
+		pr_debug("Single kernel thread for all KNI devices\n");
+	else
+		pr_debug("Multiple kernel thread mode enabled\n");
+
+#ifdef HAVE_SIMPLIFIED_PERNET_OPERATIONS
+	rc = register_pernet_subsys(&kni_net_ops);
+#else
+	rc = register_pernet_gen_subsys(&kni_net_id, &kni_net_ops);
+#endif
+	if (rc) {
+		/*
+		 * Propagate the real cause rather than a blanket -EPERM, so
+		 * this path behaves like the misc_register() failure below.
+		 */
+		pr_err("Pernet registration failed\n");
+		return rc;
+	}
+
+	rc = misc_register(&kni_misc);
+	if (rc != 0) {
+		pr_err("Misc registration failed\n");
+		goto out;
+	}
+
+	/* Configure the lo mode according to the input parameter */
+	kni_net_config_lo_mode(lo_mode);
+
+	return 0;
+
+out:
+	/* Undo the pernet registration done above */
+#ifdef HAVE_SIMPLIFIED_PERNET_OPERATIONS
+	unregister_pernet_subsys(&kni_net_ops);
+#else
+	unregister_pernet_gen_subsys(kni_net_id, &kni_net_ops);
+#endif
+	return rc;
+}
+
+/*
+ * Module exit point: tears down what kni_init() set up, in reverse order
+ * (misc device first, then the per-network-namespace operations).
+ */
+static void __exit
+kni_exit(void)
+{
+ misc_deregister(&kni_misc);
+#ifdef HAVE_SIMPLIFIED_PERNET_OPERATIONS
+ unregister_pernet_subsys(&kni_net_ops);
+#else
+ unregister_pernet_gen_subsys(kni_net_id, &kni_net_ops);
+#endif
+}
+
module_init(kni_init);
module_exit(kni_exit);
diff --git a/lib/librte_eal/linuxapp/kni/kni_net.c b/lib/librte_eal/linuxapp/kni/kni_net.c
index fc82193a..4ac99cfe 100644
--- a/lib/librte_eal/linuxapp/kni/kni_net.c
+++ b/lib/librte_eal/linuxapp/kni/kni_net.c
@@ -44,23 +44,103 @@
#define WD_TIMEOUT 5 /*jiffies */
-#define MBUF_BURST_SZ 32
-
#define KNI_WAIT_RESPONSE_TIMEOUT 300 /* 3 seconds */
/* typedef for rx function */
typedef void (*kni_net_rx_t)(struct kni_dev *kni);
-static int kni_net_tx(struct sk_buff *skb, struct net_device *dev);
static void kni_net_rx_normal(struct kni_dev *kni);
-static void kni_net_rx_lo_fifo(struct kni_dev *kni);
-static void kni_net_rx_lo_fifo_skb(struct kni_dev *kni);
-static int kni_net_process_request(struct kni_dev *kni,
- struct rte_kni_request *req);
/* kni rx function pointer, with default to normal rx */
static kni_net_rx_t kni_net_rx_func = kni_net_rx_normal;
+/* Map a physical address to the matching kernel virtual address. */
+static void *
+pa2kva(void *pa)
+{
+	unsigned long phys = (unsigned long)pa;
+
+	return phys_to_virt(phys);
+}
+
+/*
+ * Convert a physical address to a virtual address by applying the
+ * difference between m->buf_addr and m->buf_physaddr.
+ */
+static void *
+pa2va(void *pa, struct rte_kni_mbuf *m)
+{
+	return (void *)((unsigned long)pa +
+			(unsigned long)m->buf_addr -
+			(unsigned long)m->buf_physaddr);
+}
+
+/*
+ * Kernel virtual address of an mbuf's packet data.
+ *
+ * m is dereferenced here, so it must already be a kernel-accessible
+ * pointer to the mbuf. The data address is the buffer's physical address
+ * plus data_off, mapped through phys_to_virt().
+ */
+static void *
+kva2data_kva(struct rte_kni_mbuf *m)
+{
+ return phys_to_virt(m->buf_physaddr + m->data_off);
+}
+
+/*
+ * Convert a virtual address to a physical address by removing the
+ * difference between m->buf_addr and m->buf_physaddr.
+ */
+static void *
+va2pa(void *va, struct rte_kni_mbuf *m)
+{
+	unsigned long va_pa_delta = (unsigned long)m->buf_addr -
+				    (unsigned long)m->buf_physaddr;
+
+	return (void *)((unsigned long)va - va_pa_delta);
+}
+
+/*
+ * Submit a request through req_q and wait for the answer on resp_q.
+ *
+ * The request is copied into the sync buffer (kni->sync_kva), the buffer's
+ * other-side address (kni->sync_va) is queued on req_q, and the caller
+ * sleeps on kni->wq for up to 3 * HZ jiffies until resp_q holds an entry.
+ * On success the content of the sync buffer is copied back into *req.
+ *
+ * Returns 0 on success, -EINVAL if kni or req is NULL, -EBUSY if req_q
+ * cannot accept the entry, -ETIME if the wait timed out or a signal is
+ * pending, and -ENODATA if resp_q yields nothing or an entry other than
+ * kni->sync_va.
+ */
+static int
+kni_net_process_request(struct kni_dev *kni, struct rte_kni_request *req)
+{
+ int ret = -1;
+ void *resp_va;
+ uint32_t num;
+ int ret_val;
+
+ if (!kni || !req) {
+ pr_err("No kni instance or request\n");
+ return -EINVAL;
+ }
+
+ /* Held across the whole exchange; the sync buffer is used below */
+ mutex_lock(&kni->sync_lock);
+
+ /* Construct data */
+ memcpy(kni->sync_kva, req, sizeof(struct rte_kni_request));
+ num = kni_fifo_put(kni->req_q, &kni->sync_va, 1);
+ if (num < 1) {
+ pr_err("Cannot send to req_q\n");
+ ret = -EBUSY;
+ goto fail;
+ }
+
+ /*
+ * NOTE(review): on timeout the entry queued above is left on req_q;
+ * confirm a late response cannot be picked up by the next request.
+ */
+ ret_val = wait_event_interruptible_timeout(kni->wq,
+ kni_fifo_count(kni->resp_q), 3 * HZ);
+ if (signal_pending(current) || ret_val <= 0) {
+ ret = -ETIME;
+ goto fail;
+ }
+ num = kni_fifo_get(kni->resp_q, (void **)&resp_va, 1);
+ if (num != 1 || resp_va != kni->sync_va) {
+ /* This should never happen */
+ pr_err("No data in resp_q\n");
+ ret = -ENODATA;
+ goto fail;
+ }
+
+ /* Hand the (possibly updated) request content back to the caller */
+ memcpy(req, kni->sync_kva, sizeof(struct rte_kni_request));
+ ret = 0;
+
+fail:
+ mutex_unlock(&kni->sync_lock);
+ return ret;
+}
+
/*
* Open and close
*/
@@ -116,18 +196,112 @@ kni_net_config(struct net_device *dev, struct ifmap *map)
}
/*
+ * Transmit a packet (called by the kernel)
+ */
+#ifdef RTE_KNI_VHOST
+/*
+ * With RTE_KNI_VHOST defined this transmit handler frees every skb it is
+ * given, counts it as tx_dropped and reports NETDEV_TX_OK.
+ */
+static int
+kni_net_tx(struct sk_buff *skb, struct net_device *dev)
+{
+ struct kni_dev *kni = netdev_priv(dev);
+
+ dev_kfree_skb(skb);
+ kni->stats.tx_dropped++;
+
+ return NETDEV_TX_OK;
+}
+#else
+/*
+ * Copy one skb into an mbuf taken from alloc_q and queue that mbuf on tx_q.
+ *
+ * The skb is dropped (and tx_dropped incremented) when it is longer than
+ * kni->mbuf_size, when tx_q has no free slot, when alloc_q is empty, or
+ * when a fifo operation fails. Frames shorter than ETH_ZLEN are zero-padded
+ * up to ETH_ZLEN. The skb is always freed and NETDEV_TX_OK is always
+ * returned.
+ */
+static int
+kni_net_tx(struct sk_buff *skb, struct net_device *dev)
+{
+ int len = 0;
+ uint32_t ret;
+ struct kni_dev *kni = netdev_priv(dev);
+ struct rte_kni_mbuf *pkt_kva = NULL;
+ void *pkt_pa = NULL;
+ void *pkt_va = NULL;
+
+ /* save the timestamp */
+#ifdef HAVE_TRANS_START_HELPER
+ netif_trans_update(dev);
+#else
+ dev->trans_start = jiffies;
+#endif
+
+ /* Check if the length of skb is less than mbuf size */
+ if (skb->len > kni->mbuf_size)
+ goto drop;
+
+ /**
+ * Check if it has at least one free entry in tx_q and
+ * one entry in alloc_q.
+ */
+ if (kni_fifo_free_count(kni->tx_q) == 0 ||
+ kni_fifo_count(kni->alloc_q) == 0) {
+ /**
+ * If no free entry in tx_q or no entry in alloc_q,
+ * drops skb and goes out.
+ */
+ goto drop;
+ }
+
+ /* dequeue a mbuf from alloc_q */
+ ret = kni_fifo_get(kni->alloc_q, &pkt_pa, 1);
+ if (likely(ret == 1)) {
+ void *data_kva;
+
+ /*
+ * The alloc_q entry is used as a physical address: derive the
+ * kernel mapping of the mbuf and of its data area, plus the
+ * virtual address that is queued on tx_q below.
+ */
+ pkt_kva = pa2kva(pkt_pa);
+ data_kva = kva2data_kva(pkt_kva);
+ pkt_va = pa2va(pkt_pa, pkt_kva);
+
+ len = skb->len;
+ memcpy(data_kva, skb->data, len);
+ /* Zero-pad short frames up to ETH_ZLEN */
+ if (unlikely(len < ETH_ZLEN)) {
+ memset(data_kva + len, 0, ETH_ZLEN - len);
+ len = ETH_ZLEN;
+ }
+ pkt_kva->pkt_len = len;
+ pkt_kva->data_len = len;
+
+ /* enqueue mbuf into tx_q */
+ ret = kni_fifo_put(kni->tx_q, &pkt_va, 1);
+ if (unlikely(ret != 1)) {
+ /* Failing should not happen */
+ /*
+ * NOTE(review): the mbuf taken from alloc_q is not put
+ * back on any queue on this path; confirm it cannot leak.
+ */
+ pr_err("Fail to enqueue mbuf into tx_q\n");
+ goto drop;
+ }
+ } else {
+ /* Failing should not happen */
+ pr_err("Fail to dequeue mbuf from alloc_q\n");
+ goto drop;
+ }
+
+ /* Free skb and update statistics */
+ dev_kfree_skb(skb);
+ kni->stats.tx_bytes += len;
+ kni->stats.tx_packets++;
+
+ return NETDEV_TX_OK;
+
+drop:
+ /* Free skb and update statistics */
+ dev_kfree_skb(skb);
+ kni->stats.tx_dropped++;
+
+ return NETDEV_TX_OK;
+}
+#endif
+
+/*
* RX: normal working mode
*/
static void
kni_net_rx_normal(struct kni_dev *kni)
{
- unsigned ret;
+ uint32_t ret;
uint32_t len;
- unsigned i, num_rx, num_fq;
+ uint32_t i, num_rx, num_fq;
struct rte_kni_mbuf *kva;
- struct rte_kni_mbuf *va[MBUF_BURST_SZ];
- void * data_kva;
-
+ void *data_kva;
struct sk_buff *skb;
struct net_device *dev = kni->net_dev;
@@ -139,24 +313,22 @@ kni_net_rx_normal(struct kni_dev *kni)
}
/* Calculate the number of entries to dequeue from rx_q */
- num_rx = min(num_fq, (unsigned)MBUF_BURST_SZ);
+ num_rx = min_t(uint32_t, num_fq, MBUF_BURST_SZ);
/* Burst dequeue from rx_q */
- num_rx = kni_fifo_get(kni->rx_q, (void **)va, num_rx);
+ num_rx = kni_fifo_get(kni->rx_q, kni->pa, num_rx);
if (num_rx == 0)
return;
/* Transfer received packets to netif */
for (i = 0; i < num_rx; i++) {
- kva = (void *)va[i] - kni->mbuf_va + kni->mbuf_kva;
+ kva = pa2kva(kni->pa[i]);
len = kva->pkt_len;
-
- data_kva = kva->buf_addr + kva->data_off - kni->mbuf_va
- + kni->mbuf_kva;
+ data_kva = kva2data_kva(kva);
+ kni->va[i] = pa2va(kni->pa[i], kva);
skb = dev_alloc_skb(len + 2);
if (!skb) {
- KNI_ERR("Out of mem, dropping pkts\n");
/* Update statistics */
kni->stats.rx_dropped++;
continue;
@@ -178,9 +350,8 @@ kni_net_rx_normal(struct kni_dev *kni)
if (!kva->next)
break;
- kva = kva->next - kni->mbuf_va + kni->mbuf_kva;
- data_kva = kva->buf_addr + kva->data_off
- - kni->mbuf_va + kni->mbuf_kva;
+ kva = pa2kva(va2pa(kva->next, kva));
+ data_kva = kva2data_kva(kva);
}
}
@@ -197,10 +368,10 @@ kni_net_rx_normal(struct kni_dev *kni)
}
/* Burst enqueue mbufs into free_q */
- ret = kni_fifo_put(kni->free_q, (void **)va, num_rx);
+ ret = kni_fifo_put(kni->free_q, kni->va, num_rx);
if (ret != num_rx)
/* Failing should not happen */
- KNI_ERR("Fail to enqueue entries into free_q\n");
+ pr_err("Fail to enqueue entries into free_q\n");
}
/*
@@ -209,15 +380,12 @@ kni_net_rx_normal(struct kni_dev *kni)
static void
kni_net_rx_lo_fifo(struct kni_dev *kni)
{
- unsigned ret;
+ uint32_t ret;
uint32_t len;
- unsigned i, num, num_rq, num_tq, num_aq, num_fq;
+ uint32_t i, num, num_rq, num_tq, num_aq, num_fq;
struct rte_kni_mbuf *kva;
- struct rte_kni_mbuf *va[MBUF_BURST_SZ];
- void * data_kva;
-
+ void *data_kva;
struct rte_kni_mbuf *alloc_kva;
- struct rte_kni_mbuf *alloc_va[MBUF_BURST_SZ];
void *alloc_data_kva;
/* Get the number of entries in rx_q */
@@ -236,33 +404,32 @@ kni_net_rx_lo_fifo(struct kni_dev *kni)
num = min(num_rq, num_tq);
num = min(num, num_aq);
num = min(num, num_fq);
- num = min(num, (unsigned)MBUF_BURST_SZ);
+ num = min_t(uint32_t, num, MBUF_BURST_SZ);
/* Return if no entry to dequeue from rx_q */
if (num == 0)
return;
/* Burst dequeue from rx_q */
- ret = kni_fifo_get(kni->rx_q, (void **)va, num);
+ ret = kni_fifo_get(kni->rx_q, kni->pa, num);
if (ret == 0)
return; /* Failing should not happen */
/* Dequeue entries from alloc_q */
- ret = kni_fifo_get(kni->alloc_q, (void **)alloc_va, num);
+ ret = kni_fifo_get(kni->alloc_q, kni->alloc_pa, num);
if (ret) {
num = ret;
/* Copy mbufs */
for (i = 0; i < num; i++) {
- kva = (void *)va[i] - kni->mbuf_va + kni->mbuf_kva;
+ kva = pa2kva(kni->pa[i]);
len = kva->pkt_len;
- data_kva = kva->buf_addr + kva->data_off -
- kni->mbuf_va + kni->mbuf_kva;
-
- alloc_kva = (void *)alloc_va[i] - kni->mbuf_va +
- kni->mbuf_kva;
- alloc_data_kva = alloc_kva->buf_addr +
- alloc_kva->data_off - kni->mbuf_va +
- kni->mbuf_kva;
+ data_kva = kva2data_kva(kva);
+ kni->va[i] = pa2va(kni->pa[i], kva);
+
+ alloc_kva = pa2kva(kni->alloc_pa[i]);
+ alloc_data_kva = kva2data_kva(alloc_kva);
+ kni->alloc_va[i] = pa2va(kni->alloc_pa[i], alloc_kva);
+
memcpy(alloc_data_kva, data_kva, len);
alloc_kva->pkt_len = len;
alloc_kva->data_len = len;
@@ -272,17 +439,17 @@ kni_net_rx_lo_fifo(struct kni_dev *kni)
}
/* Burst enqueue mbufs into tx_q */
- ret = kni_fifo_put(kni->tx_q, (void **)alloc_va, num);
+ ret = kni_fifo_put(kni->tx_q, kni->alloc_va, num);
if (ret != num)
/* Failing should not happen */
- KNI_ERR("Fail to enqueue mbufs into tx_q\n");
+ pr_err("Fail to enqueue mbufs into tx_q\n");
}
/* Burst enqueue mbufs into free_q */
- ret = kni_fifo_put(kni->free_q, (void **)va, num);
+ ret = kni_fifo_put(kni->free_q, kni->va, num);
if (ret != num)
/* Failing should not happen */
- KNI_ERR("Fail to enqueue mbufs into free_q\n");
+ pr_err("Fail to enqueue mbufs into free_q\n");
/**
* Update statistic, and enqueue/dequeue failure is impossible,
@@ -298,13 +465,11 @@ kni_net_rx_lo_fifo(struct kni_dev *kni)
static void
kni_net_rx_lo_fifo_skb(struct kni_dev *kni)
{
- unsigned ret;
+ uint32_t ret;
uint32_t len;
- unsigned i, num_rq, num_fq, num;
+ uint32_t i, num_rq, num_fq, num;
struct rte_kni_mbuf *kva;
- struct rte_kni_mbuf *va[MBUF_BURST_SZ];
- void * data_kva;
-
+ void *data_kva;
struct sk_buff *skb;
struct net_device *dev = kni->net_dev;
@@ -316,28 +481,26 @@ kni_net_rx_lo_fifo_skb(struct kni_dev *kni)
/* Calculate the number of entries to dequeue from rx_q */
num = min(num_rq, num_fq);
- num = min(num, (unsigned)MBUF_BURST_SZ);
+ num = min_t(uint32_t, num, MBUF_BURST_SZ);
/* Return if no entry to dequeue from rx_q */
if (num == 0)
return;
/* Burst dequeue mbufs from rx_q */
- ret = kni_fifo_get(kni->rx_q, (void **)va, num);
+ ret = kni_fifo_get(kni->rx_q, kni->pa, num);
if (ret == 0)
return;
/* Copy mbufs to sk buffer and then call tx interface */
for (i = 0; i < num; i++) {
- kva = (void *)va[i] - kni->mbuf_va + kni->mbuf_kva;
+ kva = pa2kva(kni->pa[i]);
len = kva->pkt_len;
- data_kva = kva->buf_addr + kva->data_off - kni->mbuf_va +
- kni->mbuf_kva;
+ data_kva = kva2data_kva(kva);
+ kni->va[i] = pa2va(kni->pa[i], kva);
skb = dev_alloc_skb(len + 2);
- if (skb == NULL)
- KNI_ERR("Out of mem, dropping pkts\n");
- else {
+ if (skb) {
/* Align IP on 16B boundary */
skb_reserve(skb, 2);
memcpy(skb_put(skb, len), data_kva, len);
@@ -349,7 +512,6 @@ kni_net_rx_lo_fifo_skb(struct kni_dev *kni)
/* Simulate real usage, allocate/copy skb twice */
skb = dev_alloc_skb(len + 2);
if (skb == NULL) {
- KNI_ERR("Out of mem, dropping pkts\n");
kni->stats.rx_dropped++;
continue;
}
@@ -370,9 +532,8 @@ kni_net_rx_lo_fifo_skb(struct kni_dev *kni)
if (!kva->next)
break;
- kva = kva->next - kni->mbuf_va + kni->mbuf_kva;
- data_kva = kva->buf_addr + kva->data_off
- - kni->mbuf_va + kni->mbuf_kva;
+ kva = pa2kva(va2pa(kva->next, kva));
+ data_kva = kva2data_kva(kva);
}
}
@@ -387,10 +548,10 @@ kni_net_rx_lo_fifo_skb(struct kni_dev *kni)
}
/* enqueue all the mbufs from rx_q into free_q */
- ret = kni_fifo_put(kni->free_q, (void **)&va, num);
+ ret = kni_fifo_put(kni->free_q, kni->va, num);
if (ret != num)
/* Failing should not happen */
- KNI_ERR("Fail to enqueue mbufs into free_q\n");
+ pr_err("Fail to enqueue mbufs into free_q\n");
}
/* rx interface */
@@ -405,114 +566,18 @@ kni_net_rx(struct kni_dev *kni)
}
/*
- * Transmit a packet (called by the kernel)
- */
-#ifdef RTE_KNI_VHOST
-static int
-kni_net_tx(struct sk_buff *skb, struct net_device *dev)
-{
- struct kni_dev *kni = netdev_priv(dev);
-
- dev_kfree_skb(skb);
- kni->stats.tx_dropped++;
-
- return NETDEV_TX_OK;
-}
-#else
-static int
-kni_net_tx(struct sk_buff *skb, struct net_device *dev)
-{
- int len = 0;
- unsigned ret;
- struct kni_dev *kni = netdev_priv(dev);
- struct rte_kni_mbuf *pkt_kva = NULL;
- struct rte_kni_mbuf *pkt_va = NULL;
-
- /* save the timestamp */
-#ifdef HAVE_TRANS_START_HELPER
- netif_trans_update(dev);
-#else
- dev->trans_start = jiffies;
-#endif
-
- /* Check if the length of skb is less than mbuf size */
- if (skb->len > kni->mbuf_size)
- goto drop;
-
- /**
- * Check if it has at least one free entry in tx_q and
- * one entry in alloc_q.
- */
- if (kni_fifo_free_count(kni->tx_q) == 0 ||
- kni_fifo_count(kni->alloc_q) == 0) {
- /**
- * If no free entry in tx_q or no entry in alloc_q,
- * drops skb and goes out.
- */
- goto drop;
- }
-
- /* dequeue a mbuf from alloc_q */
- ret = kni_fifo_get(kni->alloc_q, (void **)&pkt_va, 1);
- if (likely(ret == 1)) {
- void *data_kva;
-
- pkt_kva = (void *)pkt_va - kni->mbuf_va + kni->mbuf_kva;
- data_kva = pkt_kva->buf_addr + pkt_kva->data_off - kni->mbuf_va
- + kni->mbuf_kva;
-
- len = skb->len;
- memcpy(data_kva, skb->data, len);
- if (unlikely(len < ETH_ZLEN)) {
- memset(data_kva + len, 0, ETH_ZLEN - len);
- len = ETH_ZLEN;
- }
- pkt_kva->pkt_len = len;
- pkt_kva->data_len = len;
-
- /* enqueue mbuf into tx_q */
- ret = kni_fifo_put(kni->tx_q, (void **)&pkt_va, 1);
- if (unlikely(ret != 1)) {
- /* Failing should not happen */
- KNI_ERR("Fail to enqueue mbuf into tx_q\n");
- goto drop;
- }
- } else {
- /* Failing should not happen */
- KNI_ERR("Fail to dequeue mbuf from alloc_q\n");
- goto drop;
- }
-
- /* Free skb and update statistics */
- dev_kfree_skb(skb);
- kni->stats.tx_bytes += len;
- kni->stats.tx_packets++;
-
- return NETDEV_TX_OK;
-
-drop:
- /* Free skb and update statistics */
- dev_kfree_skb(skb);
- kni->stats.tx_dropped++;
-
- return NETDEV_TX_OK;
-}
-#endif
-
-/*
* Deal with a transmit timeout.
*/
static void
-kni_net_tx_timeout (struct net_device *dev)
+kni_net_tx_timeout(struct net_device *dev)
{
struct kni_dev *kni = netdev_priv(dev);
- KNI_DBG("Transmit timeout at %ld, latency %ld\n", jiffies,
- jiffies - dev->trans_start);
+ pr_debug("Transmit timeout at %ld, latency %ld\n", jiffies,
+ jiffies - dev_trans_start(dev));
kni->stats.tx_errors++;
netif_wake_queue(dev);
- return;
}
/*
@@ -521,8 +586,8 @@ kni_net_tx_timeout (struct net_device *dev)
static int
kni_net_ioctl(struct net_device *dev, struct ifreq *rq, int cmd)
{
- KNI_DBG("kni_net_ioctl %d\n",
- ((struct kni_dev *)netdev_priv(dev))->group_id);
+ pr_debug("kni_net_ioctl group:%d cmd:%d\n",
+ ((struct kni_dev *)netdev_priv(dev))->group_id, cmd);
return 0;
}
@@ -539,7 +604,7 @@ kni_net_change_mtu(struct net_device *dev, int new_mtu)
struct rte_kni_request req;
struct kni_dev *kni = netdev_priv(dev);
- KNI_DBG("kni_net_change_mtu new mtu %d to be set\n", new_mtu);
+ pr_debug("kni_net_change_mtu new mtu %d to be set\n", new_mtu);
memset(&req, 0, sizeof(req));
req.req_id = RTE_KNI_REQ_CHANGE_MTU;
@@ -562,61 +627,13 @@ kni_net_poll_resp(struct kni_dev *kni)
}
/*
- * It can be called to process the request.
- */
-static int
-kni_net_process_request(struct kni_dev *kni, struct rte_kni_request *req)
-{
- int ret = -1;
- void *resp_va;
- unsigned num;
- int ret_val;
-
- if (!kni || !req) {
- KNI_ERR("No kni instance or request\n");
- return -EINVAL;
- }
-
- mutex_lock(&kni->sync_lock);
-
- /* Construct data */
- memcpy(kni->sync_kva, req, sizeof(struct rte_kni_request));
- num = kni_fifo_put(kni->req_q, &kni->sync_va, 1);
- if (num < 1) {
- KNI_ERR("Cannot send to req_q\n");
- ret = -EBUSY;
- goto fail;
- }
-
- ret_val = wait_event_interruptible_timeout(kni->wq,
- kni_fifo_count(kni->resp_q), 3 * HZ);
- if (signal_pending(current) || ret_val <= 0) {
- ret = -ETIME;
- goto fail;
- }
- num = kni_fifo_get(kni->resp_q, (void **)&resp_va, 1);
- if (num != 1 || resp_va != kni->sync_va) {
- /* This should never happen */
- KNI_ERR("No data in resp_q\n");
- ret = -ENODATA;
- goto fail;
- }
-
- memcpy(req, kni->sync_kva, sizeof(struct rte_kni_request));
- ret = 0;
-
-fail:
- mutex_unlock(&kni->sync_lock);
- return ret;
-}
-
-/*
* Return statistics to the caller
*/
static struct net_device_stats *
kni_net_stats(struct net_device *dev)
{
struct kni_dev *kni = netdev_priv(dev);
+
return &kni->stats;
}
@@ -626,7 +643,7 @@ kni_net_stats(struct net_device *dev)
static int
kni_net_header(struct sk_buff *skb, struct net_device *dev,
unsigned short type, const void *daddr,
- const void *saddr, unsigned int len)
+ const void *saddr, uint32_t len)
{
struct ethhdr *eth = (struct ethhdr *) skb_push(skb, ETH_HLEN);
@@ -637,7 +654,6 @@ kni_net_header(struct sk_buff *skb, struct net_device *dev,
return dev->hard_header_len;
}
-
/*
* Re-fill the eth header
*/
@@ -662,9 +678,11 @@ kni_net_rebuild_header(struct sk_buff *skb)
*
* Returns 0 on success, negative on failure
**/
-static int kni_net_set_mac(struct net_device *netdev, void *p)
+static int
+kni_net_set_mac(struct net_device *netdev, void *p)
{
struct sockaddr *addr = p;
+
if (!is_valid_ether_addr((unsigned char *)(addr->sa_data)))
return -EADDRNOTAVAIL;
memcpy(netdev->dev_addr, addr->sa_data, netdev->addr_len);
@@ -672,7 +690,8 @@ static int kni_net_set_mac(struct net_device *netdev, void *p)
}
#ifdef HAVE_CHANGE_CARRIER_CB
-static int kni_net_change_carrier(struct net_device *dev, bool new_carrier)
+static int
+kni_net_change_carrier(struct net_device *dev, bool new_carrier)
{
if (new_carrier)
netif_carrier_on(dev);
@@ -711,8 +730,6 @@ kni_net_init(struct net_device *dev)
{
struct kni_dev *kni = netdev_priv(dev);
- KNI_DBG("kni_net_init\n");
-
init_waitqueue_head(&kni->wq);
mutex_init(&kni->sync_lock);
@@ -726,18 +743,18 @@ void
kni_net_config_lo_mode(char *lo_str)
{
if (!lo_str) {
- KNI_PRINT("loopback disabled");
+ pr_debug("loopback disabled");
return;
}
if (!strcmp(lo_str, "lo_mode_none"))
- KNI_PRINT("loopback disabled");
+ pr_debug("loopback disabled");
else if (!strcmp(lo_str, "lo_mode_fifo")) {
- KNI_PRINT("loopback mode=lo_mode_fifo enabled");
+ pr_debug("loopback mode=lo_mode_fifo enabled");
kni_net_rx_func = kni_net_rx_lo_fifo;
} else if (!strcmp(lo_str, "lo_mode_fifo_skb")) {
- KNI_PRINT("loopback mode=lo_mode_fifo_skb enabled");
+ pr_debug("loopback mode=lo_mode_fifo_skb enabled");
kni_net_rx_func = kni_net_rx_lo_fifo_skb;
} else
- KNI_PRINT("Incognizant parameter, loopback disabled");
+ pr_debug("Incognizant parameter, loopback disabled");
}
diff --git a/lib/librte_eal/linuxapp/kni/kni_vhost.c b/lib/librte_eal/linuxapp/kni/kni_vhost.c
index a3ca8499..f54c34b1 100644
--- a/lib/librte_eal/linuxapp/kni/kni_vhost.c
+++ b/lib/librte_eal/linuxapp/kni/kni_vhost.c
@@ -32,6 +32,7 @@
#include <linux/sched.h>
#include <linux/if_tun.h>
#include <linux/version.h>
+#include <linux/file.h>
#include "compat.h"
#include "kni_dev.h"
@@ -39,21 +40,12 @@
#define RX_BURST_SZ 4
-extern void put_unused_fd(unsigned int fd);
-
-#if LINUX_VERSION_CODE >= KERNEL_VERSION(3,7,0)
-extern struct file*
-sock_alloc_file(struct socket *sock,
- int flags, const char *dname);
-
-extern int get_unused_fd_flags(unsigned flags);
-
-extern void fd_install(unsigned int fd, struct file *file);
-
+#ifdef HAVE_STATIC_SOCK_MAP_FD
static int kni_sock_map_fd(struct socket *sock)
{
struct file *file;
int fd = get_unused_fd_flags(0);
+
if (fd < 0)
return fd;
@@ -65,8 +57,6 @@ static int kni_sock_map_fd(struct socket *sock)
fd_install(fd, file);
return fd;
}
-#else
-#define kni_sock_map_fd(s) sock_map_fd(s, 0)
#endif
static struct proto kni_raw_proto = {
@@ -77,13 +67,13 @@ static struct proto kni_raw_proto = {
static inline int
kni_vhost_net_tx(struct kni_dev *kni, struct msghdr *m,
- unsigned offset, unsigned len)
+ uint32_t offset, uint32_t len)
{
struct rte_kni_mbuf *pkt_kva = NULL;
struct rte_kni_mbuf *pkt_va = NULL;
int ret;
- KNI_DBG_TX("tx offset=%d, len=%d, iovlen=%d\n",
+ pr_debug("tx offset=%d, len=%d, iovlen=%d\n",
#ifdef HAVE_IOV_ITER_MSGHDR
offset, len, (int)m->msg_iter.iov->iov_len);
#else
@@ -110,7 +100,7 @@ kni_vhost_net_tx(struct kni_dev *kni, struct msghdr *m,
pkt_kva = (void *)pkt_va - kni->mbuf_va + kni->mbuf_kva;
data_kva = pkt_kva->buf_addr + pkt_kva->data_off
- - kni->mbuf_va + kni->mbuf_kva;
+ - kni->mbuf_va + kni->mbuf_kva;
#ifdef HAVE_IOV_ITER_MSGHDR
copy_from_iter(data_kva, len, &m->msg_iter);
@@ -129,12 +119,12 @@ kni_vhost_net_tx(struct kni_dev *kni, struct msghdr *m,
ret = kni_fifo_put(kni->tx_q, (void **)&pkt_va, 1);
if (unlikely(ret != 1)) {
/* Failing should not happen */
- KNI_ERR("Fail to enqueue mbuf into tx_q\n");
+ pr_err("Fail to enqueue mbuf into tx_q\n");
goto drop;
}
} else {
/* Failing should not happen */
- KNI_ERR("Fail to dequeue mbuf from alloc_q\n");
+ pr_err("Fail to dequeue mbuf from alloc_q\n");
goto drop;
}
@@ -153,12 +143,12 @@ drop:
static inline int
kni_vhost_net_rx(struct kni_dev *kni, struct msghdr *m,
- unsigned offset, unsigned len)
+ uint32_t offset, uint32_t len)
{
uint32_t pkt_len;
struct rte_kni_mbuf *kva;
struct rte_kni_mbuf *va;
- void * data_kva;
+ void *data_kva;
struct sk_buff *skb;
struct kni_vhost_queue *q = kni->vhost_queue;
@@ -173,19 +163,19 @@ kni_vhost_net_rx(struct kni_dev *kni, struct msghdr *m,
if (unlikely(skb == NULL))
return 0;
- kva = (struct rte_kni_mbuf*)skb->data;
+ kva = (struct rte_kni_mbuf *)skb->data;
/* free skb to cache */
skb->data = NULL;
- if (unlikely(1 != kni_fifo_put(q->fifo, (void **)&skb, 1)))
+ if (unlikely(kni_fifo_put(q->fifo, (void **)&skb, 1) != 1))
/* Failing should not happen */
- KNI_ERR("Fail to enqueue entries into rx cache fifo\n");
+ pr_err("Fail to enqueue entries into rx cache fifo\n");
pkt_len = kva->data_len;
if (unlikely(pkt_len > len))
goto drop;
- KNI_DBG_RX("rx offset=%d, len=%d, pkt_len=%d, iovlen=%d\n",
+ pr_debug("rx offset=%d, len=%d, pkt_len=%d, iovlen=%d\n",
#ifdef HAVE_IOV_ITER_MSGHDR
offset, len, pkt_len, (int)m->msg_iter.iov->iov_len);
#else
@@ -205,12 +195,12 @@ kni_vhost_net_rx(struct kni_dev *kni, struct msghdr *m,
kni->stats.rx_packets++;
/* enqueue mbufs into free_q */
- va = (void*)kva - kni->mbuf_kva + kni->mbuf_va;
- if (unlikely(1 != kni_fifo_put(kni->free_q, (void **)&va, 1)))
+ va = (void *)kva - kni->mbuf_kva + kni->mbuf_va;
+ if (unlikely(kni_fifo_put(kni->free_q, (void **)&va, 1) != 1))
/* Failing should not happen */
- KNI_ERR("Fail to enqueue entries into free_q\n");
+ pr_err("Fail to enqueue entries into free_q\n");
- KNI_DBG_RX("receive done %d\n", pkt_len);
+ pr_debug("receive done %d\n", pkt_len);
return pkt_len;
@@ -221,29 +211,25 @@ drop:
return 0;
}
-static unsigned int
-kni_sock_poll(struct file *file, struct socket *sock, poll_table * wait)
+static uint32_t
+kni_sock_poll(struct file *file, struct socket *sock, poll_table *wait)
{
struct kni_vhost_queue *q =
container_of(sock->sk, struct kni_vhost_queue, sk);
struct kni_dev *kni;
- unsigned int mask = 0;
+ uint32_t mask = 0;
if (unlikely(q == NULL || q->kni == NULL))
return POLLERR;
kni = q->kni;
-#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,35)
- KNI_DBG("start kni_poll on group %d, wq 0x%16llx\n",
+#ifdef HAVE_SOCKET_WQ
+ pr_debug("start kni_poll on group %d, wq 0x%16llx\n",
kni->group_id, (uint64_t)sock->wq);
-#else
- KNI_DBG("start kni_poll on group %d, wait at 0x%16llx\n",
- kni->group_id, (uint64_t)&sock->wait);
-#endif
-
-#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,35)
poll_wait(file, &sock->wq->wait, wait);
#else
+ pr_debug("start kni_poll on group %d, wait at 0x%16llx\n",
+ kni->group_id, (uint64_t)&sock->wait);
poll_wait(file, &sock->wait, wait);
#endif
@@ -252,11 +238,12 @@ kni_sock_poll(struct file *file, struct socket *sock, poll_table * wait)
if (sock_writeable(&q->sk) ||
#ifdef SOCKWQ_ASYNC_NOSPACE
- (!test_and_set_bit(SOCKWQ_ASYNC_NOSPACE, &q->sock->flags) &&
+ (!test_and_set_bit(SOCKWQ_ASYNC_NOSPACE, &q->sock->flags) &&
+ sock_writeable(&q->sk)))
#else
- (!test_and_set_bit(SOCK_ASYNC_NOSPACE, &q->sock->flags) &&
+ (!test_and_set_bit(SOCK_ASYNC_NOSPACE, &q->sock->flags) &&
+ sock_writeable(&q->sk)))
#endif
- sock_writeable(&q->sk)))
mask |= POLLOUT | POLLWRNORM;
return mask;
@@ -269,7 +256,7 @@ kni_vhost_enqueue(struct kni_dev *kni, struct kni_vhost_queue *q,
struct rte_kni_mbuf *kva;
kva = (void *)(va) - kni->mbuf_va + kni->mbuf_kva;
- (skb)->data = (unsigned char*)kva;
+ (skb)->data = (unsigned char *)kva;
(skb)->len = kva->data_len;
skb_queue_tail(&q->sk.sk_receive_queue, skb);
}
@@ -279,6 +266,7 @@ kni_vhost_enqueue_burst(struct kni_dev *kni, struct kni_vhost_queue *q,
struct sk_buff **skb, struct rte_kni_mbuf **va)
{
int i;
+
for (i = 0; i < RX_BURST_SZ; skb++, va++, i++)
kni_vhost_enqueue(kni, q, *skb, *va);
}
@@ -287,9 +275,9 @@ int
kni_chk_vhost_rx(struct kni_dev *kni)
{
struct kni_vhost_queue *q = kni->vhost_queue;
- unsigned nb_in, nb_mbuf, nb_skb;
- const unsigned BURST_MASK = RX_BURST_SZ - 1;
- unsigned nb_burst, nb_backlog, i;
+ uint32_t nb_in, nb_mbuf, nb_skb;
+ const uint32_t BURST_MASK = RX_BURST_SZ - 1;
+ uint32_t nb_burst, nb_backlog, i;
struct sk_buff *skb[RX_BURST_SZ];
struct rte_kni_mbuf *va[RX_BURST_SZ];
@@ -305,20 +293,18 @@ kni_chk_vhost_rx(struct kni_dev *kni)
nb_mbuf = kni_fifo_count(kni->rx_q);
nb_in = min(nb_mbuf, nb_skb);
- nb_in = min(nb_in, (unsigned)RX_BURST_SZ);
+ nb_in = min_t(uint32_t, nb_in, RX_BURST_SZ);
nb_burst = (nb_in & ~BURST_MASK);
nb_backlog = (nb_in & BURST_MASK);
/* enqueue skb_queue per BURST_SIZE bulk */
- if (0 != nb_burst) {
- if (unlikely(RX_BURST_SZ != kni_fifo_get(
- kni->rx_q, (void **)&va,
- RX_BURST_SZ)))
+ if (nb_burst != 0) {
+ if (unlikely(kni_fifo_get(kni->rx_q, (void **)&va, RX_BURST_SZ)
+ != RX_BURST_SZ))
goto except;
- if (unlikely(RX_BURST_SZ != kni_fifo_get(
- q->fifo, (void **)&skb,
- RX_BURST_SZ)))
+ if (unlikely(kni_fifo_get(q->fifo, (void **)&skb, RX_BURST_SZ)
+ != RX_BURST_SZ))
goto except;
kni_vhost_enqueue_burst(kni, q, skb, va);
@@ -326,12 +312,10 @@ kni_chk_vhost_rx(struct kni_dev *kni)
/* all leftover, do one by one */
for (i = 0; i < nb_backlog; ++i) {
- if (unlikely(1 != kni_fifo_get(
- kni->rx_q,(void **)&va, 1)))
+ if (unlikely(kni_fifo_get(kni->rx_q, (void **)&va, 1) != 1))
goto except;
- if (unlikely(1 != kni_fifo_get(
- q->fifo, (void **)&skb, 1)))
+ if (unlikely(kni_fifo_get(q->fifo, (void **)&skb, 1) != 1))
goto except;
kni_vhost_enqueue(kni, q, *skb, *va);
@@ -342,7 +326,7 @@ kni_chk_vhost_rx(struct kni_dev *kni)
((nb_mbuf < RX_BURST_SZ) && (nb_mbuf != 0))) {
wake_up_interruptible_poll(sk_sleep(&q->sk),
POLLIN | POLLRDNORM | POLLRDBAND);
- KNI_DBG_RX("RX CHK KICK nb_mbuf %d, nb_skb %d, nb_in %d\n",
+ pr_debug("RX CHK KICK nb_mbuf %d, nb_skb %d, nb_in %d\n",
nb_mbuf, nb_skb, nb_in);
}
@@ -350,7 +334,7 @@ kni_chk_vhost_rx(struct kni_dev *kni)
except:
/* Failing should not happen */
- KNI_ERR("Fail to enqueue fifo, it shouldn't happen \n");
+ pr_err("Fail to enqueue fifo, it shouldn't happen\n");
BUG_ON(1);
return 0;
@@ -373,7 +357,7 @@ kni_sock_sndmsg(struct socket *sock,
if (unlikely(q == NULL || q->kni == NULL))
return 0;
- KNI_DBG_TX("kni_sndmsg len %ld, flags 0x%08x, nb_iov %d\n",
+ pr_debug("kni_sndmsg len %ld, flags 0x%08x, nb_iov %d\n",
#ifdef HAVE_IOV_ITER_MSGHDR
len, q->flags, (int)m->msg_iter.iov->iov_len);
#else
@@ -420,13 +404,14 @@ kni_sock_rcvmsg(struct socket *sock,
#ifdef RTE_KNI_VHOST_VNET_HDR_EN
if (likely(q->flags & IFF_VNET_HDR)) {
vnet_hdr_len = q->vnet_hdr_sz;
- if ((len -= vnet_hdr_len) < 0)
+ len -= vnet_hdr_len;
+ if (len < 0)
return -EINVAL;
}
#endif
- if (unlikely(0 == (pkt_len = kni_vhost_net_rx(q->kni,
- m, vnet_hdr_len, len))))
+ pkt_len = kni_vhost_net_rx(q->kni, m, vnet_hdr_len, len);
+ if (unlikely(pkt_len == 0))
return 0;
#ifdef RTE_KNI_VHOST_VNET_HDR_EN
@@ -440,7 +425,7 @@ kni_sock_rcvmsg(struct socket *sock,
#endif /* HAVE_IOV_ITER_MSGHDR */
return -EFAULT;
#endif /* RTE_KNI_VHOST_VNET_HDR_EN */
- KNI_DBG_RX("kni_rcvmsg expect_len %ld, flags 0x%08x, pkt_len %d\n",
+ pr_debug("kni_rcvmsg expect_len %ld, flags 0x%08x, pkt_len %d\n",
(unsigned long)len, q->flags, pkt_len);
return pkt_len + vnet_hdr_len;
@@ -448,25 +433,24 @@ kni_sock_rcvmsg(struct socket *sock,
/* dummy tap like ioctl */
static int
-kni_sock_ioctl(struct socket *sock, unsigned int cmd,
- unsigned long arg)
+kni_sock_ioctl(struct socket *sock, uint32_t cmd, unsigned long arg)
{
void __user *argp = (void __user *)arg;
struct ifreq __user *ifr = argp;
- unsigned int __user *up = argp;
+ uint32_t __user *up = argp;
struct kni_vhost_queue *q =
container_of(sock->sk, struct kni_vhost_queue, sk);
struct kni_dev *kni;
- unsigned int u;
+ uint32_t u;
int __user *sp = argp;
int s;
int ret;
- KNI_DBG("tap ioctl cmd 0x%08x\n", cmd);
+ pr_debug("tap ioctl cmd 0x%08x\n", cmd);
switch (cmd) {
case TUNSETIFF:
- KNI_DBG("TUNSETIFF\n");
+ pr_debug("TUNSETIFF\n");
/* ignore the name, just look at flags */
if (get_user(u, &ifr->ifr_flags))
return -EFAULT;
@@ -480,7 +464,7 @@ kni_sock_ioctl(struct socket *sock, unsigned int cmd,
return ret;
case TUNGETIFF:
- KNI_DBG("TUNGETIFF\n");
+ pr_debug("TUNGETIFF\n");
rcu_read_lock_bh();
kni = rcu_dereference_bh(q->kni);
if (kni)
@@ -491,14 +475,14 @@ kni_sock_ioctl(struct socket *sock, unsigned int cmd,
return -ENOLINK;
ret = 0;
- if (copy_to_user(&ifr->ifr_name, kni->net_dev->name, IFNAMSIZ) ||
- put_user(q->flags, &ifr->ifr_flags))
+ if (copy_to_user(&ifr->ifr_name, kni->net_dev->name, IFNAMSIZ)
+ || put_user(q->flags, &ifr->ifr_flags))
ret = -EFAULT;
dev_put(kni->net_dev);
return ret;
case TUNGETFEATURES:
- KNI_DBG("TUNGETFEATURES\n");
+ pr_debug("TUNGETFEATURES\n");
u = IFF_TAP | IFF_NO_PI;
#ifdef RTE_KNI_VHOST_VNET_HDR_EN
u |= IFF_VNET_HDR;
@@ -508,7 +492,7 @@ kni_sock_ioctl(struct socket *sock, unsigned int cmd,
return 0;
case TUNSETSNDBUF:
- KNI_DBG("TUNSETSNDBUF\n");
+ pr_debug("TUNSETSNDBUF\n");
if (get_user(u, up))
return -EFAULT;
@@ -519,7 +503,7 @@ kni_sock_ioctl(struct socket *sock, unsigned int cmd,
s = q->vnet_hdr_sz;
if (put_user(s, sp))
return -EFAULT;
- KNI_DBG("TUNGETVNETHDRSZ %d\n", s);
+ pr_debug("TUNGETVNETHDRSZ %d\n", s);
return 0;
case TUNSETVNETHDRSZ:
@@ -528,12 +512,12 @@ kni_sock_ioctl(struct socket *sock, unsigned int cmd,
if (s < (int)sizeof(struct virtio_net_hdr))
return -EINVAL;
- KNI_DBG("TUNSETVNETHDRSZ %d\n", s);
+ pr_debug("TUNSETVNETHDRSZ %d\n", s);
q->vnet_hdr_sz = s;
return 0;
case TUNSETOFFLOAD:
- KNI_DBG("TUNSETOFFLOAD %lx\n", arg);
+ pr_debug("TUNSETOFFLOAD %lx\n", arg);
#ifdef RTE_KNI_VHOST_VNET_HDR_EN
/* not support any offload yet */
if (!(q->flags & IFF_VNET_HDR))
@@ -545,26 +529,26 @@ kni_sock_ioctl(struct socket *sock, unsigned int cmd,
#endif
default:
- KNI_DBG("NOT SUPPORT\n");
+ pr_debug("NOT SUPPORT\n");
return -EINVAL;
}
}
static int
-kni_sock_compat_ioctl(struct socket *sock, unsigned int cmd,
+kni_sock_compat_ioctl(struct socket *sock, uint32_t cmd,
unsigned long arg)
{
/* 32 bits app on 64 bits OS to be supported later */
- KNI_PRINT("Not implemented.\n");
+ pr_debug("Not implemented.\n");
return -EINVAL;
}
#define KNI_VHOST_WAIT_WQ_SAFE() \
-do { \
+do { \
while ((BE_FINISH | BE_STOP) == kni->vq_status) \
- msleep(1); \
-}while(0) \
+ msleep(1); \
+} while (0) \
static int
@@ -577,7 +561,8 @@ kni_sock_release(struct socket *sock)
if (q == NULL)
return 0;
- if (NULL != (kni = q->kni)) {
+ kni = q->kni;
+ if (kni != NULL) {
kni->vq_status = BE_STOP;
KNI_VHOST_WAIT_WQ_SAFE();
kni->vhost_queue = NULL;
@@ -592,18 +577,17 @@ kni_sock_release(struct socket *sock)
sock_put(&q->sk);
- KNI_DBG("dummy sock release done\n");
+ pr_debug("dummy sock release done\n");
return 0;
}
int
-kni_sock_getname (struct socket *sock,
- struct sockaddr *addr,
- int *sockaddr_len, int peer)
+kni_sock_getname(struct socket *sock, struct sockaddr *addr,
+ int *sockaddr_len, int peer)
{
- KNI_DBG("dummy sock getname\n");
- ((struct sockaddr_ll*)addr)->sll_family = AF_PACKET;
+ pr_debug("dummy sock getname\n");
+ ((struct sockaddr_ll *)addr)->sll_family = AF_PACKET;
return 0;
}
@@ -646,7 +630,7 @@ kni_sk_destruct(struct sock *sk)
/* make sure there's no packet in buffer */
while (skb_dequeue(&sk->sk_receive_queue) != NULL)
- ;
+ ;
mb();
@@ -673,7 +657,7 @@ kni_vhost_backend_init(struct kni_dev *kni)
if (kni->vhost_queue != NULL)
return -1;
-#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 2, 0)
+#ifdef HAVE_SK_ALLOC_KERN_PARAM
q = (struct kni_vhost_queue *)sk_alloc(net, AF_UNSPEC, GFP_KERNEL,
&kni_raw_proto, 0);
#else
@@ -694,8 +678,9 @@ kni_vhost_backend_init(struct kni_dev *kni)
}
/* cache init */
- q->cache = kzalloc(RTE_KNI_VHOST_MAX_CACHE_SIZE * sizeof(struct sk_buff),
- GFP_KERNEL);
+ q->cache = kzalloc(
+ RTE_KNI_VHOST_MAX_CACHE_SIZE * sizeof(struct sk_buff),
+ GFP_KERNEL);
if (!q->cache)
goto free_fd;
@@ -708,7 +693,7 @@ kni_vhost_backend_init(struct kni_dev *kni)
for (i = 0; i < RTE_KNI_VHOST_MAX_CACHE_SIZE; i++) {
elem = &q->cache[i];
- kni_fifo_put(fifo, (void**)&elem, 1);
+ kni_fifo_put(fifo, (void **)&elem, 1);
}
q->fifo = fifo;
@@ -738,14 +723,12 @@ kni_vhost_backend_init(struct kni_dev *kni)
kni->vq_status = BE_START;
-#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,35)
- KNI_DBG("backend init sockfd=%d, sock->wq=0x%16llx,"
- "sk->sk_wq=0x%16llx",
+#ifdef HAVE_SOCKET_WQ
+ pr_debug("backend init sockfd=%d, sock->wq=0x%16llx,sk->sk_wq=0x%16llx",
q->sockfd, (uint64_t)q->sock->wq,
(uint64_t)q->sk.sk_wq);
#else
- KNI_DBG("backend init sockfd=%d, sock->wait at 0x%16llx,"
- "sk->sk_sleep=0x%16llx",
+ pr_debug("backend init sockfd=%d, sock->wait at 0x%16llx,sk->sk_sleep=0x%16llx",
q->sockfd, (uint64_t)&q->sock->wait,
(uint64_t)q->sk.sk_sleep);
#endif
@@ -768,7 +751,7 @@ free_sock:
q->sock = NULL;
free_sk:
- sk_free((struct sock*)q);
+ sk_free((struct sock *)q);
return err;
}
@@ -781,6 +764,7 @@ show_sock_fd(struct device *dev, struct device_attribute *attr,
struct net_device *net_dev = container_of(dev, struct net_device, dev);
struct kni_dev *kni = netdev_priv(net_dev);
int sockfd = -1;
+
if (kni->vhost_queue != NULL)
sockfd = kni->vhost_queue->sockfd;
return snprintf(buf, 10, "%d\n", sockfd);
@@ -792,6 +776,7 @@ show_sock_en(struct device *dev, struct device_attribute *attr,
{
struct net_device *net_dev = container_of(dev, struct net_device, dev);
struct kni_dev *kni = netdev_priv(net_dev);
+
return snprintf(buf, 10, "%u\n", (kni->vhost_queue == NULL ? 0 : 1));
}
@@ -804,7 +789,7 @@ set_sock_en(struct device *dev, struct device_attribute *attr,
unsigned long en;
int err = 0;
- if (0 != kstrtoul(buf, 0, &en))
+ if (kstrtoul(buf, 0, &en) != 0)
return -EINVAL;
if (en)
@@ -818,7 +803,7 @@ static DEVICE_ATTR(sock_en, S_IRUGO | S_IWUSR, show_sock_en, set_sock_en);
static struct attribute *dev_attrs[] = {
&dev_attr_sock_fd.attr,
&dev_attr_sock_en.attr,
- NULL,
+ NULL,
};
static const struct attribute_group dev_attr_grp = {
@@ -836,7 +821,7 @@ kni_vhost_backend_release(struct kni_dev *kni)
/* dettach from kni */
q->kni = NULL;
- KNI_DBG("release backend done\n");
+ pr_debug("release backend done\n");
return 0;
}
@@ -851,7 +836,7 @@ kni_vhost_init(struct kni_dev *kni)
kni->vq_status = BE_STOP;
- KNI_DBG("kni_vhost_init done\n");
+ pr_debug("kni_vhost_init done\n");
return 0;
}
diff --git a/lib/librte_ether/Makefile b/lib/librte_ether/Makefile
index 0bb5dc90..efe1e5fe 100644
--- a/lib/librte_ether/Makefile
+++ b/lib/librte_ether/Makefile
@@ -34,26 +34,25 @@ include $(RTE_SDK)/mk/rte.vars.mk
#
# library name
#
-LIB = libethdev.a
+LIB = librte_ethdev.a
CFLAGS += -O3
CFLAGS += $(WERROR_FLAGS)
EXPORT_MAP := rte_ether_version.map
-LIBABIVER := 4
+LIBABIVER := 5
SRCS-y += rte_ethdev.c
#
# Export include files
#
-SYMLINK-y-include += rte_ether.h
SYMLINK-y-include += rte_ethdev.h
SYMLINK-y-include += rte_eth_ctrl.h
SYMLINK-y-include += rte_dev_info.h
# this lib depends upon:
-DEPDIRS-y += lib/librte_eal lib/librte_mempool lib/librte_ring lib/librte_mbuf
+DEPDIRS-y += lib/librte_net lib/librte_eal lib/librte_mempool lib/librte_ring lib/librte_mbuf
include $(RTE_SDK)/mk/rte.lib.mk
diff --git a/lib/librte_ether/rte_dev_info.h b/lib/librte_ether/rte_dev_info.h
index 574683d3..aab6d1a6 100644
--- a/lib/librte_ether/rte_dev_info.h
+++ b/lib/librte_ether/rte_dev_info.h
@@ -34,6 +34,8 @@
#ifndef _RTE_DEV_INFO_H_
#define _RTE_DEV_INFO_H_
+#include <stdint.h>
+
/*
* Placeholder for accessing device registers
*/
diff --git a/lib/librte_ether/rte_eth_ctrl.h b/lib/librte_ether/rte_eth_ctrl.h
index c3a2c9e4..fe80eb01 100644
--- a/lib/librte_ether/rte_eth_ctrl.h
+++ b/lib/librte_ether/rte_eth_ctrl.h
@@ -34,6 +34,10 @@
#ifndef _RTE_ETH_CTRL_H_
#define _RTE_ETH_CTRL_H_
+#include <stdint.h>
+#include <rte_common.h>
+#include "rte_ether.h"
+
/**
* @file
*
diff --git a/lib/librte_ether/rte_ethdev.c b/lib/librte_ether/rte_ethdev.c
index a5b42aa8..fde8112f 100644
--- a/lib/librte_ether/rte_ethdev.c
+++ b/lib/librte_ether/rte_ethdev.c
@@ -58,7 +58,6 @@
#include <rte_atomic.h>
#include <rte_branch_prediction.h>
#include <rte_common.h>
-#include <rte_ring.h>
#include <rte_mempool.h>
#include <rte_malloc.h>
#include <rte_mbuf.h>
@@ -72,6 +71,7 @@
static const char *MZ_RTE_ETH_DEV_DATA = "rte_eth_dev_data";
struct rte_eth_dev rte_eth_devices[RTE_MAX_ETHPORTS];
static struct rte_eth_dev_data *rte_eth_dev_data;
+static uint8_t eth_dev_last_created_port;
static uint8_t nb_ports;
/* spinlock for eth device callbacks */
@@ -190,7 +190,7 @@ rte_eth_dev_find_free_port(void)
}
struct rte_eth_dev *
-rte_eth_dev_allocate(const char *name, enum rte_eth_dev_type type)
+rte_eth_dev_allocate(const char *name)
{
uint8_t port_id;
struct rte_eth_dev *eth_dev;
@@ -215,25 +215,11 @@ rte_eth_dev_allocate(const char *name, enum rte_eth_dev_type type)
snprintf(eth_dev->data->name, sizeof(eth_dev->data->name), "%s", name);
eth_dev->data->port_id = port_id;
eth_dev->attached = DEV_ATTACHED;
- eth_dev->dev_type = type;
+ eth_dev_last_created_port = port_id;
nb_ports++;
return eth_dev;
}
-static int
-rte_eth_dev_create_unique_device_name(char *name, size_t size,
- struct rte_pci_device *pci_dev)
-{
- int ret;
-
- ret = snprintf(name, size, "%d:%d.%d",
- pci_dev->addr.bus, pci_dev->addr.devid,
- pci_dev->addr.function);
- if (ret < 0)
- return ret;
- return 0;
-}
-
int
rte_eth_dev_release_port(struct rte_eth_dev *eth_dev)
{
@@ -245,9 +231,9 @@ rte_eth_dev_release_port(struct rte_eth_dev *eth_dev)
return 0;
}
-static int
-rte_eth_dev_init(struct rte_pci_driver *pci_drv,
- struct rte_pci_device *pci_dev)
+int
+rte_eth_dev_pci_probe(struct rte_pci_driver *pci_drv,
+ struct rte_pci_device *pci_dev)
{
struct eth_driver *eth_drv;
struct rte_eth_dev *eth_dev;
@@ -257,11 +243,10 @@ rte_eth_dev_init(struct rte_pci_driver *pci_drv,
eth_drv = (struct eth_driver *)pci_drv;
- /* Create unique Ethernet device name using PCI address */
- rte_eth_dev_create_unique_device_name(ethdev_name,
- sizeof(ethdev_name), pci_dev);
+ rte_eal_pci_device_name(&pci_dev->addr, ethdev_name,
+ sizeof(ethdev_name));
- eth_dev = rte_eth_dev_allocate(ethdev_name, RTE_ETH_DEV_PCI);
+ eth_dev = rte_eth_dev_allocate(ethdev_name);
if (eth_dev == NULL)
return -ENOMEM;
@@ -290,7 +275,7 @@ rte_eth_dev_init(struct rte_pci_driver *pci_drv,
return 0;
RTE_PMD_DEBUG_TRACE("driver %s: eth_dev_init(vendor_id=0x%x device_id=0x%x) failed\n",
- pci_drv->name,
+ pci_drv->driver.name,
(unsigned) pci_dev->id.vendor_id,
(unsigned) pci_dev->id.device_id);
if (rte_eal_process_type() == RTE_PROC_PRIMARY)
@@ -299,8 +284,8 @@ rte_eth_dev_init(struct rte_pci_driver *pci_drv,
return diag;
}
-static int
-rte_eth_dev_uninit(struct rte_pci_device *pci_dev)
+int
+rte_eth_dev_pci_remove(struct rte_pci_device *pci_dev)
{
const struct eth_driver *eth_drv;
struct rte_eth_dev *eth_dev;
@@ -310,9 +295,8 @@ rte_eth_dev_uninit(struct rte_pci_device *pci_dev)
if (pci_dev == NULL)
return -EINVAL;
- /* Create unique Ethernet device name using PCI address */
- rte_eth_dev_create_unique_device_name(ethdev_name,
- sizeof(ethdev_name), pci_dev);
+ rte_eal_pci_device_name(&pci_dev->addr, ethdev_name,
+ sizeof(ethdev_name));
eth_dev = rte_eth_dev_allocated(ethdev_name);
if (eth_dev == NULL)
@@ -340,28 +324,6 @@ rte_eth_dev_uninit(struct rte_pci_device *pci_dev)
return 0;
}
-/**
- * Register an Ethernet [Poll Mode] driver.
- *
- * Function invoked by the initialization function of an Ethernet driver
- * to simultaneously register itself as a PCI driver and as an Ethernet
- * Poll Mode Driver.
- * Invokes the rte_eal_pci_register() function to register the *pci_drv*
- * structure embedded in the *eth_drv* structure, after having stored the
- * address of the rte_eth_dev_init() function in the *devinit* field of
- * the *pci_drv* structure.
- * During the PCI probing phase, the rte_eth_dev_init() function is
- * invoked for each PCI [Ethernet device] matching the embedded PCI
- * identifiers provided by the driver.
- */
-void
-rte_eth_driver_register(struct eth_driver *eth_drv)
-{
- eth_drv->pci_drv.devinit = rte_eth_dev_init;
- eth_drv->pci_drv.devuninit = rte_eth_dev_uninit;
- rte_eal_pci_register(&eth_drv->pci_drv);
-}
-
int
rte_eth_dev_is_valid_port(uint8_t port_id)
{
@@ -385,27 +347,6 @@ rte_eth_dev_count(void)
return nb_ports;
}
-static enum rte_eth_dev_type
-rte_eth_dev_get_device_type(uint8_t port_id)
-{
- RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, RTE_ETH_DEV_UNKNOWN);
- return rte_eth_devices[port_id].dev_type;
-}
-
-static int
-rte_eth_dev_get_addr_by_port(uint8_t port_id, struct rte_pci_addr *addr)
-{
- RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -EINVAL);
-
- if (addr == NULL) {
- RTE_PMD_DEBUG_TRACE("Null pointer is specified\n");
- return -EINVAL;
- }
-
- *addr = rte_eth_devices[port_id].pci_dev->addr;
- return 0;
-}
-
int
rte_eth_dev_get_name_by_port(uint8_t port_id, char *name)
{
@@ -451,34 +392,6 @@ rte_eth_dev_get_port_by_name(const char *name, uint8_t *port_id)
}
static int
-rte_eth_dev_get_port_by_addr(const struct rte_pci_addr *addr, uint8_t *port_id)
-{
- int i;
- struct rte_pci_device *pci_dev = NULL;
-
- if (addr == NULL) {
- RTE_PMD_DEBUG_TRACE("Null pointer is specified\n");
- return -EINVAL;
- }
-
- *port_id = RTE_MAX_ETHPORTS;
-
- for (i = 0; i < RTE_MAX_ETHPORTS; i++) {
-
- pci_dev = rte_eth_devices[i].pci_dev;
-
- if (pci_dev &&
- !rte_eal_compare_pci_addr(&pci_dev->addr, addr)) {
-
- *port_id = i;
-
- return 0;
- }
- }
- return -ENODEV;
-}
-
-static int
rte_eth_dev_is_detachable(uint8_t port_id)
{
uint32_t dev_flags;
@@ -503,127 +416,49 @@ rte_eth_dev_is_detachable(uint8_t port_id)
return 1;
}
-/* attach the new physical device, then store port_id of the device */
-static int
-rte_eth_dev_attach_pdev(struct rte_pci_addr *addr, uint8_t *port_id)
+/* attach the new device, then store port_id of the device */
+int
+rte_eth_dev_attach(const char *devargs, uint8_t *port_id)
{
- /* re-construct pci_device_list */
- if (rte_eal_pci_scan())
- goto err;
- /* Invoke probe func of the driver can handle the new device. */
- if (rte_eal_pci_probe_one(addr))
- goto err;
+ int ret = -1;
+ int current = rte_eth_dev_count();
+ char *name = NULL;
+ char *args = NULL;
- if (rte_eth_dev_get_port_by_addr(addr, port_id))
+ if ((devargs == NULL) || (port_id == NULL)) {
+ ret = -EINVAL;
goto err;
+ }
- return 0;
-err:
- return -1;
-}
-
-/* detach the new physical device, then store pci_addr of the device */
-static int
-rte_eth_dev_detach_pdev(uint8_t port_id, struct rte_pci_addr *addr)
-{
- struct rte_pci_addr freed_addr;
- struct rte_pci_addr vp;
-
- /* get pci address by port id */
- if (rte_eth_dev_get_addr_by_port(port_id, &freed_addr))
+ /* parse devargs, then retrieve device name and args */
+ if (rte_eal_parse_devargs_str(devargs, &name, &args))
goto err;
- /* Zeroed pci addr means the port comes from virtual device */
- vp.domain = vp.bus = vp.devid = vp.function = 0;
- if (rte_eal_compare_pci_addr(&vp, &freed_addr) == 0)
+ ret = rte_eal_dev_attach(name, args);
+ if (ret < 0)
goto err;
- /* invoke devuninit func of the pci driver,
- * also remove the device from pci_device_list */
- if (rte_eal_pci_detach(&freed_addr))
+ /* no point looking at the port count if no port exists */
+ if (!rte_eth_dev_count()) {
+ RTE_LOG(ERR, EAL, "No port found for device (%s)\n", name);
+ ret = -1;
goto err;
+ }
- *addr = freed_addr;
- return 0;
-err:
- return -1;
-}
-
-/* attach the new virtual device, then store port_id of the device */
-static int
-rte_eth_dev_attach_vdev(const char *vdevargs, uint8_t *port_id)
-{
- char *name = NULL, *args = NULL;
- int ret = -1;
-
- /* parse vdevargs, then retrieve device name and args */
- if (rte_eal_parse_devargs_str(vdevargs, &name, &args))
- goto end;
-
- /* walk around dev_driver_list to find the driver of the device,
- * then invoke probe function of the driver.
- * rte_eal_vdev_init() updates port_id allocated after
- * initialization.
+ /* if nothing happened, there is a bug here, since some driver told us
+ * it did attach a device, but did not create a port.
*/
- if (rte_eal_vdev_init(name, args))
- goto end;
-
- if (rte_eth_dev_get_port_by_name(name, port_id))
- goto end;
-
- ret = 0;
-end:
- free(name);
- free(args);
-
- return ret;
-}
-
-/* detach the new virtual device, then store the name of the device */
-static int
-rte_eth_dev_detach_vdev(uint8_t port_id, char *vdevname)
-{
- char name[RTE_ETH_NAME_MAX_LEN];
-
- /* get device name by port id */
- if (rte_eth_dev_get_name_by_port(port_id, name))
- goto err;
- /* walk around dev_driver_list to find the driver of the device,
- * then invoke uninit function of the driver */
- if (rte_eal_vdev_uninit(name))
- goto err;
-
- strncpy(vdevname, name, sizeof(name));
- return 0;
-err:
- return -1;
-}
-
-/* attach the new device, then store port_id of the device */
-int
-rte_eth_dev_attach(const char *devargs, uint8_t *port_id)
-{
- struct rte_pci_addr addr;
- int ret = -1;
-
- if ((devargs == NULL) || (port_id == NULL)) {
- ret = -EINVAL;
+ if (current == rte_eth_dev_count()) {
+ ret = -1;
goto err;
}
- if (eal_parse_pci_DomBDF(devargs, &addr) == 0) {
- ret = rte_eth_dev_attach_pdev(&addr, port_id);
- if (ret < 0)
- goto err;
- } else {
- ret = rte_eth_dev_attach_vdev(devargs, port_id);
- if (ret < 0)
- goto err;
- }
+ *port_id = eth_dev_last_created_port;
+ ret = 0;
- return 0;
err:
- RTE_LOG(ERR, EAL, "Driver, cannot attach the device\n");
+ free(name);
+ free(args);
return ret;
}
@@ -631,7 +466,6 @@ err:
int
rte_eth_dev_detach(uint8_t port_id, char *name)
{
- struct rte_pci_addr addr;
int ret = -1;
if (name == NULL) {
@@ -639,33 +473,19 @@ rte_eth_dev_detach(uint8_t port_id, char *name)
goto err;
}
- /* check whether the driver supports detach feature, or not */
+ /* FIXME: move this to eal, once device flags are relocated there */
if (rte_eth_dev_is_detachable(port_id))
goto err;
- if (rte_eth_dev_get_device_type(port_id) == RTE_ETH_DEV_PCI) {
- ret = rte_eth_dev_get_addr_by_port(port_id, &addr);
- if (ret < 0)
- goto err;
-
- ret = rte_eth_dev_detach_pdev(port_id, &addr);
- if (ret < 0)
- goto err;
-
- snprintf(name, RTE_ETH_NAME_MAX_LEN,
- "%04x:%02x:%02x.%d",
- addr.domain, addr.bus,
- addr.devid, addr.function);
- } else {
- ret = rte_eth_dev_detach_vdev(port_id, name);
- if (ret < 0)
- goto err;
- }
+ snprintf(name, sizeof(rte_eth_devices[port_id].data->name),
+ "%s", rte_eth_devices[port_id].data->name);
+ ret = rte_eal_dev_detach(name);
+ if (ret < 0)
+ goto err;
return 0;
err:
- RTE_LOG(ERR, EAL, "Driver, cannot detach the device\n");
return ret;
}
@@ -2689,7 +2509,7 @@ rte_eth_dev_callback_unregister(uint8_t port_id,
void
_rte_eth_dev_callback_process(struct rte_eth_dev *dev,
- enum rte_eth_event_type event)
+ enum rte_eth_event_type event, void *cb_arg)
{
struct rte_eth_dev_callback *cb_lst;
struct rte_eth_dev_callback dev_cb;
@@ -2700,6 +2520,9 @@ _rte_eth_dev_callback_process(struct rte_eth_dev *dev,
continue;
dev_cb = *cb_lst;
cb_lst->active = 1;
+ if (cb_arg != NULL)
+ dev_cb.cb_arg = (void *) cb_arg;
+
rte_spinlock_unlock(&rte_eth_dev_cb_lock);
dev_cb.cb_fn(dev->data->port_id, dev_cb.event,
dev_cb.cb_arg);
@@ -2749,7 +2572,7 @@ rte_eth_dma_zone_reserve(const struct rte_eth_dev *dev, const char *ring_name,
const struct rte_memzone *mz;
snprintf(z_name, sizeof(z_name), "%s_%s_%d_%d",
- dev->driver->pci_drv.name, ring_name,
+ dev->driver->pci_drv.driver.name, ring_name,
dev->data->port_id, queue_id);
mz = rte_memzone_lookup(z_name);
@@ -3390,8 +3213,8 @@ rte_eth_copy_pci_info(struct rte_eth_dev *eth_dev, struct rte_pci_device *pci_de
eth_dev->data->dev_flags |= RTE_ETH_DEV_DETACHABLE;
eth_dev->data->kdrv = pci_dev->kdrv;
- eth_dev->data->numa_node = pci_dev->numa_node;
- eth_dev->data->drv_name = pci_dev->driver->name;
+ eth_dev->data->numa_node = pci_dev->device.numa_node;
+ eth_dev->data->drv_name = pci_dev->driver->driver.name;
}
int
diff --git a/lib/librte_ether/rte_ethdev.h b/lib/librte_ether/rte_ethdev.h
index b0fe0334..96781792 100644
--- a/lib/librte_ether/rte_ethdev.h
+++ b/lib/librte_ether/rte_ethdev.h
@@ -190,6 +190,9 @@ struct rte_mbuf;
/**
* A structure used to retrieve statistics for an Ethernet port.
+ * Not all statistics fields in struct rte_eth_stats are supported
+ * by every type of network interface card (NIC). If a statistics
+ * field is not supported, its value is 0.
*/
struct rte_eth_stats {
uint64_t ipackets; /**< Total number of successfully received packets. */
@@ -198,7 +201,7 @@ struct rte_eth_stats {
uint64_t obytes; /**< Total number of successfully transmitted bytes. */
uint64_t imissed;
/**< Total of RX packets dropped by the HW,
- * because there are no available mbufs (i.e. RX queues are full).
+ * because there are no available buffers (i.e. RX queues are full).
*/
uint64_t ierrors; /**< Total number of erroneous received packets. */
uint64_t oerrors; /**< Total number of failed transmitted packets. */
@@ -255,6 +258,7 @@ struct rte_eth_stats {
/**
* A structure used to retrieve link-level information of an Ethernet port.
*/
+__extension__
struct rte_eth_link {
uint32_t link_speed; /**< ETH_SPEED_NUM_ */
uint16_t link_duplex : 1; /**< ETH_LINK_[HALF/FULL]_DUPLEX */
@@ -346,6 +350,7 @@ struct rte_eth_rxmode {
enum rte_eth_rx_mq_mode mq_mode;
uint32_t max_rx_pkt_len; /**< Only used if jumbo_frame enabled. */
uint16_t split_hdr_size; /**< hdr buf size (header_split enabled).*/
+ __extension__
uint16_t header_split : 1, /**< Header Split enable. */
hw_ip_checksum : 1, /**< IP/UDP/TCP checksum offload enable. */
hw_vlan_filter : 1, /**< VLAN filter enable. */
@@ -645,6 +650,7 @@ struct rte_eth_txmode {
/* For i40e specifically */
uint16_t pvid;
+ __extension__
uint8_t hw_vlan_reject_tagged : 1,
/**< If set, reject sending out tagged pkts */
hw_vlan_reject_untagged : 1,
@@ -864,6 +870,10 @@ struct rte_eth_conf {
#define DEV_TX_OFFLOAD_UDP_TSO 0x00000040
#define DEV_TX_OFFLOAD_OUTER_IPV4_CKSUM 0x00000080 /**< Used for tunneling packet. */
#define DEV_TX_OFFLOAD_QINQ_INSERT 0x00000100
+#define DEV_TX_OFFLOAD_VXLAN_TNL_TSO 0x00000200 /**< Used for tunneling packet. */
+#define DEV_TX_OFFLOAD_GRE_TNL_TSO 0x00000400 /**< Used for tunneling packet. */
+#define DEV_TX_OFFLOAD_IPIP_TNL_TSO 0x00000800 /**< Used for tunneling packet. */
+#define DEV_TX_OFFLOAD_GENEVE_TNL_TSO 0x00001000 /**< Used for tunneling packet. */
/**
* Ethernet device information
@@ -1603,17 +1613,6 @@ struct rte_eth_rxtx_callback {
};
/**
- * The eth device type.
- */
-enum rte_eth_dev_type {
- RTE_ETH_DEV_UNKNOWN, /**< unknown device type */
- RTE_ETH_DEV_PCI,
- /**< Physical function and Virtual function of PCI devices */
- RTE_ETH_DEV_VIRTUAL, /**< non hardware device */
- RTE_ETH_DEV_MAX /**< max value of this enum */
-};
-
-/**
* @internal
* The generic data structure associated with each ethernet device.
*
@@ -1643,7 +1642,6 @@ struct rte_eth_dev {
*/
struct rte_eth_rxtx_callback *pre_tx_burst_cbs[RTE_MAX_QUEUES_PER_PORT];
uint8_t attached; /**< Flag indicating the port is attached */
- enum rte_eth_dev_type dev_type; /**< Flag indicating the device type */
} __rte_cache_aligned;
struct rte_eth_dev_sriov {
@@ -1691,6 +1689,7 @@ struct rte_eth_dev_data {
struct ether_addr* hash_mac_addrs;
/** Device Ethernet MAC addresses of hash filtering. */
uint8_t port_id; /**< Device [external] port identifier. */
+ __extension__
uint8_t promiscuous : 1, /**< RX promiscuous mode ON(1) / OFF(0). */
scattered_rx : 1, /**< RX of scattered packets is ON(1) / OFF(0) */
all_multicast : 1, /**< RX all multicast mode ON(1) / OFF(0). */
@@ -1756,8 +1755,7 @@ struct rte_eth_dev *rte_eth_dev_allocated(const char *name);
* @return
* - Slot in the rte_dev_devices array for a new device;
*/
-struct rte_eth_dev *rte_eth_dev_allocate(const char *name,
- enum rte_eth_dev_type type);
+struct rte_eth_dev *rte_eth_dev_allocate(const char *name);
/**
* @internal
@@ -1776,7 +1774,7 @@ int rte_eth_dev_release_port(struct rte_eth_dev *eth_dev);
* @param devargs
* A pointer to a strings array describing the new device
* to be attached. The strings should be a pci address like
- * '0000:01:00.0' or virtual device name like 'eth_pcap0'.
+ * '0000:01:00.0' or virtual device name like 'net_pcap0'.
* @param port_id
* A pointer to a port identifier actually attached.
* @return
@@ -1871,18 +1869,6 @@ struct eth_driver {
};
/**
- * @internal
- * A function invoked by the initialization function of an Ethernet driver
- * to simultaneously register itself as a PCI driver and as an Ethernet
- * Poll Mode Driver (PMD).
- *
- * @param eth_drv
- * The pointer to the *eth_driver* structure associated with
- * the Ethernet driver.
- */
-void rte_eth_driver_register(struct eth_driver *eth_drv);
-
-/**
* Convert a numerical speed in Mbps to a bitmap flag that can be used in
* the bitmap link_speeds of the struct rte_eth_conf
*
@@ -3047,6 +3033,7 @@ enum rte_eth_event_type {
/**< queue state event (enabled/disabled) */
RTE_ETH_EVENT_INTR_RESET,
/**< reset interrupt event, sent to VF on PF reset */
+ RTE_ETH_EVENT_VF_MBOX, /**< message from the VF received by PF */
RTE_ETH_EVENT_MAX /**< max value of this enum */
};
@@ -3068,6 +3055,11 @@ typedef void (*rte_eth_dev_cb_fn)(uint8_t port_id, \
* @param cb_arg
* Pointer to the parameters for the registered callback.
*
+ * The user data is overwritten in the case of RTE_ETH_EVENT_VF_MBOX.
+ * This event occurs when a message from the VF is received by the PF.
+ * The user data is overwritten with struct rte_pmd_ixgbe_mb_event_param.
+ * This struct is defined in rte_pmd_ixgbe.h.
+ *
* @return
* - On success, zero.
* - On failure, a negative value.
@@ -3106,12 +3098,16 @@ int rte_eth_dev_callback_unregister(uint8_t port_id,
* Pointer to struct rte_eth_dev.
* @param event
* Eth device interrupt event type.
+ * @param cb_arg
+ * Update callback parameter to pass data back to user application.
+ * This allows the user application to decide if a particular function
+ * is permitted or not.
*
* @return
* void
*/
void _rte_eth_dev_callback_process(struct rte_eth_dev *dev,
- enum rte_eth_event_type event);
+ enum rte_eth_event_type event, void *cb_arg);
/**
* When there is no rx packet coming in Rx Queue for a long time, we can
@@ -4341,7 +4337,7 @@ rte_eth_dev_l2_tunnel_offload_set(uint8_t port_id,
/**
* Get the port id from pci adrress or device name
-* Ex: 0000:2:00.0 or vdev name eth_pcap0
+* Ex: 0000:2:00.0 or vdev name net_pcap0
*
* @param name
* pci address or name of the device
@@ -4368,6 +4364,21 @@ rte_eth_dev_get_port_by_name(const char *name, uint8_t *port_id);
int
rte_eth_dev_get_name_by_port(uint8_t port_id, char *name);
+/**
+ * @internal
+ * Wrapper for use by pci drivers as a .probe function to attach to an ethdev
+ * interface.
+ */
+int rte_eth_dev_pci_probe(struct rte_pci_driver *pci_drv,
+ struct rte_pci_device *pci_dev);
+
+/**
+ * @internal
+ * Wrapper for use by pci drivers as a .remove function to detach an ethdev
+ * interface.
+ */
+int rte_eth_dev_pci_remove(struct rte_pci_device *pci_dev);
+
#ifdef __cplusplus
}
#endif
diff --git a/lib/librte_ether/rte_ether_version.map b/lib/librte_ether/rte_ether_version.map
index 45ddf44c..72be66d8 100644
--- a/lib/librte_ether/rte_ether_version.map
+++ b/lib/librte_ether/rte_ether_version.map
@@ -78,7 +78,6 @@ DPDK_2.2 {
rte_eth_dev_vlan_filter;
rte_eth_dev_wd_timeout_store;
rte_eth_dma_zone_reserve;
- rte_eth_driver_register;
rte_eth_led_off;
rte_eth_led_on;
rte_eth_link;
@@ -138,4 +137,13 @@ DPDK_16.07 {
rte_eth_dev_get_name_by_port;
rte_eth_dev_get_port_by_name;
rte_eth_xstats_get_names;
+
} DPDK_16.04;
+
+DPDK_16.11 {
+ global:
+
+ rte_eth_dev_pci_probe;
+ rte_eth_dev_pci_remove;
+
+} DPDK_16.07;
diff --git a/lib/librte_hash/rte_cuckoo_hash.c b/lib/librte_hash/rte_cuckoo_hash.c
index d6e68c68..51db006a 100644
--- a/lib/librte_hash/rte_cuckoo_hash.c
+++ b/lib/librte_hash/rte_cuckoo_hash.c
@@ -98,6 +98,7 @@ rte_hash_find_existing(const char *name)
void rte_hash_set_cmp_func(struct rte_hash *h, rte_hash_cmp_eq_t func)
{
+ h->cmp_jump_table_idx = KEY_CUSTOM;
h->rte_hash_custom_cmp_eq = func;
}
@@ -283,6 +284,15 @@ rte_hash_create(const struct rte_hash_parameters *params)
h->free_slots = r;
h->hw_trans_mem_support = hw_trans_mem_support;
+#if defined(RTE_ARCH_X86)
+ if (rte_cpu_get_flag_enabled(RTE_CPUFLAG_AVX2))
+ h->sig_cmp_fn = RTE_HASH_COMPARE_AVX2;
+ else if (rte_cpu_get_flag_enabled(RTE_CPUFLAG_SSE2))
+ h->sig_cmp_fn = RTE_HASH_COMPARE_SSE;
+ else
+#endif
+ h->sig_cmp_fn = RTE_HASH_COMPARE_SCALAR;
+
/* Turn on multi-writer only with explicit flag from user and TM
* support.
*/
@@ -421,10 +431,10 @@ make_space_bucket(const struct rte_hash *h, struct rte_hash_bucket *bkt)
*/
for (i = 0; i < RTE_HASH_BUCKET_ENTRIES; i++) {
/* Search for space in alternative locations */
- next_bucket_idx = bkt->signatures[i].alt & h->bucket_bitmask;
+ next_bucket_idx = bkt->sig_alt[i] & h->bucket_bitmask;
next_bkt[i] = &h->buckets[next_bucket_idx];
for (j = 0; j < RTE_HASH_BUCKET_ENTRIES; j++) {
- if (next_bkt[i]->signatures[j].sig == NULL_SIGNATURE)
+ if (next_bkt[i]->key_idx[j] == EMPTY_SLOT)
break;
}
@@ -434,8 +444,8 @@ make_space_bucket(const struct rte_hash *h, struct rte_hash_bucket *bkt)
/* Alternative location has spare room (end of recursive function) */
if (i != RTE_HASH_BUCKET_ENTRIES) {
- next_bkt[i]->signatures[j].alt = bkt->signatures[i].current;
- next_bkt[i]->signatures[j].current = bkt->signatures[i].alt;
+ next_bkt[i]->sig_alt[j] = bkt->sig_current[i];
+ next_bkt[i]->sig_current[j] = bkt->sig_alt[i];
next_bkt[i]->key_idx[j] = bkt->key_idx[i];
return i;
}
@@ -464,8 +474,8 @@ make_space_bucket(const struct rte_hash *h, struct rte_hash_bucket *bkt)
bkt->flag[i] = 0;
nr_pushes = 0;
if (ret >= 0) {
- next_bkt[i]->signatures[ret].alt = bkt->signatures[i].current;
- next_bkt[i]->signatures[ret].current = bkt->signatures[i].alt;
+ next_bkt[i]->sig_alt[ret] = bkt->sig_current[i];
+ next_bkt[i]->sig_current[ret] = bkt->sig_alt[i];
next_bkt[i]->key_idx[ret] = bkt->key_idx[i];
return i;
} else
@@ -547,8 +557,8 @@ __rte_hash_add_key_with_hash(const struct rte_hash *h, const void *key,
/* Check if key is already inserted in primary location */
for (i = 0; i < RTE_HASH_BUCKET_ENTRIES; i++) {
- if (prim_bkt->signatures[i].current == sig &&
- prim_bkt->signatures[i].alt == alt_hash) {
+ if (prim_bkt->sig_current[i] == sig &&
+ prim_bkt->sig_alt[i] == alt_hash) {
k = (struct rte_hash_key *) ((char *)keys +
prim_bkt->key_idx[i] * h->key_entry_size);
if (rte_hash_cmp_eq(key, k->key, h) == 0) {
@@ -567,8 +577,8 @@ __rte_hash_add_key_with_hash(const struct rte_hash *h, const void *key,
/* Check if key is already inserted in secondary location */
for (i = 0; i < RTE_HASH_BUCKET_ENTRIES; i++) {
- if (sec_bkt->signatures[i].alt == sig &&
- sec_bkt->signatures[i].current == alt_hash) {
+ if (sec_bkt->sig_alt[i] == sig &&
+ sec_bkt->sig_current[i] == alt_hash) {
k = (struct rte_hash_key *) ((char *)keys +
sec_bkt->key_idx[i] * h->key_entry_size);
if (rte_hash_cmp_eq(key, k->key, h) == 0) {
@@ -613,9 +623,9 @@ __rte_hash_add_key_with_hash(const struct rte_hash *h, const void *key,
#endif
for (i = 0; i < RTE_HASH_BUCKET_ENTRIES; i++) {
/* Check if slot is available */
- if (likely(prim_bkt->signatures[i].sig == NULL_SIGNATURE)) {
- prim_bkt->signatures[i].current = sig;
- prim_bkt->signatures[i].alt = alt_hash;
+ if (likely(prim_bkt->key_idx[i] == EMPTY_SLOT)) {
+ prim_bkt->sig_current[i] = sig;
+ prim_bkt->sig_alt[i] = alt_hash;
prim_bkt->key_idx[i] = new_idx;
break;
}
@@ -635,8 +645,8 @@ __rte_hash_add_key_with_hash(const struct rte_hash *h, const void *key,
*/
ret = make_space_bucket(h, prim_bkt);
if (ret >= 0) {
- prim_bkt->signatures[ret].current = sig;
- prim_bkt->signatures[ret].alt = alt_hash;
+ prim_bkt->sig_current[ret] = sig;
+ prim_bkt->sig_alt[ret] = alt_hash;
prim_bkt->key_idx[ret] = new_idx;
if (h->add_key == ADD_KEY_MULTIWRITER)
rte_spinlock_unlock(h->multiwriter_lock);
@@ -710,8 +720,8 @@ __rte_hash_lookup_with_hash(const struct rte_hash *h, const void *key,
/* Check if key is in primary location */
for (i = 0; i < RTE_HASH_BUCKET_ENTRIES; i++) {
- if (bkt->signatures[i].current == sig &&
- bkt->signatures[i].sig != NULL_SIGNATURE) {
+ if (bkt->sig_current[i] == sig &&
+ bkt->key_idx[i] != EMPTY_SLOT) {
k = (struct rte_hash_key *) ((char *)keys +
bkt->key_idx[i] * h->key_entry_size);
if (rte_hash_cmp_eq(key, k->key, h) == 0) {
@@ -733,8 +743,8 @@ __rte_hash_lookup_with_hash(const struct rte_hash *h, const void *key,
/* Check if key is in secondary location */
for (i = 0; i < RTE_HASH_BUCKET_ENTRIES; i++) {
- if (bkt->signatures[i].current == alt_hash &&
- bkt->signatures[i].alt == sig) {
+ if (bkt->sig_current[i] == alt_hash &&
+ bkt->sig_alt[i] == sig) {
k = (struct rte_hash_key *) ((char *)keys +
bkt->key_idx[i] * h->key_entry_size);
if (rte_hash_cmp_eq(key, k->key, h) == 0) {
@@ -788,7 +798,8 @@ remove_entry(const struct rte_hash *h, struct rte_hash_bucket *bkt, unsigned i)
unsigned lcore_id, n_slots;
struct lcore_cache *cached_free_slots;
- bkt->signatures[i].sig = NULL_SIGNATURE;
+ bkt->sig_current[i] = NULL_SIGNATURE;
+ bkt->sig_alt[i] = NULL_SIGNATURE;
if (h->hw_trans_mem_support) {
lcore_id = rte_lcore_id();
cached_free_slots = &h->local_free_slots[lcore_id];
@@ -826,8 +837,8 @@ __rte_hash_del_key_with_hash(const struct rte_hash *h, const void *key,
/* Check if key is in primary location */
for (i = 0; i < RTE_HASH_BUCKET_ENTRIES; i++) {
- if (bkt->signatures[i].current == sig &&
- bkt->signatures[i].sig != NULL_SIGNATURE) {
+ if (bkt->sig_current[i] == sig &&
+ bkt->key_idx[i] != EMPTY_SLOT) {
k = (struct rte_hash_key *) ((char *)keys +
bkt->key_idx[i] * h->key_entry_size);
if (rte_hash_cmp_eq(key, k->key, h) == 0) {
@@ -838,7 +849,7 @@ __rte_hash_del_key_with_hash(const struct rte_hash *h, const void *key,
* subtracting the first dummy index
*/
ret = bkt->key_idx[i] - 1;
- bkt->key_idx[i] = 0;
+ bkt->key_idx[i] = EMPTY_SLOT;
return ret;
}
}
@@ -851,8 +862,8 @@ __rte_hash_del_key_with_hash(const struct rte_hash *h, const void *key,
/* Check if key is in secondary location */
for (i = 0; i < RTE_HASH_BUCKET_ENTRIES; i++) {
- if (bkt->signatures[i].current == alt_hash &&
- bkt->signatures[i].sig != NULL_SIGNATURE) {
+ if (bkt->sig_current[i] == alt_hash &&
+ bkt->key_idx[i] != EMPTY_SLOT) {
k = (struct rte_hash_key *) ((char *)keys +
bkt->key_idx[i] * h->key_entry_size);
if (rte_hash_cmp_eq(key, k->key, h) == 0) {
@@ -863,7 +874,7 @@ __rte_hash_del_key_with_hash(const struct rte_hash *h, const void *key,
* subtracting the first dummy index
*/
ret = bkt->key_idx[i] - 1;
- bkt->key_idx[i] = 0;
+ bkt->key_idx[i] = EMPTY_SLOT;
return ret;
}
}
@@ -907,280 +918,189 @@ rte_hash_get_key_with_position(const struct rte_hash *h, const int32_t position,
return 0;
}
-/* Lookup bulk stage 0: Prefetch input key */
static inline void
-lookup_stage0(unsigned *idx, uint64_t *lookup_mask,
- const void * const *keys)
+compare_signatures(uint32_t *prim_hash_matches, uint32_t *sec_hash_matches,
+ const struct rte_hash_bucket *prim_bkt,
+ const struct rte_hash_bucket *sec_bkt,
+ hash_sig_t prim_hash, hash_sig_t sec_hash,
+ enum rte_hash_sig_compare_function sig_cmp_fn)
{
- *idx = __builtin_ctzl(*lookup_mask);
- if (*lookup_mask == 0)
- *idx = 0;
+ unsigned int i;
+
+ switch (sig_cmp_fn) {
+#ifdef RTE_MACHINE_CPUFLAG_AVX2
+ case RTE_HASH_COMPARE_AVX2:
+ *prim_hash_matches = _mm256_movemask_ps((__m256)_mm256_cmpeq_epi32(
+ _mm256_load_si256(
+ (__m256i const *)prim_bkt->sig_current),
+ _mm256_set1_epi32(prim_hash)));
+ *sec_hash_matches = _mm256_movemask_ps((__m256)_mm256_cmpeq_epi32(
+ _mm256_load_si256(
+ (__m256i const *)sec_bkt->sig_current),
+ _mm256_set1_epi32(sec_hash)));
+ break;
+#endif
+#ifdef RTE_MACHINE_CPUFLAG_SSE2
+ case RTE_HASH_COMPARE_SSE:
+ /* Compare the first 4 signatures in the bucket */
+ *prim_hash_matches = _mm_movemask_ps((__m128)_mm_cmpeq_epi16(
+ _mm_load_si128(
+ (__m128i const *)prim_bkt->sig_current),
+ _mm_set1_epi32(prim_hash)));
+ *prim_hash_matches |= (_mm_movemask_ps((__m128)_mm_cmpeq_epi16(
+ _mm_load_si128(
+ (__m128i const *)&prim_bkt->sig_current[4]),
+ _mm_set1_epi32(prim_hash)))) << 4;
+ /* Compare the first 4 signatures in the bucket */
+ *sec_hash_matches = _mm_movemask_ps((__m128)_mm_cmpeq_epi16(
+ _mm_load_si128(
+ (__m128i const *)sec_bkt->sig_current),
+ _mm_set1_epi32(sec_hash)));
+ *sec_hash_matches |= (_mm_movemask_ps((__m128)_mm_cmpeq_epi16(
+ _mm_load_si128(
+ (__m128i const *)&sec_bkt->sig_current[4]),
+ _mm_set1_epi32(sec_hash)))) << 4;
+ break;
+#endif
+ default:
+ for (i = 0; i < RTE_HASH_BUCKET_ENTRIES; i++) {
+ *prim_hash_matches |=
+ ((prim_hash == prim_bkt->sig_current[i]) << i);
+ *sec_hash_matches |=
+ ((sec_hash == sec_bkt->sig_current[i]) << i);
+ }
+ }
- rte_prefetch0(keys[*idx]);
- *lookup_mask &= ~(1llu << *idx);
}
-/*
- * Lookup bulk stage 1: Calculate primary/secondary hashes
- * and prefetch primary/secondary buckets
- */
+#define PREFETCH_OFFSET 4
static inline void
-lookup_stage1(unsigned idx, hash_sig_t *prim_hash, hash_sig_t *sec_hash,
- const struct rte_hash_bucket **primary_bkt,
- const struct rte_hash_bucket **secondary_bkt,
- hash_sig_t *hash_vals, const void * const *keys,
- const struct rte_hash *h)
+__rte_hash_lookup_bulk(const struct rte_hash *h, const void **keys,
+ int32_t num_keys, int32_t *positions,
+ uint64_t *hit_mask, void *data[])
{
- *prim_hash = rte_hash_hash(h, keys[idx]);
- hash_vals[idx] = *prim_hash;
- *sec_hash = rte_hash_secondary_hash(*prim_hash);
+ uint64_t hits = 0;
+ int32_t i;
+ uint32_t prim_hash[RTE_HASH_LOOKUP_BULK_MAX];
+ uint32_t sec_hash[RTE_HASH_LOOKUP_BULK_MAX];
+ const struct rte_hash_bucket *primary_bkt[RTE_HASH_LOOKUP_BULK_MAX];
+ const struct rte_hash_bucket *secondary_bkt[RTE_HASH_LOOKUP_BULK_MAX];
+ uint32_t prim_hitmask[RTE_HASH_LOOKUP_BULK_MAX] = {0};
+ uint32_t sec_hitmask[RTE_HASH_LOOKUP_BULK_MAX] = {0};
+
+ /* Prefetch first keys */
+ for (i = 0; i < PREFETCH_OFFSET && i < num_keys; i++)
+ rte_prefetch0(keys[i]);
- *primary_bkt = &h->buckets[*prim_hash & h->bucket_bitmask];
- *secondary_bkt = &h->buckets[*sec_hash & h->bucket_bitmask];
+ /*
+ * Prefetch rest of the keys, calculate primary and
+ * secondary bucket and prefetch them
+ */
+ for (i = 0; i < (num_keys - PREFETCH_OFFSET); i++) {
+ rte_prefetch0(keys[i + PREFETCH_OFFSET]);
- rte_prefetch0(*primary_bkt);
- rte_prefetch0(*secondary_bkt);
-}
+ prim_hash[i] = rte_hash_hash(h, keys[i]);
+ sec_hash[i] = rte_hash_secondary_hash(prim_hash[i]);
-/*
- * Lookup bulk stage 2: Search for match hashes in primary/secondary locations
- * and prefetch first key slot
- */
-static inline void
-lookup_stage2(unsigned idx, hash_sig_t prim_hash, hash_sig_t sec_hash,
- const struct rte_hash_bucket *prim_bkt,
- const struct rte_hash_bucket *sec_bkt,
- const struct rte_hash_key **key_slot, int32_t *positions,
- uint64_t *extra_hits_mask, const void *keys,
- const struct rte_hash *h)
-{
- unsigned prim_hash_matches, sec_hash_matches, key_idx, i;
- unsigned total_hash_matches;
+ primary_bkt[i] = &h->buckets[prim_hash[i] & h->bucket_bitmask];
+ secondary_bkt[i] = &h->buckets[sec_hash[i] & h->bucket_bitmask];
- prim_hash_matches = 1 << RTE_HASH_BUCKET_ENTRIES;
- sec_hash_matches = 1 << RTE_HASH_BUCKET_ENTRIES;
- for (i = 0; i < RTE_HASH_BUCKET_ENTRIES; i++) {
- prim_hash_matches |= ((prim_hash == prim_bkt->signatures[i].current) << i);
- sec_hash_matches |= ((sec_hash == sec_bkt->signatures[i].current) << i);
+ rte_prefetch0(primary_bkt[i]);
+ rte_prefetch0(secondary_bkt[i]);
}
- key_idx = prim_bkt->key_idx[__builtin_ctzl(prim_hash_matches)];
- if (key_idx == 0)
- key_idx = sec_bkt->key_idx[__builtin_ctzl(sec_hash_matches)];
+ /* Calculate and prefetch rest of the buckets */
+ for (; i < num_keys; i++) {
+ prim_hash[i] = rte_hash_hash(h, keys[i]);
+ sec_hash[i] = rte_hash_secondary_hash(prim_hash[i]);
- total_hash_matches = (prim_hash_matches |
- (sec_hash_matches << (RTE_HASH_BUCKET_ENTRIES + 1)));
- *key_slot = (const struct rte_hash_key *) ((const char *)keys +
- key_idx * h->key_entry_size);
+ primary_bkt[i] = &h->buckets[prim_hash[i] & h->bucket_bitmask];
+ secondary_bkt[i] = &h->buckets[sec_hash[i] & h->bucket_bitmask];
- rte_prefetch0(*key_slot);
- /*
- * Return index where key is stored,
- * substracting the first dummy index
- */
- positions[idx] = (key_idx - 1);
+ rte_prefetch0(primary_bkt[i]);
+ rte_prefetch0(secondary_bkt[i]);
+ }
- *extra_hits_mask |= (uint64_t)(__builtin_popcount(total_hash_matches) > 3) << idx;
+ /* Compare signatures and prefetch key slot of first hit */
+ for (i = 0; i < num_keys; i++) {
+ compare_signatures(&prim_hitmask[i], &sec_hitmask[i],
+ primary_bkt[i], secondary_bkt[i],
+ prim_hash[i], sec_hash[i], h->sig_cmp_fn);
+
+ if (prim_hitmask[i]) {
+ uint32_t first_hit = __builtin_ctzl(prim_hitmask[i]);
+ uint32_t key_idx = primary_bkt[i]->key_idx[first_hit];
+ const struct rte_hash_key *key_slot =
+ (const struct rte_hash_key *)(
+ (const char *)h->key_store +
+ key_idx * h->key_entry_size);
+ rte_prefetch0(key_slot);
+ continue;
+ }
-}
+ if (sec_hitmask[i]) {
+ uint32_t first_hit = __builtin_ctzl(sec_hitmask[i]);
+ uint32_t key_idx = secondary_bkt[i]->key_idx[first_hit];
+ const struct rte_hash_key *key_slot =
+ (const struct rte_hash_key *)(
+ (const char *)h->key_store +
+ key_idx * h->key_entry_size);
+ rte_prefetch0(key_slot);
+ }
+ }
+ /* Compare keys, first hits in primary first */
+ for (i = 0; i < num_keys; i++) {
+ positions[i] = -ENOENT;
+ while (prim_hitmask[i]) {
+ uint32_t hit_index = __builtin_ctzl(prim_hitmask[i]);
+
+ uint32_t key_idx = primary_bkt[i]->key_idx[hit_index];
+ const struct rte_hash_key *key_slot =
+ (const struct rte_hash_key *)(
+ (const char *)h->key_store +
+ key_idx * h->key_entry_size);
+ /*
+ * If key index is 0, do not compare key,
+ * as it is checking the dummy slot
+ */
+ if (!!key_idx & !rte_hash_cmp_eq(key_slot->key, keys[i], h)) {
+ if (data != NULL)
+ data[i] = key_slot->pdata;
-/* Lookup bulk stage 3: Check if key matches, update hit mask and return data */
-static inline void
-lookup_stage3(unsigned idx, const struct rte_hash_key *key_slot, const void * const *keys,
- const int32_t *positions, void *data[], uint64_t *hits,
- const struct rte_hash *h)
-{
- unsigned hit;
- unsigned key_idx;
+ hits |= 1ULL << i;
+ positions[i] = key_idx - 1;
+ goto next_key;
+ }
+ prim_hitmask[i] &= ~(1 << (hit_index));
+ }
- hit = !rte_hash_cmp_eq(key_slot->key, keys[idx], h);
- if (data != NULL)
- data[idx] = key_slot->pdata;
+ while (sec_hitmask[i]) {
+ uint32_t hit_index = __builtin_ctzl(sec_hitmask[i]);
- key_idx = positions[idx] + 1;
- /*
- * If key index is 0, force hit to be 0, in case key to be looked up
- * is all zero (as in the dummy slot), which would result in a wrong hit
- */
- *hits |= (uint64_t)(hit && !!key_idx) << idx;
-}
+ uint32_t key_idx = secondary_bkt[i]->key_idx[hit_index];
+ const struct rte_hash_key *key_slot =
+ (const struct rte_hash_key *)(
+ (const char *)h->key_store +
+ key_idx * h->key_entry_size);
+ /*
+ * If key index is 0, do not compare key,
+ * as it is checking the dummy slot
+ */
-static inline void
-__rte_hash_lookup_bulk(const struct rte_hash *h, const void **keys,
- uint32_t num_keys, int32_t *positions,
- uint64_t *hit_mask, void *data[])
-{
- uint64_t hits = 0;
- uint64_t extra_hits_mask = 0;
- uint64_t lookup_mask, miss_mask;
- unsigned idx;
- const void *key_store = h->key_store;
- int ret;
- hash_sig_t hash_vals[RTE_HASH_LOOKUP_BULK_MAX];
-
- unsigned idx00, idx01, idx10, idx11, idx20, idx21, idx30, idx31;
- const struct rte_hash_bucket *primary_bkt10, *primary_bkt11;
- const struct rte_hash_bucket *secondary_bkt10, *secondary_bkt11;
- const struct rte_hash_bucket *primary_bkt20, *primary_bkt21;
- const struct rte_hash_bucket *secondary_bkt20, *secondary_bkt21;
- const struct rte_hash_key *k_slot20, *k_slot21, *k_slot30, *k_slot31;
- hash_sig_t primary_hash10, primary_hash11;
- hash_sig_t secondary_hash10, secondary_hash11;
- hash_sig_t primary_hash20, primary_hash21;
- hash_sig_t secondary_hash20, secondary_hash21;
-
- lookup_mask = (uint64_t) -1 >> (64 - num_keys);
- miss_mask = lookup_mask;
-
- lookup_stage0(&idx00, &lookup_mask, keys);
- lookup_stage0(&idx01, &lookup_mask, keys);
-
- idx10 = idx00, idx11 = idx01;
-
- lookup_stage0(&idx00, &lookup_mask, keys);
- lookup_stage0(&idx01, &lookup_mask, keys);
- lookup_stage1(idx10, &primary_hash10, &secondary_hash10,
- &primary_bkt10, &secondary_bkt10, hash_vals, keys, h);
- lookup_stage1(idx11, &primary_hash11, &secondary_hash11,
- &primary_bkt11, &secondary_bkt11, hash_vals, keys, h);
-
- primary_bkt20 = primary_bkt10;
- primary_bkt21 = primary_bkt11;
- secondary_bkt20 = secondary_bkt10;
- secondary_bkt21 = secondary_bkt11;
- primary_hash20 = primary_hash10;
- primary_hash21 = primary_hash11;
- secondary_hash20 = secondary_hash10;
- secondary_hash21 = secondary_hash11;
- idx20 = idx10, idx21 = idx11;
- idx10 = idx00, idx11 = idx01;
-
- lookup_stage0(&idx00, &lookup_mask, keys);
- lookup_stage0(&idx01, &lookup_mask, keys);
- lookup_stage1(idx10, &primary_hash10, &secondary_hash10,
- &primary_bkt10, &secondary_bkt10, hash_vals, keys, h);
- lookup_stage1(idx11, &primary_hash11, &secondary_hash11,
- &primary_bkt11, &secondary_bkt11, hash_vals, keys, h);
- lookup_stage2(idx20, primary_hash20, secondary_hash20, primary_bkt20,
- secondary_bkt20, &k_slot20, positions, &extra_hits_mask,
- key_store, h);
- lookup_stage2(idx21, primary_hash21, secondary_hash21, primary_bkt21,
- secondary_bkt21, &k_slot21, positions, &extra_hits_mask,
- key_store, h);
-
- while (lookup_mask) {
- k_slot30 = k_slot20, k_slot31 = k_slot21;
- idx30 = idx20, idx31 = idx21;
- primary_bkt20 = primary_bkt10;
- primary_bkt21 = primary_bkt11;
- secondary_bkt20 = secondary_bkt10;
- secondary_bkt21 = secondary_bkt11;
- primary_hash20 = primary_hash10;
- primary_hash21 = primary_hash11;
- secondary_hash20 = secondary_hash10;
- secondary_hash21 = secondary_hash11;
- idx20 = idx10, idx21 = idx11;
- idx10 = idx00, idx11 = idx01;
-
- lookup_stage0(&idx00, &lookup_mask, keys);
- lookup_stage0(&idx01, &lookup_mask, keys);
- lookup_stage1(idx10, &primary_hash10, &secondary_hash10,
- &primary_bkt10, &secondary_bkt10, hash_vals, keys, h);
- lookup_stage1(idx11, &primary_hash11, &secondary_hash11,
- &primary_bkt11, &secondary_bkt11, hash_vals, keys, h);
- lookup_stage2(idx20, primary_hash20, secondary_hash20,
- primary_bkt20, secondary_bkt20, &k_slot20, positions,
- &extra_hits_mask, key_store, h);
- lookup_stage2(idx21, primary_hash21, secondary_hash21,
- primary_bkt21, secondary_bkt21, &k_slot21, positions,
- &extra_hits_mask, key_store, h);
- lookup_stage3(idx30, k_slot30, keys, positions, data, &hits, h);
- lookup_stage3(idx31, k_slot31, keys, positions, data, &hits, h);
- }
+ if (!!key_idx & !rte_hash_cmp_eq(key_slot->key, keys[i], h)) {
+ if (data != NULL)
+ data[i] = key_slot->pdata;
- k_slot30 = k_slot20, k_slot31 = k_slot21;
- idx30 = idx20, idx31 = idx21;
- primary_bkt20 = primary_bkt10;
- primary_bkt21 = primary_bkt11;
- secondary_bkt20 = secondary_bkt10;
- secondary_bkt21 = secondary_bkt11;
- primary_hash20 = primary_hash10;
- primary_hash21 = primary_hash11;
- secondary_hash20 = secondary_hash10;
- secondary_hash21 = secondary_hash11;
- idx20 = idx10, idx21 = idx11;
- idx10 = idx00, idx11 = idx01;
-
- lookup_stage1(idx10, &primary_hash10, &secondary_hash10,
- &primary_bkt10, &secondary_bkt10, hash_vals, keys, h);
- lookup_stage1(idx11, &primary_hash11, &secondary_hash11,
- &primary_bkt11, &secondary_bkt11, hash_vals, keys, h);
- lookup_stage2(idx20, primary_hash20, secondary_hash20, primary_bkt20,
- secondary_bkt20, &k_slot20, positions, &extra_hits_mask,
- key_store, h);
- lookup_stage2(idx21, primary_hash21, secondary_hash21, primary_bkt21,
- secondary_bkt21, &k_slot21, positions, &extra_hits_mask,
- key_store, h);
- lookup_stage3(idx30, k_slot30, keys, positions, data, &hits, h);
- lookup_stage3(idx31, k_slot31, keys, positions, data, &hits, h);
-
- k_slot30 = k_slot20, k_slot31 = k_slot21;
- idx30 = idx20, idx31 = idx21;
- primary_bkt20 = primary_bkt10;
- primary_bkt21 = primary_bkt11;
- secondary_bkt20 = secondary_bkt10;
- secondary_bkt21 = secondary_bkt11;
- primary_hash20 = primary_hash10;
- primary_hash21 = primary_hash11;
- secondary_hash20 = secondary_hash10;
- secondary_hash21 = secondary_hash11;
- idx20 = idx10, idx21 = idx11;
-
- lookup_stage2(idx20, primary_hash20, secondary_hash20, primary_bkt20,
- secondary_bkt20, &k_slot20, positions, &extra_hits_mask,
- key_store, h);
- lookup_stage2(idx21, primary_hash21, secondary_hash21, primary_bkt21,
- secondary_bkt21, &k_slot21, positions, &extra_hits_mask,
- key_store, h);
- lookup_stage3(idx30, k_slot30, keys, positions, data, &hits, h);
- lookup_stage3(idx31, k_slot31, keys, positions, data, &hits, h);
-
- k_slot30 = k_slot20, k_slot31 = k_slot21;
- idx30 = idx20, idx31 = idx21;
-
- lookup_stage3(idx30, k_slot30, keys, positions, data, &hits, h);
- lookup_stage3(idx31, k_slot31, keys, positions, data, &hits, h);
-
- /* ignore any items we have already found */
- extra_hits_mask &= ~hits;
-
- if (unlikely(extra_hits_mask)) {
- /* run a single search for each remaining item */
- do {
- idx = __builtin_ctzl(extra_hits_mask);
- if (data != NULL) {
- ret = rte_hash_lookup_with_hash_data(h,
- keys[idx], hash_vals[idx], &data[idx]);
- if (ret >= 0)
- hits |= 1ULL << idx;
- } else {
- positions[idx] = rte_hash_lookup_with_hash(h,
- keys[idx], hash_vals[idx]);
- if (positions[idx] >= 0)
- hits |= 1llu << idx;
+ hits |= 1ULL << i;
+ positions[i] = key_idx - 1;
+ goto next_key;
}
- extra_hits_mask &= ~(1llu << idx);
- } while (extra_hits_mask);
- }
+ sec_hitmask[i] &= ~(1 << (hit_index));
+ }
- miss_mask &= ~hits;
- if (unlikely(miss_mask)) {
- do {
- idx = __builtin_ctzl(miss_mask);
- positions[idx] = -ENOENT;
- miss_mask &= ~(1llu << idx);
- } while (miss_mask);
+next_key:
+ continue;
}
if (hit_mask != NULL)
@@ -1233,7 +1153,7 @@ rte_hash_iterate(const struct rte_hash *h, const void **key, void **data, uint32
idx = *next % RTE_HASH_BUCKET_ENTRIES;
/* If current position is empty, go to the next one */
- while (h->buckets[bucket_idx].signatures[idx].sig == NULL_SIGNATURE) {
+ while (h->buckets[bucket_idx].key_idx[idx] == EMPTY_SLOT) {
(*next)++;
/* End of table */
if (*next == total_entries)
diff --git a/lib/librte_hash/rte_cuckoo_hash.h b/lib/librte_hash/rte_cuckoo_hash.h
index 9625fffe..1b8ffed8 100644
--- a/lib/librte_hash/rte_cuckoo_hash.h
+++ b/lib/librte_hash/rte_cuckoo_hash.h
@@ -130,10 +130,12 @@ enum add_key_case {
};
/** Number of items per bucket. */
-#define RTE_HASH_BUCKET_ENTRIES 4
+#define RTE_HASH_BUCKET_ENTRIES 8
#define NULL_SIGNATURE 0
+#define EMPTY_SLOT 0
+
#define KEY_ALIGNMENT 16
#define LCORE_CACHE_SIZE 64
@@ -151,17 +153,6 @@ struct lcore_cache {
void *objs[LCORE_CACHE_SIZE]; /**< Cache objects */
} __rte_cache_aligned;
-/* Structure storing both primary and secondary hashes */
-struct rte_hash_signatures {
- union {
- struct {
- hash_sig_t current;
- hash_sig_t alt;
- };
- uint64_t sig;
- };
-};
-
/* Structure that stores key-value pair */
struct rte_hash_key {
union {
@@ -172,11 +163,22 @@ struct rte_hash_key {
char key[0];
} __attribute__((aligned(KEY_ALIGNMENT)));
+/* All different signature compare functions */
+enum rte_hash_sig_compare_function {
+ RTE_HASH_COMPARE_SCALAR = 0,
+ RTE_HASH_COMPARE_SSE,
+ RTE_HASH_COMPARE_AVX2,
+ RTE_HASH_COMPARE_NUM
+};
+
/** Bucket structure */
struct rte_hash_bucket {
- struct rte_hash_signatures signatures[RTE_HASH_BUCKET_ENTRIES];
- /* Includes dummy key index that always contains index 0 */
- uint32_t key_idx[RTE_HASH_BUCKET_ENTRIES + 1];
+ hash_sig_t sig_current[RTE_HASH_BUCKET_ENTRIES];
+
+ uint32_t key_idx[RTE_HASH_BUCKET_ENTRIES];
+
+ hash_sig_t sig_alt[RTE_HASH_BUCKET_ENTRIES];
+
uint8_t flag[RTE_HASH_BUCKET_ENTRIES];
} __rte_cache_aligned;
@@ -185,30 +187,38 @@ struct rte_hash {
char name[RTE_HASH_NAMESIZE]; /**< Name of the hash. */
uint32_t entries; /**< Total table entries. */
uint32_t num_buckets; /**< Number of buckets in table. */
- uint32_t key_len; /**< Length of hash key. */
+
+ struct rte_ring *free_slots;
+ /**< Ring that stores all indexes of the free slots in the key table */
+ uint8_t hw_trans_mem_support;
+ /**< Hardware transactional memory support */
+ struct lcore_cache *local_free_slots;
+ /**< Local cache per lcore, storing some indexes of the free slots */
+ enum add_key_case add_key; /**< Multi-writer hash add behavior */
+
+ rte_spinlock_t *multiwriter_lock; /**< Multi-writer spinlock for w/o TM */
+
+ /* Fields used in lookup */
+
+ uint32_t key_len __rte_cache_aligned;
+ /**< Length of hash key. */
rte_hash_function hash_func; /**< Function used to calculate hash. */
uint32_t hash_func_init_val; /**< Init value used by hash_func. */
rte_hash_cmp_eq_t rte_hash_custom_cmp_eq;
/**< Custom function used to compare keys. */
enum cmp_jump_table_case cmp_jump_table_idx;
/**< Indicates which compare function to use. */
- uint32_t bucket_bitmask; /**< Bitmask for getting bucket index
- from hash signature. */
+ enum rte_hash_sig_compare_function sig_cmp_fn;
+ /**< Indicates which signature compare function to use. */
+ uint32_t bucket_bitmask;
+ /**< Bitmask for getting bucket index from hash signature. */
uint32_t key_entry_size; /**< Size of each key entry. */
- struct rte_ring *free_slots; /**< Ring that stores all indexes
- of the free slots in the key table */
void *key_store; /**< Table storing all keys and data */
- struct rte_hash_bucket *buckets; /**< Table with buckets storing all the
- hash values and key indexes
- to the key table*/
- uint8_t hw_trans_mem_support; /**< Hardware transactional
- memory support */
- struct lcore_cache *local_free_slots;
- /**< Local cache per lcore, storing some indexes of the free slots */
- enum add_key_case add_key; /**< Multi-writer hash add behavior */
-
- rte_spinlock_t *multiwriter_lock; /**< Multi-writer spinlock for w/o TM */
+ struct rte_hash_bucket *buckets;
+ /**< Table with buckets storing all the hash values and key indexes
+ * to the key table.
+ */
} __rte_cache_aligned;
struct queue_node {
diff --git a/lib/librte_hash/rte_cuckoo_hash_x86.h b/lib/librte_hash/rte_cuckoo_hash_x86.h
index ace1bd2e..0c94244a 100644
--- a/lib/librte_hash/rte_cuckoo_hash_x86.h
+++ b/lib/librte_hash/rte_cuckoo_hash_x86.h
@@ -53,10 +53,9 @@ rte_hash_cuckoo_insert_mw_tm(struct rte_hash_bucket *prim_bkt,
*/
for (i = 0; i < RTE_HASH_BUCKET_ENTRIES; i++) {
/* Check if slot is available */
- if (likely(prim_bkt->signatures[i].sig ==
- NULL_SIGNATURE)) {
- prim_bkt->signatures[i].current = sig;
- prim_bkt->signatures[i].alt = alt_hash;
+ if (likely(prim_bkt->key_idx[i] == EMPTY_SLOT)) {
+ prim_bkt->sig_current[i] = sig;
+ prim_bkt->sig_alt[i] = alt_hash;
prim_bkt->key_idx[i] = new_idx;
break;
}
@@ -102,7 +101,7 @@ rte_hash_cuckoo_move_insert_mw_tm(const struct rte_hash *h,
prev_slot = curr_node->prev_slot;
prev_alt_bkt_idx
- = prev_bkt->signatures[prev_slot].alt
+ = prev_bkt->sig_alt[prev_slot]
& h->bucket_bitmask;
if (unlikely(&h->buckets[prev_alt_bkt_idx]
@@ -114,10 +113,10 @@ rte_hash_cuckoo_move_insert_mw_tm(const struct rte_hash *h,
* Cuckoo insert to move elements back to its
* primary bucket if available
*/
- curr_bkt->signatures[curr_slot].alt =
- prev_bkt->signatures[prev_slot].current;
- curr_bkt->signatures[curr_slot].current =
- prev_bkt->signatures[prev_slot].alt;
+ curr_bkt->sig_alt[curr_slot] =
+ prev_bkt->sig_current[prev_slot];
+ curr_bkt->sig_current[curr_slot] =
+ prev_bkt->sig_alt[prev_slot];
curr_bkt->key_idx[curr_slot]
= prev_bkt->key_idx[prev_slot];
@@ -126,8 +125,8 @@ rte_hash_cuckoo_move_insert_mw_tm(const struct rte_hash *h,
curr_bkt = curr_node->bkt;
}
- curr_bkt->signatures[curr_slot].current = sig;
- curr_bkt->signatures[curr_slot].alt = alt_hash;
+ curr_bkt->sig_current[curr_slot] = sig;
+ curr_bkt->sig_alt[curr_slot] = alt_hash;
curr_bkt->key_idx[curr_slot] = new_idx;
rte_xend();
@@ -172,7 +171,7 @@ rte_hash_cuckoo_make_space_mw_tm(const struct rte_hash *h,
RTE_HASH_BUCKET_ENTRIES)) {
curr_bkt = tail->bkt;
for (i = 0; i < RTE_HASH_BUCKET_ENTRIES; i++) {
- if (curr_bkt->signatures[i].sig == NULL_SIGNATURE) {
+ if (curr_bkt->key_idx[i] == EMPTY_SLOT) {
if (likely(rte_hash_cuckoo_move_insert_mw_tm(h,
tail, i, sig,
alt_hash, new_idx) == 0))
@@ -180,7 +179,7 @@ rte_hash_cuckoo_make_space_mw_tm(const struct rte_hash *h,
}
/* Enqueue new node and keep prev node info */
- alt_bkt = &(h->buckets[curr_bkt->signatures[i].alt
+ alt_bkt = &(h->buckets[curr_bkt->sig_alt[i]
& h->bucket_bitmask]);
head->bkt = alt_bkt;
head->prev = tail;
diff --git a/lib/librte_hash/rte_fbk_hash.h b/lib/librte_hash/rte_fbk_hash.h
index a430961d..bd46048f 100644
--- a/lib/librte_hash/rte_fbk_hash.h
+++ b/lib/librte_hash/rte_fbk_hash.h
@@ -115,7 +115,7 @@ struct rte_fbk_hash_table {
uint32_t init_val; /**< For initialising hash function. */
/** A flat table of all buckets. */
- union rte_fbk_hash_entry t[0];
+ union rte_fbk_hash_entry t[];
};
/**
diff --git a/lib/librte_hash/rte_thash.h b/lib/librte_hash/rte_thash.h
index d98e98e7..a4886a8c 100644
--- a/lib/librte_hash/rte_thash.h
+++ b/lib/librte_hash/rte_thash.h
@@ -54,6 +54,7 @@ extern "C" {
#include <stdint.h>
#include <rte_byteorder.h>
#include <rte_ip.h>
+#include <rte_common.h>
#ifdef __SSE3__
#include <rte_vect.h>
@@ -102,6 +103,7 @@ static const __m128i rte_thash_ipv6_bswap_mask = {
struct rte_ipv4_tuple {
uint32_t src_addr;
uint32_t dst_addr;
+ RTE_STD_C11
union {
struct {
uint16_t dport;
@@ -119,6 +121,7 @@ struct rte_ipv4_tuple {
struct rte_ipv6_tuple {
uint8_t src_addr[16];
uint8_t dst_addr[16];
+ RTE_STD_C11
union {
struct {
uint16_t dport;
diff --git a/lib/librte_ip_frag/Makefile b/lib/librte_ip_frag/Makefile
index e97dfbd3..43f8b1e3 100644
--- a/lib/librte_ip_frag/Makefile
+++ b/lib/librte_ip_frag/Makefile
@@ -54,6 +54,7 @@ SYMLINK-$(CONFIG_RTE_LIBRTE_IP_FRAG)-include += rte_ip_frag.h
DEPDIRS-$(CONFIG_RTE_LIBRTE_IP_FRAG) += lib/librte_eal
DEPDIRS-$(CONFIG_RTE_LIBRTE_IP_FRAG) += lib/librte_ether
+DEPDIRS-$(CONFIG_RTE_LIBRTE_IP_FRAG) += lib/librte_hash
DEPDIRS-$(CONFIG_RTE_LIBRTE_IP_FRAG) += lib/librte_mbuf
DEPDIRS-$(CONFIG_RTE_LIBRTE_IP_FRAG) += lib/librte_mempool
diff --git a/lib/librte_ip_frag/rte_ip_frag.h b/lib/librte_ip_frag/rte_ip_frag.h
index 9ac7081c..6708906d 100644
--- a/lib/librte_ip_frag/rte_ip_frag.h
+++ b/lib/librte_ip_frag/rte_ip_frag.h
@@ -124,7 +124,7 @@ struct rte_ip_frag_tbl {
struct ip_frag_pkt *last; /**< last used entry. */
struct ip_pkt_list lru; /**< LRU list for table entries. */
struct ip_frag_tbl_stat stat; /**< statistics counters. */
- struct ip_frag_pkt pkt[0]; /**< hash table. */
+ __extension__ struct ip_frag_pkt pkt[0]; /**< hash table. */
};
/** IPv6 fragment extension header */
diff --git a/lib/librte_ivshmem/Makefile b/lib/librte_ivshmem/Makefile
deleted file mode 100644
index c099438c..00000000
--- a/lib/librte_ivshmem/Makefile
+++ /dev/null
@@ -1,54 +0,0 @@
-# BSD LICENSE
-#
-# Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
-# All rights reserved.
-#
-# Redistribution and use in source and binary forms, with or without
-# modification, are permitted provided that the following conditions
-# are met:
-#
-# * Redistributions of source code must retain the above copyright
-# notice, this list of conditions and the following disclaimer.
-# * Redistributions in binary form must reproduce the above copyright
-# notice, this list of conditions and the following disclaimer in
-# the documentation and/or other materials provided with the
-# distribution.
-# * Neither the name of Intel Corporation nor the names of its
-# contributors may be used to endorse or promote products derived
-# from this software without specific prior written permission.
-#
-# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
-# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
-# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
-# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
-# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
-# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
-# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
-# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
-# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-
-include $(RTE_SDK)/mk/rte.vars.mk
-
-# library name
-LIB = librte_ivshmem.a
-
-CFLAGS += $(WERROR_FLAGS) -I$(SRCDIR) -O3
-
-EXPORT_MAP := rte_ivshmem_version.map
-
-LIBABIVER := 1
-
-# all source are stored in SRCS-y
-SRCS-$(CONFIG_RTE_LIBRTE_IVSHMEM) := rte_ivshmem.c
-
-# install includes
-SYMLINK-$(CONFIG_RTE_LIBRTE_IVSHMEM)-include := rte_ivshmem.h
-
-# this lib needs EAL, ring and mempool
-DEPDIRS-$(CONFIG_RTE_LIBRTE_IVSHMEM) += lib/librte_eal
-DEPDIRS-$(CONFIG_RTE_LIBRTE_IVSHMEM) += lib/librte_ring
-DEPDIRS-$(CONFIG_RTE_LIBRTE_IVSHMEM) += lib/librte_mempool
-
-include $(RTE_SDK)/mk/rte.lib.mk
diff --git a/lib/librte_ivshmem/rte_ivshmem.c b/lib/librte_ivshmem/rte_ivshmem.c
deleted file mode 100644
index c26edb61..00000000
--- a/lib/librte_ivshmem/rte_ivshmem.c
+++ /dev/null
@@ -1,919 +0,0 @@
-/*-
- * BSD LICENSE
- *
- * Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- *
- * * Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in
- * the documentation and/or other materials provided with the
- * distribution.
- * * Neither the name of Intel Corporation nor the names of its
- * contributors may be used to endorse or promote products derived
- * from this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- */
-#include <fcntl.h>
-#include <limits.h>
-#include <unistd.h>
-#include <sys/mman.h>
-#include <string.h>
-#include <stdio.h>
-
-#include <rte_eal_memconfig.h>
-#include <rte_memory.h>
-#include <rte_ivshmem.h>
-#include <rte_string_fns.h>
-#include <rte_common.h>
-#include <rte_log.h>
-#include <rte_debug.h>
-#include <rte_spinlock.h>
-#include <rte_common.h>
-#include <rte_malloc.h>
-
-#include "rte_ivshmem.h"
-
-#define IVSHMEM_CONFIG_FILE_FMT "/var/run/.dpdk_ivshmem_metadata_%s"
-#define IVSHMEM_QEMU_CMD_LINE_HEADER_FMT "-device ivshmem,size=%" PRIu64 "M,shm=fd%s"
-#define IVSHMEM_QEMU_CMD_FD_FMT ":%s:0x%" PRIx64 ":0x%" PRIx64
-#define IVSHMEM_QEMU_CMDLINE_BUFSIZE 1024
-#define IVSHMEM_MAX_PAGES (1 << 12)
-#define adjacent(x,y) (((x).phys_addr+(x).len)==(y).phys_addr)
-#define METADATA_SIZE_ALIGNED \
- (RTE_ALIGN_CEIL(sizeof(struct rte_ivshmem_metadata),pagesz))
-
-#define GET_PAGEMAP_ADDR(in,addr,dlm,err) \
-{ \
- char *end; \
- errno = 0; \
- addr = strtoull((in), &end, 16); \
- if (errno != 0 || *end != (dlm)) { \
- RTE_LOG(ERR, EAL, err); \
- goto error; \
- } \
- (in) = end + 1; \
-}
-
-static int pagesz;
-
-struct memseg_cache_entry {
- char filepath[PATH_MAX];
- uint64_t offset;
- uint64_t len;
-};
-
-struct ivshmem_config {
- struct rte_ivshmem_metadata * metadata;
- struct memseg_cache_entry memseg_cache[IVSHMEM_MAX_PAGES];
- /**< account for multiple files per segment case */
- struct flock lock;
- rte_spinlock_t sl;
-};
-
-static struct ivshmem_config
-ivshmem_global_config[RTE_LIBRTE_IVSHMEM_MAX_METADATA_FILES];
-
-static rte_spinlock_t global_cfg_sl;
-
-static struct ivshmem_config *
-get_config_by_name(const char * name)
-{
- struct rte_ivshmem_metadata * config;
- unsigned i;
-
- for (i = 0; i < RTE_DIM(ivshmem_global_config); i++) {
- config = ivshmem_global_config[i].metadata;
- if (config == NULL)
- return NULL;
- if (strncmp(name, config->name, IVSHMEM_NAME_LEN) == 0)
- return &ivshmem_global_config[i];
- }
-
- return NULL;
-}
-
-static int
-overlap(const struct rte_memzone * s1, const struct rte_memzone * s2)
-{
- uint64_t start1, end1, start2, end2;
-
- start1 = s1->addr_64;
- end1 = s1->addr_64 + s1->len;
- start2 = s2->addr_64;
- end2 = s2->addr_64 + s2->len;
-
- if (start1 >= start2 && start1 < end2)
- return 1;
- if (start2 >= start1 && start2 < end1)
- return 1;
-
- return 0;
-}
-
-static struct rte_memzone *
-get_memzone_by_addr(const void * addr)
-{
- struct rte_memzone * tmp, * mz;
- struct rte_mem_config * mcfg;
- int i;
-
- mcfg = rte_eal_get_configuration()->mem_config;
- mz = NULL;
-
- /* find memzone for the ring */
- for (i = 0; i < RTE_MAX_MEMZONE; i++) {
- tmp = &mcfg->memzone[i];
-
- if (tmp->addr_64 == (uint64_t) addr) {
- mz = tmp;
- break;
- }
- }
-
- return mz;
-}
-
-static int
-entry_compare(const void * a, const void * b)
-{
- const struct rte_ivshmem_metadata_entry * e1 =
- (const struct rte_ivshmem_metadata_entry*) a;
- const struct rte_ivshmem_metadata_entry * e2 =
- (const struct rte_ivshmem_metadata_entry*) b;
-
- /* move unallocated zones to the end */
- if (e1->mz.addr == NULL && e2->mz.addr == NULL)
- return 0;
- if (e1->mz.addr == 0)
- return 1;
- if (e2->mz.addr == 0)
- return -1;
-
- return e1->mz.phys_addr > e2->mz.phys_addr;
-}
-
-/* fills hugepage cache entry for a given start virt_addr */
-static int
-get_hugefile_by_virt_addr(uint64_t virt_addr, struct memseg_cache_entry * e)
-{
- uint64_t start_addr, end_addr;
- char *start,*path_end;
- char buf[PATH_MAX*2];
- FILE *f;
-
- start = NULL;
- path_end = NULL;
- start_addr = 0;
-
- memset(e->filepath, 0, sizeof(e->filepath));
-
- /* open /proc/self/maps */
- f = fopen("/proc/self/maps", "r");
- if (f == NULL) {
- RTE_LOG(ERR, EAL, "cannot open /proc/self/maps!\n");
- return -1;
- }
-
- /* parse maps */
- while (fgets(buf, sizeof(buf), f) != NULL) {
-
- /* get endptr to end of start addr */
- start = buf;
-
- GET_PAGEMAP_ADDR(start,start_addr,'-',
- "Cannot find start address in maps!\n");
-
- /* if start address is bigger than our address, skip */
- if (start_addr > virt_addr)
- continue;
-
- GET_PAGEMAP_ADDR(start,end_addr,' ',
- "Cannot find end address in maps!\n");
-
- /* if end address is less than our address, skip */
- if (end_addr <= virt_addr)
- continue;
-
- /* find where the path starts */
- start = strstr(start, "/");
-
- if (start == NULL)
- continue;
-
- /* at this point, we know that this is our map.
- * now let's find the file */
- path_end = strstr(start, "\n");
- break;
- }
-
- if (path_end == NULL) {
- RTE_LOG(ERR, EAL, "Hugefile path not found!\n");
- goto error;
- }
-
- /* calculate offset and copy the file path */
- snprintf(e->filepath, RTE_PTR_DIFF(path_end, start) + 1, "%s", start);
-
- e->offset = virt_addr - start_addr;
-
- fclose(f);
-
- return 0;
-error:
- fclose(f);
- return -1;
-}
-
-/*
- * This is a complex function. What it does is the following:
- * 1. Goes through metadata and gets list of hugepages involved
- * 2. Sorts the hugepages by size (1G first)
- * 3. Goes through metadata again and writes correct offsets
- * 4. Goes through pages and finds out their filenames, offsets etc.
- */
-static int
-build_config(struct rte_ivshmem_metadata * metadata)
-{
- struct rte_ivshmem_metadata_entry * e_local;
- struct memseg_cache_entry * ms_local;
- struct rte_memseg pages[IVSHMEM_MAX_PAGES];
- struct rte_ivshmem_metadata_entry *entry;
- struct memseg_cache_entry * c_entry, * prev_entry;
- struct ivshmem_config * config;
- unsigned i, j, mz_iter, ms_iter;
- uint64_t biggest_len;
- int biggest_idx;
-
- /* return error if we try to use an unknown config file */
- config = get_config_by_name(metadata->name);
- if (config == NULL) {
- RTE_LOG(ERR, EAL, "Cannot find IVSHMEM config %s!\n", metadata->name);
- goto fail_e;
- }
-
- memset(pages, 0, sizeof(pages));
-
- e_local = malloc(sizeof(config->metadata->entry));
- if (e_local == NULL)
- goto fail_e;
- ms_local = malloc(sizeof(config->memseg_cache));
- if (ms_local == NULL)
- goto fail_ms;
-
-
- /* make local copies before doing anything */
- memcpy(e_local, config->metadata->entry, sizeof(config->metadata->entry));
- memcpy(ms_local, config->memseg_cache, sizeof(config->memseg_cache));
-
- qsort(e_local, RTE_DIM(config->metadata->entry), sizeof(struct rte_ivshmem_metadata_entry),
- entry_compare);
-
- /* first pass - collect all huge pages */
- for (mz_iter = 0; mz_iter < RTE_DIM(config->metadata->entry); mz_iter++) {
-
- entry = &e_local[mz_iter];
-
- uint64_t start_addr = RTE_ALIGN_FLOOR(entry->mz.addr_64,
- entry->mz.hugepage_sz);
- uint64_t offset = entry->mz.addr_64 - start_addr;
- uint64_t len = RTE_ALIGN_CEIL(entry->mz.len + offset,
- entry->mz.hugepage_sz);
-
- if (entry->mz.addr_64 == 0 || start_addr == 0 || len == 0)
- continue;
-
- int start_page;
-
- /* find first unused page - mz are phys_addr sorted so we don't have to
- * look out for holes */
- for (i = 0; i < RTE_DIM(pages); i++) {
-
- /* skip if we already have this page */
- if (pages[i].addr_64 == start_addr) {
- start_addr += entry->mz.hugepage_sz;
- len -= entry->mz.hugepage_sz;
- continue;
- }
- /* we found a new page */
- else if (pages[i].addr_64 == 0) {
- start_page = i;
- break;
- }
- }
- if (i == RTE_DIM(pages)) {
- RTE_LOG(ERR, EAL, "Cannot find unused page!\n");
- goto fail;
- }
-
- /* populate however many pages the memzone has */
- for (i = start_page; i < RTE_DIM(pages) && len != 0; i++) {
-
- pages[i].addr_64 = start_addr;
- pages[i].len = entry->mz.hugepage_sz;
- start_addr += entry->mz.hugepage_sz;
- len -= entry->mz.hugepage_sz;
- }
- /* if there's still length left */
- if (len != 0) {
- RTE_LOG(ERR, EAL, "Not enough space for pages!\n");
- goto fail;
- }
- }
-
- /* second pass - sort pages by size */
- for (i = 0; i < RTE_DIM(pages); i++) {
-
- if (pages[i].addr == NULL)
- break;
-
- biggest_len = 0;
- biggest_idx = -1;
-
- /*
- * browse all entries starting at 'i', and find the
- * entry with the smallest addr
- */
- for (j=i; j< RTE_DIM(pages); j++) {
- if (pages[j].addr == NULL)
- break;
- if (biggest_len == 0 ||
- pages[j].len > biggest_len) {
- biggest_len = pages[j].len;
- biggest_idx = j;
- }
- }
-
- /* should not happen */
- if (biggest_idx == -1) {
- RTE_LOG(ERR, EAL, "Error sorting by size!\n");
- goto fail;
- }
- if (i != (unsigned) biggest_idx) {
- struct rte_memseg tmp;
-
- memcpy(&tmp, &pages[biggest_idx], sizeof(struct rte_memseg));
-
- /* we don't want to break contiguousness, so instead of just
- * swapping segments, we move all the preceding segments to the
- * right and then put the old segment @ biggest_idx in place of
- * segment @ i */
- for (j = biggest_idx - 1; j >= i; j--) {
- memcpy(&pages[j+1], &pages[j], sizeof(struct rte_memseg));
- memset(&pages[j], 0, sizeof(struct rte_memseg));
- if (j == 0)
- break;
- }
-
- /* put old biggest segment to its new place */
- memcpy(&pages[i], &tmp, sizeof(struct rte_memseg));
- }
- }
-
- /* third pass - write correct offsets */
- for (mz_iter = 0; mz_iter < RTE_DIM(config->metadata->entry); mz_iter++) {
-
- uint64_t offset = 0;
-
- entry = &e_local[mz_iter];
-
- if (entry->mz.addr_64 == 0)
- break;
-
- /* find page for current memzone */
- for (i = 0; i < RTE_DIM(pages); i++) {
- /* we found our page */
- if (entry->mz.addr_64 >= pages[i].addr_64 &&
- entry->mz.addr_64 < pages[i].addr_64 + pages[i].len) {
- entry->offset = (entry->mz.addr_64 - pages[i].addr_64) +
- offset;
- break;
- }
- offset += pages[i].len;
- }
- if (i == RTE_DIM(pages)) {
- RTE_LOG(ERR, EAL, "Page not found!\n");
- goto fail;
- }
- }
-
- ms_iter = 0;
- prev_entry = NULL;
-
- /* fourth pass - create proper memseg cache */
- for (i = 0; i < RTE_DIM(pages) &&
- ms_iter <= RTE_DIM(config->memseg_cache); i++) {
- if (pages[i].addr_64 == 0)
- break;
-
-
- if (ms_iter == RTE_DIM(pages)) {
- RTE_LOG(ERR, EAL, "The universe has collapsed!\n");
- goto fail;
- }
-
- c_entry = &ms_local[ms_iter];
- c_entry->len = pages[i].len;
-
- if (get_hugefile_by_virt_addr(pages[i].addr_64, c_entry) < 0)
- goto fail;
-
- /* if previous entry has the same filename and is contiguous,
- * clear current entry and increase previous entry's length
- */
- if (prev_entry != NULL &&
- strncmp(c_entry->filepath, prev_entry->filepath,
- sizeof(c_entry->filepath)) == 0 &&
- prev_entry->offset + prev_entry->len == c_entry->offset) {
- prev_entry->len += pages[i].len;
- memset(c_entry, 0, sizeof(struct memseg_cache_entry));
- }
- else {
- prev_entry = c_entry;
- ms_iter++;
- }
- }
-
- /* update current configuration with new valid data */
- memcpy(config->metadata->entry, e_local, sizeof(config->metadata->entry));
- memcpy(config->memseg_cache, ms_local, sizeof(config->memseg_cache));
-
- free(ms_local);
- free(e_local);
-
- return 0;
-fail:
- free(ms_local);
-fail_ms:
- free(e_local);
-fail_e:
- return -1;
-}
-
-static int
-add_memzone_to_metadata(const struct rte_memzone * mz,
- struct ivshmem_config * config)
-{
- struct rte_ivshmem_metadata_entry * entry;
- unsigned i, idx;
- struct rte_mem_config *mcfg;
-
- if (mz->len == 0) {
- RTE_LOG(ERR, EAL, "Trying to add an empty memzone\n");
- return -1;
- }
-
- rte_spinlock_lock(&config->sl);
-
- mcfg = rte_eal_get_configuration()->mem_config;
-
- /* it prevents the memzone being freed while we add it to the metadata */
- rte_rwlock_write_lock(&mcfg->mlock);
-
- /* find free slot in this config */
- for (i = 0; i < RTE_DIM(config->metadata->entry); i++) {
- entry = &config->metadata->entry[i];
-
- if (&entry->mz.addr_64 != 0 && overlap(mz, &entry->mz)) {
- RTE_LOG(ERR, EAL, "Overlapping memzones!\n");
- goto fail;
- }
-
- /* if addr is zero, the memzone is probably free */
- if (entry->mz.addr_64 == 0) {
- RTE_LOG(DEBUG, EAL, "Adding memzone '%s' at %p to metadata %s\n",
- mz->name, mz->addr, config->metadata->name);
- memcpy(&entry->mz, mz, sizeof(struct rte_memzone));
-
- /* run config file parser */
- if (build_config(config->metadata) < 0)
- goto fail;
-
- break;
- }
- }
-
- /* if we reached the maximum, that means we have no place in config */
- if (i == RTE_DIM(config->metadata->entry)) {
- RTE_LOG(ERR, EAL, "No space left in IVSHMEM metadata %s!\n",
- config->metadata->name);
- goto fail;
- }
-
- idx = ((uintptr_t)mz - (uintptr_t)mcfg->memzone);
- idx = idx / sizeof(struct rte_memzone);
-
- /* mark the memzone not freeable */
- mcfg->memzone[idx].ioremap_addr = mz->phys_addr;
-
- rte_rwlock_write_unlock(&mcfg->mlock);
- rte_spinlock_unlock(&config->sl);
- return 0;
-fail:
- rte_rwlock_write_unlock(&mcfg->mlock);
- rte_spinlock_unlock(&config->sl);
- return -1;
-}
-
-static int
-add_ring_to_metadata(const struct rte_ring * r,
- struct ivshmem_config * config)
-{
- struct rte_memzone * mz;
-
- mz = get_memzone_by_addr(r);
-
- if (!mz) {
- RTE_LOG(ERR, EAL, "Cannot find memzone for ring!\n");
- return -1;
- }
-
- return add_memzone_to_metadata(mz, config);
-}
-
-static int
-add_mempool_memzone_to_metadata(const void *addr,
- struct ivshmem_config *config)
-{
- struct rte_memzone *mz;
-
- mz = get_memzone_by_addr(addr);
-
- if (!mz) {
- RTE_LOG(ERR, EAL, "Cannot find memzone for mempool!\n");
- return -1;
- }
-
- return add_memzone_to_metadata(mz, config);
-}
-
-static int
-add_mempool_to_metadata(const struct rte_mempool *mp,
- struct ivshmem_config *config)
-{
- struct rte_mempool_memhdr *memhdr;
- int ret;
-
- ret = add_mempool_memzone_to_metadata(mp, config);
- if (ret < 0)
- return -1;
-
- STAILQ_FOREACH(memhdr, &mp->mem_list, next) {
- ret = add_mempool_memzone_to_metadata(memhdr->addr, config);
- if (ret < 0)
- return -1;
- }
-
- /* mempool consists of memzone and ring */
- return add_ring_to_metadata(mp->pool_data, config);
-}
-
-int
-rte_ivshmem_metadata_add_ring(const struct rte_ring * r, const char * name)
-{
- struct ivshmem_config * config;
-
- if (name == NULL || r == NULL)
- return -1;
-
- config = get_config_by_name(name);
-
- if (config == NULL) {
- RTE_LOG(ERR, EAL, "Cannot find IVSHMEM config %s!\n", name);
- return -1;
- }
-
- return add_ring_to_metadata(r, config);
-}
-
-int
-rte_ivshmem_metadata_add_memzone(const struct rte_memzone * mz, const char * name)
-{
- struct ivshmem_config * config;
-
- if (name == NULL || mz == NULL)
- return -1;
-
- config = get_config_by_name(name);
-
- if (config == NULL) {
- RTE_LOG(ERR, EAL, "Cannot find IVSHMEM config %s!\n", name);
- return -1;
- }
-
- return add_memzone_to_metadata(mz, config);
-}
-
-int
-rte_ivshmem_metadata_add_mempool(const struct rte_mempool * mp, const char * name)
-{
- struct ivshmem_config * config;
-
- if (name == NULL || mp == NULL)
- return -1;
-
- config = get_config_by_name(name);
-
- if (config == NULL) {
- RTE_LOG(ERR, EAL, "Cannot find IVSHMEM config %s!\n", name);
- return -1;
- }
-
- return add_mempool_to_metadata(mp, config);
-}
-
-static inline void
-ivshmem_config_path(char *buffer, size_t bufflen, const char *name)
-{
- snprintf(buffer, bufflen, IVSHMEM_CONFIG_FILE_FMT, name);
-}
-
-
-
-static inline
-void *ivshmem_metadata_create(const char *name, size_t size,
- struct flock *lock)
-{
- int retval, fd;
- void *metadata_addr;
- char pathname[PATH_MAX];
-
- ivshmem_config_path(pathname, sizeof(pathname), name);
-
- fd = open(pathname, O_RDWR | O_CREAT, 0660);
- if (fd < 0) {
- RTE_LOG(ERR, EAL, "Cannot open '%s'\n", pathname);
- return NULL;
- }
-
- size = METADATA_SIZE_ALIGNED;
-
- retval = fcntl(fd, F_SETLK, lock);
- if (retval < 0){
- close(fd);
- RTE_LOG(ERR, EAL, "Cannot create lock on '%s'. Is another "
- "process using it?\n", pathname);
- return NULL;
- }
-
- retval = ftruncate(fd, size);
- if (retval < 0){
- close(fd);
- RTE_LOG(ERR, EAL, "Cannot resize '%s'\n", pathname);
- return NULL;
- }
-
- metadata_addr = mmap(NULL, size,
- PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
-
- if (metadata_addr == MAP_FAILED){
- RTE_LOG(ERR, EAL, "Cannot mmap memory for '%s'\n", pathname);
-
- /* we don't care if we can't unlock */
- fcntl(fd, F_UNLCK, lock);
- close(fd);
-
- return NULL;
- }
-
- return metadata_addr;
-}
-
-int rte_ivshmem_metadata_create(const char *name)
-{
- struct ivshmem_config * ivshmem_config;
- unsigned index;
-
- if (pagesz == 0)
- pagesz = getpagesize();
-
- if (name == NULL)
- return -1;
-
- rte_spinlock_lock(&global_cfg_sl);
-
- for (index = 0; index < RTE_DIM(ivshmem_global_config); index++) {
- if (ivshmem_global_config[index].metadata == NULL) {
- ivshmem_config = &ivshmem_global_config[index];
- break;
- }
- }
-
- if (index == RTE_DIM(ivshmem_global_config)) {
- RTE_LOG(ERR, EAL, "Cannot create more ivshmem config files. "
- "Maximum has been reached\n");
- rte_spinlock_unlock(&global_cfg_sl);
- return -1;
- }
-
- ivshmem_config->lock.l_type = F_WRLCK;
- ivshmem_config->lock.l_whence = SEEK_SET;
-
- ivshmem_config->lock.l_start = 0;
- ivshmem_config->lock.l_len = METADATA_SIZE_ALIGNED;
-
- ivshmem_global_config[index].metadata = ((struct rte_ivshmem_metadata *)
- ivshmem_metadata_create(
- name,
- sizeof(struct rte_ivshmem_metadata),
- &ivshmem_config->lock));
-
- if (ivshmem_global_config[index].metadata == NULL) {
- rte_spinlock_unlock(&global_cfg_sl);
- return -1;
- }
-
- /* Metadata setup */
- memset(ivshmem_config->metadata, 0, sizeof(struct rte_ivshmem_metadata));
- ivshmem_config->metadata->magic_number = IVSHMEM_MAGIC;
- snprintf(ivshmem_config->metadata->name,
- sizeof(ivshmem_config->metadata->name), "%s", name);
-
- rte_spinlock_unlock(&global_cfg_sl);
-
- return 0;
-}
-
-int
-rte_ivshmem_metadata_cmdline_generate(char *buffer, unsigned size, const char *name)
-{
- const struct memseg_cache_entry * ms_cache, *entry;
- struct ivshmem_config * config;
- char cmdline[IVSHMEM_QEMU_CMDLINE_BUFSIZE], *cmdline_ptr;
- char cfg_file_path[PATH_MAX];
- unsigned remaining_len, tmplen, iter;
- uint64_t shared_mem_size, zero_size, total_size;
-
- if (buffer == NULL || name == NULL)
- return -1;
-
- config = get_config_by_name(name);
-
- if (config == NULL) {
- RTE_LOG(ERR, EAL, "Config %s not found!\n", name);
- return -1;
- }
-
- rte_spinlock_lock(&config->sl);
-
- /* prepare metadata file path */
- snprintf(cfg_file_path, sizeof(cfg_file_path), IVSHMEM_CONFIG_FILE_FMT,
- config->metadata->name);
-
- ms_cache = config->memseg_cache;
-
- cmdline_ptr = cmdline;
- remaining_len = sizeof(cmdline);
-
- shared_mem_size = 0;
- iter = 0;
-
- while ((ms_cache[iter].len != 0) && (iter < RTE_DIM(config->metadata->entry))) {
-
- entry = &ms_cache[iter];
-
- /* Offset and sizes within the current pathname */
- tmplen = snprintf(cmdline_ptr, remaining_len, IVSHMEM_QEMU_CMD_FD_FMT,
- entry->filepath, entry->offset, entry->len);
-
- shared_mem_size += entry->len;
-
- cmdline_ptr = RTE_PTR_ADD(cmdline_ptr, tmplen);
- remaining_len -= tmplen;
-
- if (remaining_len == 0) {
- RTE_LOG(ERR, EAL, "Command line too long!\n");
- rte_spinlock_unlock(&config->sl);
- return -1;
- }
-
- iter++;
- }
-
- total_size = rte_align64pow2(shared_mem_size + METADATA_SIZE_ALIGNED);
- zero_size = total_size - shared_mem_size - METADATA_SIZE_ALIGNED;
-
- /* add /dev/zero to command-line to fill the space */
- tmplen = snprintf(cmdline_ptr, remaining_len, IVSHMEM_QEMU_CMD_FD_FMT,
- "/dev/zero",
- (uint64_t)0x0,
- zero_size);
-
- cmdline_ptr = RTE_PTR_ADD(cmdline_ptr, tmplen);
- remaining_len -= tmplen;
-
- if (remaining_len == 0) {
- RTE_LOG(ERR, EAL, "Command line too long!\n");
- rte_spinlock_unlock(&config->sl);
- return -1;
- }
-
- /* add metadata file to the end of command-line */
- tmplen = snprintf(cmdline_ptr, remaining_len, IVSHMEM_QEMU_CMD_FD_FMT,
- cfg_file_path,
- (uint64_t)0x0,
- METADATA_SIZE_ALIGNED);
-
- cmdline_ptr = RTE_PTR_ADD(cmdline_ptr, tmplen);
- remaining_len -= tmplen;
-
- if (remaining_len == 0) {
- RTE_LOG(ERR, EAL, "Command line too long!\n");
- rte_spinlock_unlock(&config->sl);
- return -1;
- }
-
- /* if current length of the command line is bigger than the buffer supplied
- * by the user, or if command-line is bigger than what IVSHMEM accepts */
- if ((sizeof(cmdline) - remaining_len) > size) {
- RTE_LOG(ERR, EAL, "Buffer is too short!\n");
- rte_spinlock_unlock(&config->sl);
- return -1;
- }
- /* complete the command-line */
- snprintf(buffer, size,
- IVSHMEM_QEMU_CMD_LINE_HEADER_FMT,
- total_size >> 20,
- cmdline);
-
- rte_spinlock_unlock(&config->sl);
-
- return 0;
-}
-
-void
-rte_ivshmem_metadata_dump(FILE *f, const char *name)
-{
- unsigned i = 0;
- struct ivshmem_config * config;
- struct rte_ivshmem_metadata_entry *entry;
-#ifdef RTE_LIBRTE_IVSHMEM_DEBUG
- uint64_t addr;
- uint64_t end, hugepage_sz;
- struct memseg_cache_entry e;
-#endif
-
- if (name == NULL)
- return;
-
- /* return error if we try to use an unknown config file */
- config = get_config_by_name(name);
- if (config == NULL) {
- RTE_LOG(ERR, EAL, "Cannot find IVSHMEM config %s!\n", name);
- return;
- }
-
- rte_spinlock_lock(&config->sl);
-
- entry = &config->metadata->entry[0];
-
- while (entry->mz.addr != NULL && i < RTE_DIM(config->metadata->entry)) {
-
- fprintf(f, "Entry %u: name:<%-20s>, phys:0x%-15lx, len:0x%-15lx, "
- "virt:%-15p, off:0x%-15lx\n",
- i,
- entry->mz.name,
- entry->mz.phys_addr,
- entry->mz.len,
- entry->mz.addr,
- entry->offset);
- i++;
-
-#ifdef RTE_LIBRTE_IVSHMEM_DEBUG
- fprintf(f, "\tHugepage files:\n");
-
- hugepage_sz = entry->mz.hugepage_sz;
- addr = RTE_ALIGN_FLOOR(entry->mz.addr_64, hugepage_sz);
- end = addr + RTE_ALIGN_CEIL(entry->mz.len + (entry->mz.addr_64 - addr),
- hugepage_sz);
-
- for (; addr < end; addr += hugepage_sz) {
- memset(&e, 0, sizeof(e));
-
- get_hugefile_by_virt_addr(addr, &e);
-
- fprintf(f, "\t0x%"PRIx64 "-0x%" PRIx64 " offset: 0x%" PRIx64 " %s\n",
- addr, addr + hugepage_sz, e.offset, e.filepath);
- }
-#endif
- entry++;
- }
-
- rte_spinlock_unlock(&config->sl);
-}
diff --git a/lib/librte_ivshmem/rte_ivshmem.h b/lib/librte_ivshmem/rte_ivshmem.h
deleted file mode 100644
index a5d36d6b..00000000
--- a/lib/librte_ivshmem/rte_ivshmem.h
+++ /dev/null
@@ -1,165 +0,0 @@
-/*-
- * BSD LICENSE
- *
- * Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- *
- * * Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in
- * the documentation and/or other materials provided with the
- * distribution.
- * * Neither the name of Intel Corporation nor the names of its
- * contributors may be used to endorse or promote products derived
- * from this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- */
-
-#ifndef RTE_IVSHMEM_H_
-#define RTE_IVSHMEM_H_
-
-#include <rte_memzone.h>
-#include <rte_mempool.h>
-
-/**
- * @file
- *
- * The RTE IVSHMEM interface provides functions to create metadata files
- * describing memory segments to be shared via QEMU IVSHMEM.
- */
-
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-#define IVSHMEM_MAGIC 0x0BADC0DE
-#define IVSHMEM_NAME_LEN 32
-
-/**
- * Structure that holds IVSHMEM shared metadata entry.
- */
-struct rte_ivshmem_metadata_entry {
- struct rte_memzone mz; /**< shared memzone */
- uint64_t offset; /**< offset of memzone within IVSHMEM device */
-};
-
-/**
- * Structure that holds IVSHMEM metadata.
- */
-struct rte_ivshmem_metadata {
- int magic_number; /**< magic number */
- char name[IVSHMEM_NAME_LEN]; /**< name of the metadata file */
- struct rte_ivshmem_metadata_entry entry[RTE_LIBRTE_IVSHMEM_MAX_ENTRIES];
- /**< metadata entries */
-};
-
-/**
- * Creates metadata file with a given name
- *
- * @param name
- * Name of metadata file to be created
- *
- * @return
- * - On success, zero
- * - On failure, a negative value
- */
-int rte_ivshmem_metadata_create(const char * name);
-
-/**
- * Adds memzone to a specific metadata file
- *
- * @param mz
- * Memzone to be added
- * @param md_name
- * Name of metadata file for the memzone to be added to
- *
- * @return
- * - On success, zero
- * - On failure, a negative value
- */
-int rte_ivshmem_metadata_add_memzone(const struct rte_memzone * mz,
- const char * md_name);
-
-/**
- * Adds a ring descriptor to a specific metadata file
- *
- * @param r
- * Ring descriptor to be added
- * @param md_name
- * Name of metadata file for the ring to be added to
- *
- * @return
- * - On success, zero
- * - On failure, a negative value
- */
-int rte_ivshmem_metadata_add_ring(const struct rte_ring * r,
- const char * md_name);
-
-/**
- * Adds a mempool to a specific metadata file
- *
- * @param mp
- * Mempool to be added
- * @param md_name
- * Name of metadata file for the mempool to be added to
- *
- * @return
- * - On success, zero
- * - On failure, a negative value
- */
-int rte_ivshmem_metadata_add_mempool(const struct rte_mempool * mp,
- const char * md_name);
-
-
-/**
- * Generates the QEMU command-line for IVSHMEM device for a given metadata file.
- * This function is to be called after all the objects were added.
- *
- * @param buffer
- * Buffer to be filled with the command line arguments.
- * @param size
- * Size of the buffer.
- * @param name
- * Name of metadata file to generate QEMU command-line parameters for
- *
- * @return
- * - On success, zero
- * - On failure, a negative value
- */
-int rte_ivshmem_metadata_cmdline_generate(char *buffer, unsigned size,
- const char *name);
-
-
-/**
- * Dump all metadata entries from a given metadata file to the console.
- *
- * @param f
- * A pointer to a file for output
- * @name
- * Name of the metadata file to be dumped to console.
- */
-void rte_ivshmem_metadata_dump(FILE *f, const char *name);
-
-
-#ifdef __cplusplus
-}
-#endif
-
-#endif /* RTE_IVSHMEM_H_ */
diff --git a/lib/librte_ivshmem/rte_ivshmem_version.map b/lib/librte_ivshmem/rte_ivshmem_version.map
deleted file mode 100644
index 5a393ddc..00000000
--- a/lib/librte_ivshmem/rte_ivshmem_version.map
+++ /dev/null
@@ -1,12 +0,0 @@
-DPDK_2.0 {
- global:
-
- rte_ivshmem_metadata_add_mempool;
- rte_ivshmem_metadata_add_memzone;
- rte_ivshmem_metadata_add_ring;
- rte_ivshmem_metadata_cmdline_generate;
- rte_ivshmem_metadata_create;
- rte_ivshmem_metadata_dump;
-
- local: *;
-};
diff --git a/lib/librte_kni/rte_kni.c b/lib/librte_kni/rte_kni.c
index 3028fd43..a80cefd2 100644
--- a/lib/librte_kni/rte_kni.c
+++ b/lib/librte_kni/rte_kni.c
@@ -210,14 +210,18 @@ rte_kni_init(unsigned int max_kni_ifaces)
if (max_kni_ifaces == 0) {
RTE_LOG(ERR, KNI, "Invalid number of max_kni_ifaces %d\n",
max_kni_ifaces);
- rte_panic("Unable to initialize KNI\n");
+ RTE_LOG(ERR, KNI, "Unable to initialize KNI\n");
+ return;
}
/* Check FD and open */
if (kni_fd < 0) {
kni_fd = open("/dev/" KNI_DEVICE, O_RDWR);
- if (kni_fd < 0)
- rte_panic("Can not open /dev/%s\n", KNI_DEVICE);
+ if (kni_fd < 0) {
+ RTE_LOG(ERR, KNI,
+ "Can not open /dev/%s\n", KNI_DEVICE);
+ return;
+ }
}
/* Allocate slot objects */
@@ -307,8 +311,8 @@ rte_kni_init(unsigned int max_kni_ifaces)
return;
kni_fail:
- rte_panic("Unable to allocate memory for max_kni_ifaces:%d. Increase the amount of hugepages memory\n",
- max_kni_ifaces);
+ RTE_LOG(ERR, KNI, "Unable to allocate memory for max_kni_ifaces:%d."
+ "Increase the amount of hugepages memory\n", max_kni_ifaces);
}
@@ -321,9 +325,7 @@ rte_kni_alloc(struct rte_mempool *pktmbuf_pool,
struct rte_kni_device_info dev_info;
struct rte_kni *ctx;
char intf_name[RTE_KNI_NAMESIZE];
- char mz_name[RTE_MEMZONE_NAMESIZE];
const struct rte_memzone *mz;
- const struct rte_mempool *mp;
struct rte_kni_memzone_slot *slot = NULL;
if (!pktmbuf_pool || !conf || !conf->name[0])
@@ -414,19 +416,6 @@ rte_kni_alloc(struct rte_mempool *pktmbuf_pool,
dev_info.sync_va = mz->addr;
dev_info.sync_phys = mz->phys_addr;
-
- /* MBUF mempool */
- snprintf(mz_name, sizeof(mz_name), RTE_MEMPOOL_MZ_FORMAT,
- pktmbuf_pool->name);
- mz = rte_memzone_lookup(mz_name);
- KNI_MEM_CHECK(mz == NULL);
- mp = (struct rte_mempool *)mz->addr;
- /* KNI currently requires to have only one memory chunk */
- if (mp->nb_mem_chunks != 1)
- goto kni_fail;
-
- dev_info.mbuf_va = STAILQ_FIRST(&mp->mem_list)->addr;
- dev_info.mbuf_phys = STAILQ_FIRST(&mp->mem_list)->phys_addr;
ctx->pktmbuf_pool = pktmbuf_pool;
ctx->group_id = conf->group_id;
ctx->slot_id = slot->id;
@@ -462,6 +451,20 @@ kni_free_fifo(struct rte_kni_fifo *fifo)
} while (ret);
}
+static void
+kni_free_fifo_phy(struct rte_kni_fifo *fifo)
+{
+ void *mbuf_phys;
+ int ret;
+
+ do {
+ ret = kni_fifo_get(fifo, &mbuf_phys, 1);
+ /*
+ * TODO: free mbufs
+ */
+ } while (ret);
+}
+
int
rte_kni_release(struct rte_kni *kni)
{
@@ -479,8 +482,8 @@ rte_kni_release(struct rte_kni *kni)
/* mbufs in all fifo should be released, except request/response */
kni_free_fifo(kni->tx_q);
- kni_free_fifo(kni->rx_q);
- kni_free_fifo(kni->alloc_q);
+ kni_free_fifo_phy(kni->rx_q);
+ kni_free_fifo_phy(kni->alloc_q);
kni_free_fifo(kni->free_q);
slot_id = kni->slot_id;
@@ -490,8 +493,9 @@ rte_kni_release(struct rte_kni *kni)
/* Release memzone */
if (slot_id > kni_memzone_pool.max_ifaces) {
- rte_panic("KNI pool: corrupted slot ID: %d, max: %d\n",
+ RTE_LOG(ERR, KNI, "KNI pool: corrupted slot ID: %d, max: %d\n",
slot_id, kni_memzone_pool.max_ifaces);
+ return -1;
}
kni_memzone_pool_release(&kni_memzone_pool.slots[slot_id]);
@@ -513,7 +517,8 @@ rte_kni_handle_request(struct rte_kni *kni)
return 0; /* It is OK of can not getting the request mbuf */
if (req != kni->sync_addr) {
- rte_panic("Wrong req pointer %p\n", req);
+ RTE_LOG(ERR, KNI, "Wrong req pointer %p\n", req);
+ return -1;
}
/* Analyze the request and call the relevant actions for it */
@@ -544,10 +549,25 @@ rte_kni_handle_request(struct rte_kni *kni)
return 0;
}
+static void *
+va2pa(struct rte_mbuf *m)
+{
+ return (void *)((unsigned long)m -
+ ((unsigned long)m->buf_addr -
+ (unsigned long)m->buf_physaddr));
+}
+
unsigned
rte_kni_tx_burst(struct rte_kni *kni, struct rte_mbuf **mbufs, unsigned num)
{
- unsigned ret = kni_fifo_put(kni->rx_q, (void **)mbufs, num);
+ void *phy_mbufs[num];
+ unsigned int ret;
+ unsigned int i;
+
+ for (i = 0; i < num; i++)
+ phy_mbufs[i] = va2pa(mbufs[i]);
+
+ ret = kni_fifo_put(kni->rx_q, phy_mbufs, num);
/* Get mbufs from free_q and then free them */
kni_free_mbufs(kni);
@@ -585,6 +605,7 @@ kni_allocate_mbufs(struct rte_kni *kni)
{
int i, ret;
struct rte_mbuf *pkts[MAX_MBUF_BURST_NUM];
+ void *phys[MAX_MBUF_BURST_NUM];
RTE_BUILD_BUG_ON(offsetof(struct rte_mbuf, pool) !=
offsetof(struct rte_kni_mbuf, pool));
@@ -614,13 +635,14 @@ kni_allocate_mbufs(struct rte_kni *kni)
RTE_LOG(ERR, KNI, "Out of memory\n");
break;
}
+ phys[i] = va2pa(pkts[i]);
}
/* No pkt mbuf alocated */
if (i <= 0)
return;
- ret = kni_fifo_put(kni->alloc_q, (void **)pkts, i);
+ ret = kni_fifo_put(kni->alloc_q, phys, i);
/* Check if any mbufs not put into alloc_q, and then free them */
if (ret >= 0 && ret < i && ret < MAX_MBUF_BURST_NUM) {
diff --git a/lib/librte_kni/rte_kni.h b/lib/librte_kni/rte_kni.h
index 7363e6cf..37deb472 100644
--- a/lib/librte_kni/rte_kni.h
+++ b/lib/librte_kni/rte_kni.h
@@ -42,7 +42,7 @@
* interfaces that may be used by the RTE application to receive/transmit
* packets from/to Linux kernel net interfaces.
*
- * This library provide two APIs to burst receive packets from KNI interfaces,
+ * This library provides two APIs to burst receive packets from KNI interfaces,
* and burst transmit packets to KNI interfaces.
*/
@@ -88,6 +88,7 @@ struct rte_kni_conf {
struct rte_pci_addr addr;
struct rte_pci_id id;
+ __extension__
uint8_t force_bind : 1; /* Flag to bind kernel thread */
};
diff --git a/lib/librte_lpm/Makefile b/lib/librte_lpm/Makefile
index 656ade27..3dc549dc 100644
--- a/lib/librte_lpm/Makefile
+++ b/lib/librte_lpm/Makefile
@@ -51,6 +51,8 @@ ifneq ($(filter y,$(CONFIG_RTE_ARCH_ARM) $(CONFIG_RTE_ARCH_ARM64)),)
SYMLINK-$(CONFIG_RTE_LIBRTE_LPM)-include += rte_lpm_neon.h
else ifeq ($(CONFIG_RTE_ARCH_X86),y)
SYMLINK-$(CONFIG_RTE_LIBRTE_LPM)-include += rte_lpm_sse.h
+else ifeq ($(CONFIG_RTE_ARCH_PPC_64),y)
+SYMLINK-$(CONFIG_RTE_LIBRTE_LPM)-include += rte_lpm_altivec.h
endif
# this lib needs eal
diff --git a/lib/librte_lpm/rte_lpm.c b/lib/librte_lpm/rte_lpm.c
index e1b5d94a..8c15c4c9 100644
--- a/lib/librte_lpm/rte_lpm.c
+++ b/lib/librte_lpm/rte_lpm.c
@@ -942,14 +942,9 @@ add_depth_big_v20(struct rte_lpm_v20 *lpm, uint32_t ip_masked, uint8_t depth,
/* Insert new rule into the tbl8 entry. */
for (i = tbl8_index; i < tbl8_index + tbl8_range; i++) {
- if (!lpm->tbl8[i].valid ||
- lpm->tbl8[i].depth <= depth) {
- lpm->tbl8[i].valid = VALID;
- lpm->tbl8[i].depth = depth;
- lpm->tbl8[i].next_hop = next_hop;
-
- continue;
- }
+ lpm->tbl8[i].valid = VALID;
+ lpm->tbl8[i].depth = depth;
+ lpm->tbl8[i].next_hop = next_hop;
}
/*
@@ -1073,14 +1068,9 @@ add_depth_big_v1604(struct rte_lpm *lpm, uint32_t ip_masked, uint8_t depth,
/* Insert new rule into the tbl8 entry. */
for (i = tbl8_index; i < tbl8_index + tbl8_range; i++) {
- if (!lpm->tbl8[i].valid ||
- lpm->tbl8[i].depth <= depth) {
- lpm->tbl8[i].valid = VALID;
- lpm->tbl8[i].depth = depth;
- lpm->tbl8[i].next_hop = next_hop;
-
- continue;
- }
+ lpm->tbl8[i].valid = VALID;
+ lpm->tbl8[i].depth = depth;
+ lpm->tbl8[i].next_hop = next_hop;
}
/*
diff --git a/lib/librte_lpm/rte_lpm.h b/lib/librte_lpm/rte_lpm.h
index 2df1d672..682865e4 100644
--- a/lib/librte_lpm/rte_lpm.h
+++ b/lib/librte_lpm/rte_lpm.h
@@ -93,12 +93,14 @@ extern "C" {
#if RTE_BYTE_ORDER == RTE_LITTLE_ENDIAN
/** @internal Tbl24 entry structure. */
+__extension__
struct rte_lpm_tbl_entry_v20 {
/**
* Stores Next hop (tbl8 or tbl24 when valid_group is not set) or
* a group index pointing to a tbl8 structure (tbl24 only, when
* valid_group is set)
*/
+ RTE_STD_C11
union {
uint8_t next_hop;
uint8_t group_idx;
@@ -116,6 +118,7 @@ struct rte_lpm_tbl_entry_v20 {
uint8_t depth :6; /**< Rule depth. */
};
+__extension__
struct rte_lpm_tbl_entry {
/**
* Stores Next hop (tbl8 or tbl24 when valid_group is not set) or
@@ -137,6 +140,7 @@ struct rte_lpm_tbl_entry {
};
#else
+__extension__
struct rte_lpm_tbl_entry_v20 {
uint8_t depth :6;
uint8_t valid_group :1;
@@ -147,6 +151,7 @@ struct rte_lpm_tbl_entry_v20 {
};
};
+__extension__
struct rte_lpm_tbl_entry {
uint32_t depth :6;
uint32_t valid_group :1;
@@ -193,7 +198,7 @@ struct rte_lpm_v20 {
__rte_cache_aligned; /**< LPM tbl24 table. */
struct rte_lpm_tbl_entry_v20 tbl8[RTE_LPM_TBL8_NUM_ENTRIES]
__rte_cache_aligned; /**< LPM tbl8 table. */
- struct rte_lpm_rule_v20 rules_tbl[0] \
+ struct rte_lpm_rule_v20 rules_tbl[]
__rte_cache_aligned; /**< LPM rules. */
};
@@ -480,6 +485,8 @@ rte_lpm_lookupx4(const struct rte_lpm *lpm, xmm_t ip, uint32_t hop[4],
#if defined(RTE_ARCH_ARM) || defined(RTE_ARCH_ARM64)
#include "rte_lpm_neon.h"
+#elif defined(RTE_ARCH_PPC_64)
+#include "rte_lpm_altivec.h"
#else
#include "rte_lpm_sse.h"
#endif
diff --git a/lib/librte_lpm/rte_lpm_altivec.h b/lib/librte_lpm/rte_lpm_altivec.h
new file mode 100644
index 00000000..e26e0875
--- /dev/null
+++ b/lib/librte_lpm/rte_lpm_altivec.h
@@ -0,0 +1,154 @@
+/*
+ * BSD LICENSE
+ *
+ * Copyright (C) IBM Corporation 2016.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of IBM Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#ifndef _RTE_LPM_ALTIVEC_H_
+#define _RTE_LPM_ALTIVEC_H_
+
+#include <rte_branch_prediction.h>
+#include <rte_byteorder.h>
+#include <rte_common.h>
+#include <rte_vect.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+static inline void
+rte_lpm_lookupx4(const struct rte_lpm *lpm, xmm_t ip, uint32_t hop[4],
+ uint32_t defv)
+{
+ vector signed int i24;
+ rte_xmm_t i8;
+ uint32_t tbl[4];
+ uint64_t idx, pt, pt2;
+ const uint32_t *ptbl;
+
+ const uint32_t mask = UINT8_MAX;
+ const vector signed int mask8 = (xmm_t){mask, mask, mask, mask};
+
+ /*
+ * RTE_LPM_VALID_EXT_ENTRY_BITMASK for 2 LPM entries
+ * as one 64-bit value (0x0300000003000000).
+ */
+ const uint64_t mask_xv =
+ ((uint64_t)RTE_LPM_VALID_EXT_ENTRY_BITMASK |
+ (uint64_t)RTE_LPM_VALID_EXT_ENTRY_BITMASK << 32);
+
+ /*
+ * RTE_LPM_LOOKUP_SUCCESS for 2 LPM entries
+ * as one 64-bit value (0x0100000001000000).
+ */
+ const uint64_t mask_v =
+ ((uint64_t)RTE_LPM_LOOKUP_SUCCESS |
+ (uint64_t)RTE_LPM_LOOKUP_SUCCESS << 32);
+
+ /* get 4 indexes for tbl24[]. */
+ i24 = vec_sr((xmm_t) ip,
+ (vector unsigned int){CHAR_BIT, CHAR_BIT, CHAR_BIT, CHAR_BIT});
+
+ /* extract values from tbl24[] */
+ idx = (uint32_t)i24[0];
+ idx = idx < (1<<24) ? idx : (1<<24)-1;
+ ptbl = (const uint32_t *)&lpm->tbl24[idx];
+ tbl[0] = *ptbl;
+
+ idx = (uint32_t) i24[1];
+ idx = idx < (1<<24) ? idx : (1<<24)-1;
+ ptbl = (const uint32_t *)&lpm->tbl24[idx];
+ tbl[1] = *ptbl;
+
+ idx = (uint32_t) i24[2];
+ idx = idx < (1<<24) ? idx : (1<<24)-1;
+ ptbl = (const uint32_t *)&lpm->tbl24[idx];
+ tbl[2] = *ptbl;
+
+ idx = (uint32_t) i24[3];
+ idx = idx < (1<<24) ? idx : (1<<24)-1;
+ ptbl = (const uint32_t *)&lpm->tbl24[idx];
+ tbl[3] = *ptbl;
+
+ /* get 4 indexes for tbl8[]. */
+ i8.x = vec_and(ip, mask8);
+
+ pt = (uint64_t)tbl[0] |
+ (uint64_t)tbl[1] << 32;
+ pt2 = (uint64_t)tbl[2] |
+ (uint64_t)tbl[3] << 32;
+
+ /* search successfully finished for all 4 IP addresses. */
+ if (likely((pt & mask_xv) == mask_v) &&
+ likely((pt2 & mask_xv) == mask_v)) {
+ *(uint64_t *)hop = pt & RTE_LPM_MASKX4_RES;
+ *(uint64_t *)(hop + 2) = pt2 & RTE_LPM_MASKX4_RES;
+ return;
+ }
+
+ if (unlikely((pt & RTE_LPM_VALID_EXT_ENTRY_BITMASK) ==
+ RTE_LPM_VALID_EXT_ENTRY_BITMASK)) {
+ i8.u32[0] = i8.u32[0] +
+ (uint8_t)tbl[0] * RTE_LPM_TBL8_GROUP_NUM_ENTRIES;
+ ptbl = (const uint32_t *)&lpm->tbl8[i8.u32[0]];
+ tbl[0] = *ptbl;
+ }
+ if (unlikely((pt >> 32 & RTE_LPM_VALID_EXT_ENTRY_BITMASK) ==
+ RTE_LPM_VALID_EXT_ENTRY_BITMASK)) {
+ i8.u32[1] = i8.u32[1] +
+ (uint8_t)tbl[1] * RTE_LPM_TBL8_GROUP_NUM_ENTRIES;
+ ptbl = (const uint32_t *)&lpm->tbl8[i8.u32[1]];
+ tbl[1] = *ptbl;
+ }
+ if (unlikely((pt2 & RTE_LPM_VALID_EXT_ENTRY_BITMASK) ==
+ RTE_LPM_VALID_EXT_ENTRY_BITMASK)) {
+ i8.u32[2] = i8.u32[2] +
+ (uint8_t)tbl[2] * RTE_LPM_TBL8_GROUP_NUM_ENTRIES;
+ ptbl = (const uint32_t *)&lpm->tbl8[i8.u32[2]];
+ tbl[2] = *ptbl;
+ }
+ if (unlikely((pt2 >> 32 & RTE_LPM_VALID_EXT_ENTRY_BITMASK) ==
+ RTE_LPM_VALID_EXT_ENTRY_BITMASK)) {
+ i8.u32[3] = i8.u32[3] +
+ (uint8_t)tbl[3] * RTE_LPM_TBL8_GROUP_NUM_ENTRIES;
+ ptbl = (const uint32_t *)&lpm->tbl8[i8.u32[3]];
+ tbl[3] = *ptbl;
+ }
+
+ hop[0] = (tbl[0] & RTE_LPM_LOOKUP_SUCCESS) ? tbl[0] & 0x00FFFFFF : defv;
+ hop[1] = (tbl[1] & RTE_LPM_LOOKUP_SUCCESS) ? tbl[1] & 0x00FFFFFF : defv;
+ hop[2] = (tbl[2] & RTE_LPM_LOOKUP_SUCCESS) ? tbl[2] & 0x00FFFFFF : defv;
+ hop[3] = (tbl[3] & RTE_LPM_LOOKUP_SUCCESS) ? tbl[3] & 0x00FFFFFF : defv;
+}
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _RTE_LPM_ALTIVEC_H_ */
diff --git a/lib/librte_lpm/rte_lpm_neon.h b/lib/librte_lpm/rte_lpm_neon.h
index 7c643159..7efd9a0d 100644
--- a/lib/librte_lpm/rte_lpm_neon.h
+++ b/lib/librte_lpm/rte_lpm_neon.h
@@ -43,6 +43,7 @@
#include <rte_byteorder.h>
#include <rte_common.h>
#include <rte_vect.h>
+#include <rte_lpm.h>
#ifdef __cplusplus
extern "C" {
diff --git a/lib/librte_lpm/rte_lpm_sse.h b/lib/librte_lpm/rte_lpm_sse.h
index da830995..ef33c6a1 100644
--- a/lib/librte_lpm/rte_lpm_sse.h
+++ b/lib/librte_lpm/rte_lpm_sse.h
@@ -38,6 +38,7 @@
#include <rte_byteorder.h>
#include <rte_common.h>
#include <rte_vect.h>
+#include <rte_lpm.h>
#ifdef __cplusplus
extern "C" {
diff --git a/lib/librte_mbuf/Makefile b/lib/librte_mbuf/Makefile
index 8d62b0d3..4ae2e8c8 100644
--- a/lib/librte_mbuf/Makefile
+++ b/lib/librte_mbuf/Makefile
@@ -41,10 +41,10 @@ EXPORT_MAP := rte_mbuf_version.map
LIBABIVER := 2
# all source are stored in SRCS-y
-SRCS-$(CONFIG_RTE_LIBRTE_MBUF) := rte_mbuf.c
+SRCS-$(CONFIG_RTE_LIBRTE_MBUF) := rte_mbuf.c rte_mbuf_ptype.c
# install includes
-SYMLINK-$(CONFIG_RTE_LIBRTE_MBUF)-include := rte_mbuf.h
+SYMLINK-$(CONFIG_RTE_LIBRTE_MBUF)-include := rte_mbuf.h rte_mbuf_ptype.h
# this lib needs eal
DEPDIRS-$(CONFIG_RTE_LIBRTE_MBUF) += lib/librte_eal lib/librte_mempool
diff --git a/lib/librte_mbuf/rte_mbuf.c b/lib/librte_mbuf/rte_mbuf.c
index d2c87526..63f43c89 100644
--- a/lib/librte_mbuf/rte_mbuf.c
+++ b/lib/librte_mbuf/rte_mbuf.c
@@ -53,12 +53,12 @@
#include <rte_lcore.h>
#include <rte_atomic.h>
#include <rte_branch_prediction.h>
-#include <rte_ring.h>
#include <rte_mempool.h>
#include <rte_mbuf.h>
#include <rte_string_fns.h>
#include <rte_hexdump.h>
#include <rte_errno.h>
+#include <rte_memcpy.h>
/*
* ctrlmbuf constructor, given as a callback function to
@@ -264,6 +264,40 @@ rte_pktmbuf_dump(FILE *f, const struct rte_mbuf *m, unsigned dump_len)
}
}
+/* read len data bytes in a mbuf at specified offset (internal); NULL if out of range */
+const void *__rte_pktmbuf_read(const struct rte_mbuf *m, uint32_t off,
+ uint32_t len, void *buf)
+{
+ const struct rte_mbuf *seg = m;
+ uint32_t buf_off = 0, copy_len;
+
+ if (off >= rte_pktmbuf_pkt_len(m) || len > rte_pktmbuf_pkt_len(m) - off)
+ return NULL; /* off + len is never formed: the 32-bit sum could wrap */
+
+ while (off >= rte_pktmbuf_data_len(seg)) { /* off < pkt_len; assumes pkt_len == sum of data_len */
+ off -= rte_pktmbuf_data_len(seg);
+ seg = seg->next;
+ }
+
+ if (off + len <= rte_pktmbuf_data_len(seg))
+ return rte_pktmbuf_mtod_offset(seg, char *, off);
+
+ /* rare case: header is split among several segments */
+ while (len > 0) {
+ copy_len = rte_pktmbuf_data_len(seg) - off;
+ if (copy_len > len)
+ copy_len = len;
+ rte_memcpy((char *)buf + buf_off,
+ rte_pktmbuf_mtod_offset(seg, char *, off), copy_len);
+ off = 0;
+ buf_off += copy_len;
+ len -= copy_len;
+ seg = seg->next;
+ }
+
+ return buf;
+}
+
/*
* Get the name of a RX offload flag. Must be kept synchronized with flag
* definitions in rte_mbuf.h.
@@ -275,16 +309,78 @@ const char *rte_get_rx_ol_flag_name(uint64_t mask)
case PKT_RX_RSS_HASH: return "PKT_RX_RSS_HASH";
case PKT_RX_FDIR: return "PKT_RX_FDIR";
case PKT_RX_L4_CKSUM_BAD: return "PKT_RX_L4_CKSUM_BAD";
+ case PKT_RX_L4_CKSUM_GOOD: return "PKT_RX_L4_CKSUM_GOOD";
+ case PKT_RX_L4_CKSUM_NONE: return "PKT_RX_L4_CKSUM_NONE";
case PKT_RX_IP_CKSUM_BAD: return "PKT_RX_IP_CKSUM_BAD";
+ case PKT_RX_IP_CKSUM_GOOD: return "PKT_RX_IP_CKSUM_GOOD";
+ case PKT_RX_IP_CKSUM_NONE: return "PKT_RX_IP_CKSUM_NONE";
case PKT_RX_EIP_CKSUM_BAD: return "PKT_RX_EIP_CKSUM_BAD";
case PKT_RX_VLAN_STRIPPED: return "PKT_RX_VLAN_STRIPPED";
case PKT_RX_IEEE1588_PTP: return "PKT_RX_IEEE1588_PTP";
case PKT_RX_IEEE1588_TMST: return "PKT_RX_IEEE1588_TMST";
case PKT_RX_QINQ_STRIPPED: return "PKT_RX_QINQ_STRIPPED";
+ case PKT_RX_LRO: return "PKT_RX_LRO";
default: return NULL;
}
}
+struct flag_mask {
+ uint64_t flag;
+ uint64_t mask;
+ const char *default_name;
+};
+
+/* write the list of rx ol flags in buffer buf */
+int
+rte_get_rx_ol_flag_list(uint64_t mask, char *buf, size_t buflen)
+{
+ const struct flag_mask rx_flags[] = {
+ { PKT_RX_VLAN_PKT, PKT_RX_VLAN_PKT, NULL },
+ { PKT_RX_RSS_HASH, PKT_RX_RSS_HASH, NULL },
+ { PKT_RX_FDIR, PKT_RX_FDIR, NULL },
+ { PKT_RX_L4_CKSUM_BAD, PKT_RX_L4_CKSUM_MASK, NULL },
+ { PKT_RX_L4_CKSUM_GOOD, PKT_RX_L4_CKSUM_MASK, NULL },
+ { PKT_RX_L4_CKSUM_NONE, PKT_RX_L4_CKSUM_MASK, NULL },
+ { PKT_RX_L4_CKSUM_UNKNOWN, PKT_RX_L4_CKSUM_MASK,
+ "PKT_RX_L4_CKSUM_UNKNOWN" },
+ { PKT_RX_IP_CKSUM_BAD, PKT_RX_IP_CKSUM_MASK, NULL },
+ { PKT_RX_IP_CKSUM_GOOD, PKT_RX_IP_CKSUM_MASK, NULL },
+ { PKT_RX_IP_CKSUM_NONE, PKT_RX_IP_CKSUM_MASK, NULL },
+ { PKT_RX_IP_CKSUM_UNKNOWN, PKT_RX_IP_CKSUM_MASK,
+ "PKT_RX_IP_CKSUM_UNKNOWN" },
+ { PKT_RX_EIP_CKSUM_BAD, PKT_RX_EIP_CKSUM_BAD, NULL },
+ { PKT_RX_VLAN_STRIPPED, PKT_RX_VLAN_STRIPPED, NULL },
+ { PKT_RX_IEEE1588_PTP, PKT_RX_IEEE1588_PTP, NULL },
+ { PKT_RX_IEEE1588_TMST, PKT_RX_IEEE1588_TMST, NULL },
+ { PKT_RX_QINQ_STRIPPED, PKT_RX_QINQ_STRIPPED, NULL },
+ { PKT_RX_LRO, PKT_RX_LRO, NULL },
+ };
+ const char *name;
+ unsigned int i;
+ int ret;
+
+ if (buflen == 0)
+ return -1;
+
+ buf[0] = '\0';
+ for (i = 0; i < RTE_DIM(rx_flags); i++) {
+ if ((mask & rx_flags[i].mask) != rx_flags[i].flag)
+ continue;
+ name = rte_get_rx_ol_flag_name(rx_flags[i].flag);
+ if (name == NULL)
+ name = rx_flags[i].default_name;
+ ret = snprintf(buf, buflen, "%s ", name);
+ if (ret < 0)
+ return -1;
+ if ((size_t)ret >= buflen)
+ return -1;
+ buf += ret;
+ buflen -= ret;
+ }
+
+ return 0;
+}
+
/*
* Get the name of a TX offload flag. Must be kept synchronized with flag
* definitions in rte_mbuf.h.
@@ -304,6 +400,63 @@ const char *rte_get_tx_ol_flag_name(uint64_t mask)
case PKT_TX_OUTER_IP_CKSUM: return "PKT_TX_OUTER_IP_CKSUM";
case PKT_TX_OUTER_IPV4: return "PKT_TX_OUTER_IPV4";
case PKT_TX_OUTER_IPV6: return "PKT_TX_OUTER_IPV6";
+ case PKT_TX_TUNNEL_VXLAN: return "PKT_TX_TUNNEL_VXLAN";
+ case PKT_TX_TUNNEL_GRE: return "PKT_TX_TUNNEL_GRE";
+ case PKT_TX_TUNNEL_IPIP: return "PKT_TX_TUNNEL_IPIP";
+ case PKT_TX_TUNNEL_GENEVE: return "PKT_TX_TUNNEL_GENEVE";
default: return NULL;
}
}
+
+/* write the list of tx ol flags in buffer buf */
+int
+rte_get_tx_ol_flag_list(uint64_t mask, char *buf, size_t buflen)
+{
+ const struct flag_mask tx_flags[] = {
+ { PKT_TX_VLAN_PKT, PKT_TX_VLAN_PKT, NULL },
+ { PKT_TX_IP_CKSUM, PKT_TX_IP_CKSUM, NULL },
+ { PKT_TX_TCP_CKSUM, PKT_TX_L4_MASK, NULL },
+ { PKT_TX_SCTP_CKSUM, PKT_TX_L4_MASK, NULL },
+ { PKT_TX_UDP_CKSUM, PKT_TX_L4_MASK, NULL },
+ { PKT_TX_L4_NO_CKSUM, PKT_TX_L4_MASK, "PKT_TX_L4_NO_CKSUM" },
+ { PKT_TX_IEEE1588_TMST, PKT_TX_IEEE1588_TMST, NULL },
+ { PKT_TX_TCP_SEG, PKT_TX_TCP_SEG, NULL },
+ { PKT_TX_IPV4, PKT_TX_IPV4, NULL },
+ { PKT_TX_IPV6, PKT_TX_IPV6, NULL },
+ { PKT_TX_OUTER_IP_CKSUM, PKT_TX_OUTER_IP_CKSUM, NULL },
+ { PKT_TX_OUTER_IPV4, PKT_TX_OUTER_IPV4, NULL },
+ { PKT_TX_OUTER_IPV6, PKT_TX_OUTER_IPV6, NULL },
+ { PKT_TX_TUNNEL_VXLAN, PKT_TX_TUNNEL_MASK,
+ "PKT_TX_TUNNEL_NONE" },
+ { PKT_TX_TUNNEL_GRE, PKT_TX_TUNNEL_MASK,
+ "PKT_TX_TUNNEL_NONE" },
+ { PKT_TX_TUNNEL_IPIP, PKT_TX_TUNNEL_MASK,
+ "PKT_TX_TUNNEL_NONE" },
+ { PKT_TX_TUNNEL_GENEVE, PKT_TX_TUNNEL_MASK,
+ "PKT_TX_TUNNEL_NONE" },
+ };
+ const char *name;
+ unsigned int i;
+ int ret;
+
+ if (buflen == 0)
+ return -1;
+
+ buf[0] = '\0';
+ for (i = 0; i < RTE_DIM(tx_flags); i++) {
+ if ((mask & tx_flags[i].mask) != tx_flags[i].flag)
+ continue;
+ name = rte_get_tx_ol_flag_name(tx_flags[i].flag);
+ if (name == NULL)
+ name = tx_flags[i].default_name;
+ ret = snprintf(buf, buflen, "%s ", name);
+ if (ret < 0)
+ return -1;
+ if ((size_t)ret >= buflen)
+ return -1;
+ buf += ret;
+ buflen -= ret;
+ }
+
+ return 0;
+}
diff --git a/lib/librte_mbuf/rte_mbuf.h b/lib/librte_mbuf/rte_mbuf.h
index 101485fb..ead7c6ea 100644
--- a/lib/librte_mbuf/rte_mbuf.h
+++ b/lib/librte_mbuf/rte_mbuf.h
@@ -44,7 +44,7 @@
* buffers. The message buffers are stored in a mempool, using the
* RTE mempool library.
*
- * This library provide an API to allocate/free packet mbufs, which are
+ * This library provides an API to allocate/free packet mbufs, which are
* used to carry network packets.
*
* To understand the concepts of packet buffers or mbufs, you
@@ -60,6 +60,7 @@
#include <rte_atomic.h>
#include <rte_prefetch.h>
#include <rte_branch_prediction.h>
+#include <rte_mbuf_ptype.h>
#ifdef __cplusplus
extern "C" {
@@ -90,8 +91,25 @@ extern "C" {
#define PKT_RX_RSS_HASH (1ULL << 1) /**< RX packet with RSS hash result. */
#define PKT_RX_FDIR (1ULL << 2) /**< RX packet with FDIR match indicate. */
-#define PKT_RX_L4_CKSUM_BAD (1ULL << 3) /**< L4 cksum of RX pkt. is not OK. */
-#define PKT_RX_IP_CKSUM_BAD (1ULL << 4) /**< IP cksum of RX pkt. is not OK. */
+
+/**
+ * Deprecated.
+ * Checking this flag alone is deprecated: check the 2 bits of
+ * PKT_RX_L4_CKSUM_MASK.
+ * This flag was set when the L4 checksum of a packet was detected as
+ * wrong by the hardware.
+ */
+#define PKT_RX_L4_CKSUM_BAD (1ULL << 3)
+
+/**
+ * Deprecated.
+ * Checking this flag alone is deprecated: check the 2 bits of
+ * PKT_RX_IP_CKSUM_MASK.
+ * This flag was set when the IP checksum of a packet was detected as
+ * wrong by the hardware.
+ */
+#define PKT_RX_IP_CKSUM_BAD (1ULL << 4)
+
#define PKT_RX_EIP_CKSUM_BAD (1ULL << 5) /**< External IP header checksum error. */
/**
@@ -101,7 +119,35 @@ extern "C" {
*/
#define PKT_RX_VLAN_STRIPPED (1ULL << 6)
-/* hole, some bits can be reused here */
+/**
+ * Mask of bits used to determine the status of RX IP checksum.
+ * - PKT_RX_IP_CKSUM_UNKNOWN: no information about the RX IP checksum
+ * - PKT_RX_IP_CKSUM_BAD: the IP checksum in the packet is wrong
+ * - PKT_RX_IP_CKSUM_GOOD: the IP checksum in the packet is valid
+ * - PKT_RX_IP_CKSUM_NONE: the IP checksum is not correct in the packet
+ * data, but the integrity of the IP header is verified.
+ */
+#define PKT_RX_IP_CKSUM_MASK ((1ULL << 4) | (1ULL << 7))
+
+#define PKT_RX_IP_CKSUM_UNKNOWN 0
+#define PKT_RX_IP_CKSUM_BAD (1ULL << 4)
+#define PKT_RX_IP_CKSUM_GOOD (1ULL << 7)
+#define PKT_RX_IP_CKSUM_NONE ((1ULL << 4) | (1ULL << 7))
+
+/**
+ * Mask of bits used to determine the status of RX L4 checksum.
+ * - PKT_RX_L4_CKSUM_UNKNOWN: no information about the RX L4 checksum
+ * - PKT_RX_L4_CKSUM_BAD: the L4 checksum in the packet is wrong
+ * - PKT_RX_L4_CKSUM_GOOD: the L4 checksum in the packet is valid
+ * - PKT_RX_L4_CKSUM_NONE: the L4 checksum is not correct in the packet
+ * data, but the integrity of the L4 data is verified.
+ */
+#define PKT_RX_L4_CKSUM_MASK ((1ULL << 3) | (1ULL << 8))
+
+#define PKT_RX_L4_CKSUM_UNKNOWN 0
+#define PKT_RX_L4_CKSUM_BAD (1ULL << 3)
+#define PKT_RX_L4_CKSUM_GOOD (1ULL << 8)
+#define PKT_RX_L4_CKSUM_NONE ((1ULL << 3) | (1ULL << 8))
#define PKT_RX_IEEE1588_PTP (1ULL << 9) /**< RX IEEE1588 L2 Ethernet PT Packet. */
#define PKT_RX_IEEE1588_TMST (1ULL << 10) /**< RX IEEE1588 L2/L4 timestamped packet.*/
@@ -124,11 +170,30 @@ extern "C" {
*/
#define PKT_RX_QINQ_PKT PKT_RX_QINQ_STRIPPED
+/**
+ * When packets are coalesced by a hardware or virtual driver, this flag
+ * can be set in the RX mbuf, meaning that the m->tso_segsz field is
+ * valid and is set to the segment size of original packets.
+ */
+#define PKT_RX_LRO (1ULL << 16)
+
/* add new RX flags here */
/* add new TX flags here */
/**
+ * Bits 45:48 used for the tunnel type.
+ * When doing Tx offload like TSO or checksum, the HW needs to configure the
+ * tunnel type into the HW descriptors.
+ */
+#define PKT_TX_TUNNEL_VXLAN (0x1ULL << 45)
+#define PKT_TX_TUNNEL_GRE (0x2ULL << 45)
+#define PKT_TX_TUNNEL_IPIP (0x3ULL << 45)
+#define PKT_TX_TUNNEL_GENEVE (0x4ULL << 45)
+/* add new TX TUNNEL type here */
+#define PKT_TX_TUNNEL_MASK (0xFULL << 45)
+
+/**
* Second VLAN insertion (QinQ) flag.
*/
#define PKT_TX_QINQ_PKT (1ULL << 49) /**< TX packet with double VLAN inserted. */
@@ -225,500 +290,6 @@ extern "C" {
/* Use final bit of flags to indicate a control mbuf */
#define CTRL_MBUF_FLAG (1ULL << 63) /**< Mbuf contains control data */
-/*
- * 32 bits are divided into several fields to mark packet types. Note that
- * each field is indexical.
- * - Bit 3:0 is for L2 types.
- * - Bit 7:4 is for L3 or outer L3 (for tunneling case) types.
- * - Bit 11:8 is for L4 or outer L4 (for tunneling case) types.
- * - Bit 15:12 is for tunnel types.
- * - Bit 19:16 is for inner L2 types.
- * - Bit 23:20 is for inner L3 types.
- * - Bit 27:24 is for inner L4 types.
- * - Bit 31:28 is reserved.
- *
- * To be compatible with Vector PMD, RTE_PTYPE_L3_IPV4, RTE_PTYPE_L3_IPV4_EXT,
- * RTE_PTYPE_L3_IPV6, RTE_PTYPE_L3_IPV6_EXT, RTE_PTYPE_L4_TCP, RTE_PTYPE_L4_UDP
- * and RTE_PTYPE_L4_SCTP should be kept as below in a contiguous 7 bits.
- *
- * Note that L3 types values are selected for checking IPV4/IPV6 header from
- * performance point of view. Reading annotations of RTE_ETH_IS_IPV4_HDR and
- * RTE_ETH_IS_IPV6_HDR is needed for any future changes of L3 type values.
- *
- * Note that the packet types of the same packet recognized by different
- * hardware may be different, as different hardware may have different
- * capability of packet type recognition.
- *
- * examples:
- * <'ether type'=0x0800
- * | 'version'=4, 'protocol'=0x29
- * | 'version'=6, 'next header'=0x3A
- * | 'ICMPv6 header'>
- * will be recognized on i40e hardware as packet type combination of,
- * RTE_PTYPE_L2_ETHER |
- * RTE_PTYPE_L3_IPV4_EXT_UNKNOWN |
- * RTE_PTYPE_TUNNEL_IP |
- * RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN |
- * RTE_PTYPE_INNER_L4_ICMP.
- *
- * <'ether type'=0x86DD
- * | 'version'=6, 'next header'=0x2F
- * | 'GRE header'
- * | 'version'=6, 'next header'=0x11
- * | 'UDP header'>
- * will be recognized on i40e hardware as packet type combination of,
- * RTE_PTYPE_L2_ETHER |
- * RTE_PTYPE_L3_IPV6_EXT_UNKNOWN |
- * RTE_PTYPE_TUNNEL_GRENAT |
- * RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN |
- * RTE_PTYPE_INNER_L4_UDP.
- */
-#define RTE_PTYPE_UNKNOWN 0x00000000
-/**
- * Ethernet packet type.
- * It is used for outer packet for tunneling cases.
- *
- * Packet format:
- * <'ether type'=[0x0800|0x86DD]>
- */
-#define RTE_PTYPE_L2_ETHER 0x00000001
-/**
- * Ethernet packet type for time sync.
- *
- * Packet format:
- * <'ether type'=0x88F7>
- */
-#define RTE_PTYPE_L2_ETHER_TIMESYNC 0x00000002
-/**
- * ARP (Address Resolution Protocol) packet type.
- *
- * Packet format:
- * <'ether type'=0x0806>
- */
-#define RTE_PTYPE_L2_ETHER_ARP 0x00000003
-/**
- * LLDP (Link Layer Discovery Protocol) packet type.
- *
- * Packet format:
- * <'ether type'=0x88CC>
- */
-#define RTE_PTYPE_L2_ETHER_LLDP 0x00000004
-/**
- * NSH (Network Service Header) packet type.
- *
- * Packet format:
- * <'ether type'=0x894F>
- */
-#define RTE_PTYPE_L2_ETHER_NSH 0x00000005
-/**
- * Mask of layer 2 packet types.
- * It is used for outer packet for tunneling cases.
- */
-#define RTE_PTYPE_L2_MASK 0x0000000f
-/**
- * IP (Internet Protocol) version 4 packet type.
- * It is used for outer packet for tunneling cases, and does not contain any
- * header option.
- *
- * Packet format:
- * <'ether type'=0x0800
- * | 'version'=4, 'ihl'=5>
- */
-#define RTE_PTYPE_L3_IPV4 0x00000010
-/**
- * IP (Internet Protocol) version 4 packet type.
- * It is used for outer packet for tunneling cases, and contains header
- * options.
- *
- * Packet format:
- * <'ether type'=0x0800
- * | 'version'=4, 'ihl'=[6-15], 'options'>
- */
-#define RTE_PTYPE_L3_IPV4_EXT 0x00000030
-/**
- * IP (Internet Protocol) version 6 packet type.
- * It is used for outer packet for tunneling cases, and does not contain any
- * extension header.
- *
- * Packet format:
- * <'ether type'=0x86DD
- * | 'version'=6, 'next header'=0x3B>
- */
-#define RTE_PTYPE_L3_IPV6 0x00000040
-/**
- * IP (Internet Protocol) version 4 packet type.
- * It is used for outer packet for tunneling cases, and may or maynot contain
- * header options.
- *
- * Packet format:
- * <'ether type'=0x0800
- * | 'version'=4, 'ihl'=[5-15], <'options'>>
- */
-#define RTE_PTYPE_L3_IPV4_EXT_UNKNOWN 0x00000090
-/**
- * IP (Internet Protocol) version 6 packet type.
- * It is used for outer packet for tunneling cases, and contains extension
- * headers.
- *
- * Packet format:
- * <'ether type'=0x86DD
- * | 'version'=6, 'next header'=[0x0|0x2B|0x2C|0x32|0x33|0x3C|0x87],
- * 'extension headers'>
- */
-#define RTE_PTYPE_L3_IPV6_EXT 0x000000c0
-/**
- * IP (Internet Protocol) version 6 packet type.
- * It is used for outer packet for tunneling cases, and may or maynot contain
- * extension headers.
- *
- * Packet format:
- * <'ether type'=0x86DD
- * | 'version'=6, 'next header'=[0x3B|0x0|0x2B|0x2C|0x32|0x33|0x3C|0x87],
- * <'extension headers'>>
- */
-#define RTE_PTYPE_L3_IPV6_EXT_UNKNOWN 0x000000e0
-/**
- * Mask of layer 3 packet types.
- * It is used for outer packet for tunneling cases.
- */
-#define RTE_PTYPE_L3_MASK 0x000000f0
-/**
- * TCP (Transmission Control Protocol) packet type.
- * It is used for outer packet for tunneling cases.
- *
- * Packet format:
- * <'ether type'=0x0800
- * | 'version'=4, 'protocol'=6, 'MF'=0>
- * or,
- * <'ether type'=0x86DD
- * | 'version'=6, 'next header'=6>
- */
-#define RTE_PTYPE_L4_TCP 0x00000100
-/**
- * UDP (User Datagram Protocol) packet type.
- * It is used for outer packet for tunneling cases.
- *
- * Packet format:
- * <'ether type'=0x0800
- * | 'version'=4, 'protocol'=17, 'MF'=0>
- * or,
- * <'ether type'=0x86DD
- * | 'version'=6, 'next header'=17>
- */
-#define RTE_PTYPE_L4_UDP 0x00000200
-/**
- * Fragmented IP (Internet Protocol) packet type.
- * It is used for outer packet for tunneling cases.
- *
- * It refers to those packets of any IP types, which can be recognized as
- * fragmented. A fragmented packet cannot be recognized as any other L4 types
- * (RTE_PTYPE_L4_TCP, RTE_PTYPE_L4_UDP, RTE_PTYPE_L4_SCTP, RTE_PTYPE_L4_ICMP,
- * RTE_PTYPE_L4_NONFRAG).
- *
- * Packet format:
- * <'ether type'=0x0800
- * | 'version'=4, 'MF'=1>
- * or,
- * <'ether type'=0x86DD
- * | 'version'=6, 'next header'=44>
- */
-#define RTE_PTYPE_L4_FRAG 0x00000300
-/**
- * SCTP (Stream Control Transmission Protocol) packet type.
- * It is used for outer packet for tunneling cases.
- *
- * Packet format:
- * <'ether type'=0x0800
- * | 'version'=4, 'protocol'=132, 'MF'=0>
- * or,
- * <'ether type'=0x86DD
- * | 'version'=6, 'next header'=132>
- */
-#define RTE_PTYPE_L4_SCTP 0x00000400
-/**
- * ICMP (Internet Control Message Protocol) packet type.
- * It is used for outer packet for tunneling cases.
- *
- * Packet format:
- * <'ether type'=0x0800
- * | 'version'=4, 'protocol'=1, 'MF'=0>
- * or,
- * <'ether type'=0x86DD
- * | 'version'=6, 'next header'=1>
- */
-#define RTE_PTYPE_L4_ICMP 0x00000500
-/**
- * Non-fragmented IP (Internet Protocol) packet type.
- * It is used for outer packet for tunneling cases.
- *
- * It refers to those packets of any IP types, while cannot be recognized as
- * any of above L4 types (RTE_PTYPE_L4_TCP, RTE_PTYPE_L4_UDP,
- * RTE_PTYPE_L4_FRAG, RTE_PTYPE_L4_SCTP, RTE_PTYPE_L4_ICMP).
- *
- * Packet format:
- * <'ether type'=0x0800
- * | 'version'=4, 'protocol'!=[6|17|132|1], 'MF'=0>
- * or,
- * <'ether type'=0x86DD
- * | 'version'=6, 'next header'!=[6|17|44|132|1]>
- */
-#define RTE_PTYPE_L4_NONFRAG 0x00000600
-/**
- * Mask of layer 4 packet types.
- * It is used for outer packet for tunneling cases.
- */
-#define RTE_PTYPE_L4_MASK 0x00000f00
-/**
- * IP (Internet Protocol) in IP (Internet Protocol) tunneling packet type.
- *
- * Packet format:
- * <'ether type'=0x0800
- * | 'version'=4, 'protocol'=[4|41]>
- * or,
- * <'ether type'=0x86DD
- * | 'version'=6, 'next header'=[4|41]>
- */
-#define RTE_PTYPE_TUNNEL_IP 0x00001000
-/**
- * GRE (Generic Routing Encapsulation) tunneling packet type.
- *
- * Packet format:
- * <'ether type'=0x0800
- * | 'version'=4, 'protocol'=47>
- * or,
- * <'ether type'=0x86DD
- * | 'version'=6, 'next header'=47>
- */
-#define RTE_PTYPE_TUNNEL_GRE 0x00002000
-/**
- * VXLAN (Virtual eXtensible Local Area Network) tunneling packet type.
- *
- * Packet format:
- * <'ether type'=0x0800
- * | 'version'=4, 'protocol'=17
- * | 'destination port'=4798>
- * or,
- * <'ether type'=0x86DD
- * | 'version'=6, 'next header'=17
- * | 'destination port'=4798>
- */
-#define RTE_PTYPE_TUNNEL_VXLAN 0x00003000
-/**
- * NVGRE (Network Virtualization using Generic Routing Encapsulation) tunneling
- * packet type.
- *
- * Packet format:
- * <'ether type'=0x0800
- * | 'version'=4, 'protocol'=47
- * | 'protocol type'=0x6558>
- * or,
- * <'ether type'=0x86DD
- * | 'version'=6, 'next header'=47
- * | 'protocol type'=0x6558'>
- */
-#define RTE_PTYPE_TUNNEL_NVGRE 0x00004000
-/**
- * GENEVE (Generic Network Virtualization Encapsulation) tunneling packet type.
- *
- * Packet format:
- * <'ether type'=0x0800
- * | 'version'=4, 'protocol'=17
- * | 'destination port'=6081>
- * or,
- * <'ether type'=0x86DD
- * | 'version'=6, 'next header'=17
- * | 'destination port'=6081>
- */
-#define RTE_PTYPE_TUNNEL_GENEVE 0x00005000
-/**
- * Tunneling packet type of Teredo, VXLAN (Virtual eXtensible Local Area
- * Network) or GRE (Generic Routing Encapsulation) could be recognized as this
- * packet type, if they can not be recognized independently as of hardware
- * capability.
- */
-#define RTE_PTYPE_TUNNEL_GRENAT 0x00006000
-/**
- * Mask of tunneling packet types.
- */
-#define RTE_PTYPE_TUNNEL_MASK 0x0000f000
-/**
- * Ethernet packet type.
- * It is used for inner packet type only.
- *
- * Packet format (inner only):
- * <'ether type'=[0x800|0x86DD]>
- */
-#define RTE_PTYPE_INNER_L2_ETHER 0x00010000
-/**
- * Ethernet packet type with VLAN (Virtual Local Area Network) tag.
- *
- * Packet format (inner only):
- * <'ether type'=[0x800|0x86DD], vlan=[1-4095]>
- */
-#define RTE_PTYPE_INNER_L2_ETHER_VLAN 0x00020000
-/**
- * Mask of inner layer 2 packet types.
- */
-#define RTE_PTYPE_INNER_L2_MASK 0x000f0000
-/**
- * IP (Internet Protocol) version 4 packet type.
- * It is used for inner packet only, and does not contain any header option.
- *
- * Packet format (inner only):
- * <'ether type'=0x0800
- * | 'version'=4, 'ihl'=5>
- */
-#define RTE_PTYPE_INNER_L3_IPV4 0x00100000
-/**
- * IP (Internet Protocol) version 4 packet type.
- * It is used for inner packet only, and contains header options.
- *
- * Packet format (inner only):
- * <'ether type'=0x0800
- * | 'version'=4, 'ihl'=[6-15], 'options'>
- */
-#define RTE_PTYPE_INNER_L3_IPV4_EXT 0x00200000
-/**
- * IP (Internet Protocol) version 6 packet type.
- * It is used for inner packet only, and does not contain any extension header.
- *
- * Packet format (inner only):
- * <'ether type'=0x86DD
- * | 'version'=6, 'next header'=0x3B>
- */
-#define RTE_PTYPE_INNER_L3_IPV6 0x00300000
-/**
- * IP (Internet Protocol) version 4 packet type.
- * It is used for inner packet only, and may or maynot contain header options.
- *
- * Packet format (inner only):
- * <'ether type'=0x0800
- * | 'version'=4, 'ihl'=[5-15], <'options'>>
- */
-#define RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN 0x00400000
-/**
- * IP (Internet Protocol) version 6 packet type.
- * It is used for inner packet only, and contains extension headers.
- *
- * Packet format (inner only):
- * <'ether type'=0x86DD
- * | 'version'=6, 'next header'=[0x0|0x2B|0x2C|0x32|0x33|0x3C|0x87],
- * 'extension headers'>
- */
-#define RTE_PTYPE_INNER_L3_IPV6_EXT 0x00500000
-/**
- * IP (Internet Protocol) version 6 packet type.
- * It is used for inner packet only, and may or maynot contain extension
- * headers.
- *
- * Packet format (inner only):
- * <'ether type'=0x86DD
- * | 'version'=6, 'next header'=[0x3B|0x0|0x2B|0x2C|0x32|0x33|0x3C|0x87],
- * <'extension headers'>>
- */
-#define RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN 0x00600000
-/**
- * Mask of inner layer 3 packet types.
- */
-#define RTE_PTYPE_INNER_L3_MASK 0x00f00000
-/**
- * TCP (Transmission Control Protocol) packet type.
- * It is used for inner packet only.
- *
- * Packet format (inner only):
- * <'ether type'=0x0800
- * | 'version'=4, 'protocol'=6, 'MF'=0>
- * or,
- * <'ether type'=0x86DD
- * | 'version'=6, 'next header'=6>
- */
-#define RTE_PTYPE_INNER_L4_TCP 0x01000000
-/**
- * UDP (User Datagram Protocol) packet type.
- * It is used for inner packet only.
- *
- * Packet format (inner only):
- * <'ether type'=0x0800
- * | 'version'=4, 'protocol'=17, 'MF'=0>
- * or,
- * <'ether type'=0x86DD
- * | 'version'=6, 'next header'=17>
- */
-#define RTE_PTYPE_INNER_L4_UDP 0x02000000
-/**
- * Fragmented IP (Internet Protocol) packet type.
- * It is used for inner packet only, and may or maynot have layer 4 packet.
- *
- * Packet format (inner only):
- * <'ether type'=0x0800
- * | 'version'=4, 'MF'=1>
- * or,
- * <'ether type'=0x86DD
- * | 'version'=6, 'next header'=44>
- */
-#define RTE_PTYPE_INNER_L4_FRAG 0x03000000
-/**
- * SCTP (Stream Control Transmission Protocol) packet type.
- * It is used for inner packet only.
- *
- * Packet format (inner only):
- * <'ether type'=0x0800
- * | 'version'=4, 'protocol'=132, 'MF'=0>
- * or,
- * <'ether type'=0x86DD
- * | 'version'=6, 'next header'=132>
- */
-#define RTE_PTYPE_INNER_L4_SCTP 0x04000000
-/**
- * ICMP (Internet Control Message Protocol) packet type.
- * It is used for inner packet only.
- *
- * Packet format (inner only):
- * <'ether type'=0x0800
- * | 'version'=4, 'protocol'=1, 'MF'=0>
- * or,
- * <'ether type'=0x86DD
- * | 'version'=6, 'next header'=1>
- */
-#define RTE_PTYPE_INNER_L4_ICMP 0x05000000
-/**
- * Non-fragmented IP (Internet Protocol) packet type.
- * It is used for inner packet only, and may or maynot have other unknown layer
- * 4 packet types.
- *
- * Packet format (inner only):
- * <'ether type'=0x0800
- * | 'version'=4, 'protocol'!=[6|17|132|1], 'MF'=0>
- * or,
- * <'ether type'=0x86DD
- * | 'version'=6, 'next header'!=[6|17|44|132|1]>
- */
-#define RTE_PTYPE_INNER_L4_NONFRAG 0x06000000
-/**
- * Mask of inner layer 4 packet types.
- */
-#define RTE_PTYPE_INNER_L4_MASK 0x0f000000
-
-/**
- * Check if the (outer) L3 header is IPv4. To avoid comparing IPv4 types one by
- * one, bit 4 is selected to be used for IPv4 only. Then checking bit 4 can
- * determine if it is an IPV4 packet.
- */
-#define RTE_ETH_IS_IPV4_HDR(ptype) ((ptype) & RTE_PTYPE_L3_IPV4)
-
-/**
- * Check if the (outer) L3 header is IPv4. To avoid comparing IPv4 types one by
- * one, bit 6 is selected to be used for IPv4 only. Then checking bit 6 can
- * determine if it is an IPV4 packet.
- */
-#define RTE_ETH_IS_IPV6_HDR(ptype) ((ptype) & RTE_PTYPE_L3_IPV6)
-
-/* Check if it is a tunneling packet */
-#define RTE_ETH_IS_TUNNEL_PKT(ptype) ((ptype) & (RTE_PTYPE_TUNNEL_MASK | \
- RTE_PTYPE_INNER_L2_MASK | \
- RTE_PTYPE_INNER_L3_MASK | \
- RTE_PTYPE_INNER_L4_MASK))
-
/** Alignment constraint of mbuf private area. */
#define RTE_MBUF_PRIV_ALIGN 8
@@ -733,6 +304,20 @@ extern "C" {
const char *rte_get_rx_ol_flag_name(uint64_t mask);
/**
+ * Dump the list of RX offload flags in a buffer
+ *
+ * @param mask
+ * The mask describing the RX flags.
+ * @param buf
+ * The output buffer.
+ * @param buflen
+ * The length of the buffer.
+ * @return
+ * 0 on success, (-1) on error.
+ */
+int rte_get_rx_ol_flag_list(uint64_t mask, char *buf, size_t buflen);
+
+/**
* Get the name of a TX offload flag
*
* @param mask
@@ -745,6 +330,20 @@ const char *rte_get_rx_ol_flag_name(uint64_t mask);
const char *rte_get_tx_ol_flag_name(uint64_t mask);
/**
+ * Dump the list of TX offload flags in a buffer
+ *
+ * @param mask
+ * The mask describing the TX flags.
+ * @param buf
+ * The output buffer.
+ * @param buflen
+ * The length of the buffer.
+ * @return
+ * 0 on success, (-1) on error.
+ */
+int rte_get_tx_ol_flag_list(uint64_t mask, char *buf, size_t buflen);
+
+/**
* Some NICs need at least 2KB buffer to RX standard Ethernet frame without
* splitting it into multiple segments.
* So, for mbufs that planned to be involved into RX/TX, the recommended
@@ -756,8 +355,11 @@ const char *rte_get_tx_ol_flag_name(uint64_t mask);
/* define a set of marker types that can be used to refer to set points in the
* mbuf */
+__extension__
typedef void *MARKER[0]; /**< generic marker for a point in a structure */
+__extension__
typedef uint8_t MARKER8[0]; /**< generic marker with 1B alignment */
+__extension__
typedef uint64_t MARKER64[0]; /**< marker that allows us to overwrite 8 bytes
* with a single assignment */
@@ -784,6 +386,7 @@ struct rte_mbuf {
* or non-atomic) is controlled by the CONFIG_RTE_MBUF_REFCNT_ATOMIC
* config option.
*/
+ RTE_STD_C11
union {
rte_atomic16_t refcnt_atomic; /**< Atomically accessed refcnt */
uint16_t refcnt; /**< Non-atomically accessed refcnt */
@@ -803,6 +406,7 @@ struct rte_mbuf {
* would have RTE_PTYPE_L2_ETHER and not RTE_PTYPE_L2_VLAN because the
* vlan is stripped from the data.
*/
+ RTE_STD_C11
union {
uint32_t packet_type; /**< L2/L3/L4 and tunnel information. */
struct {
@@ -824,6 +428,7 @@ struct rte_mbuf {
union {
uint32_t rss; /**< RSS hash result if RSS enabled */
struct {
+ RTE_STD_C11
union {
struct {
uint16_t hash;
@@ -851,6 +456,7 @@ struct rte_mbuf {
/* second cache line - fields only used in slow path or on TX */
MARKER cacheline1 __rte_cache_min_aligned;
+ RTE_STD_C11
union {
void *userdata; /**< Can be used for external metadata */
uint64_t udata64; /**< Allow 8-byte userdata on 32-bit */
@@ -860,10 +466,15 @@ struct rte_mbuf {
struct rte_mbuf *next; /**< Next segment of scattered packet. */
/* fields to support TX offloads */
+ RTE_STD_C11
union {
uint64_t tx_offload; /**< combined for easy fetch */
+ __extension__
struct {
- uint64_t l2_len:7; /**< L2 (MAC) Header Length. */
+ uint64_t l2_len:7;
+ /**< L2 (MAC) Header Length for non-tunneling pkt.
+ * Outer_L4_len + ... + Inner_L2_len for tunneling pkt.
+ */
uint64_t l3_len:9; /**< L3 (IP) Header Length. */
uint64_t l4_len:8; /**< L4 (TCP/UDP) Header Length. */
uint64_t tso_segsz:16; /**< TCP TSO segment size */
@@ -1157,13 +768,6 @@ static inline struct rte_mbuf *rte_mbuf_raw_alloc(struct rte_mempool *mp)
return m;
}
-/* compat with older versions */
-__rte_deprecated static inline struct rte_mbuf *
-__rte_mbuf_raw_alloc(struct rte_mempool *mp)
-{
- return rte_mbuf_raw_alloc(mp);
-}
-
/**
* @internal Put mbuf back into its original mempool.
* The use of that function is reserved for RTE internal needs.
@@ -1385,6 +989,19 @@ rte_pktmbuf_priv_size(struct rte_mempool *mp)
}
/**
+ * Reset the data_off field of a packet mbuf to its default value.
+ *
+ * The given mbuf must have only one segment, which should be empty.
+ *
+ * @param m
+ * The packet mbuf whose data_off field has to be reset.
+ */
+static inline void rte_pktmbuf_reset_headroom(struct rte_mbuf *m)
+{
+ m->data_off = RTE_MIN(RTE_PKTMBUF_HEADROOM, (uint16_t)m->buf_len);
+}
+
+/**
* Reset the fields of a packet mbuf to their default values.
*
* The given mbuf must have only one segment.
@@ -1404,8 +1021,7 @@ static inline void rte_pktmbuf_reset(struct rte_mbuf *m)
m->ol_flags = 0;
m->packet_type = 0;
- m->data_off = (RTE_PKTMBUF_HEADROOM <= m->buf_len) ?
- RTE_PKTMBUF_HEADROOM : m->buf_len;
+ rte_pktmbuf_reset_headroom(m);
m->data_len = 0;
__rte_mbuf_sanity_check(m, 1);
@@ -1569,7 +1185,7 @@ static inline void rte_pktmbuf_detach(struct rte_mbuf *m)
m->buf_addr = (char *)m + mbuf_size;
m->buf_physaddr = rte_mempool_virt2phy(mp, m) + mbuf_size;
m->buf_len = (uint16_t)buf_len;
- m->data_off = RTE_MIN(RTE_PKTMBUF_HEADROOM, (uint16_t)m->buf_len);
+ rte_pktmbuf_reset_headroom(m);
m->data_len = 0;
m->ol_flags = 0;
@@ -1958,6 +1574,41 @@ static inline int rte_pktmbuf_is_contiguous(const struct rte_mbuf *m)
}
/**
+ * @internal used by rte_pktmbuf_read().
+ */
+const void *__rte_pktmbuf_read(const struct rte_mbuf *m, uint32_t off,
+ uint32_t len, void *buf);
+
+/**
+ * Read len data bytes in a mbuf at specified offset.
+ *
+ * If the data is contiguous, return the pointer in the mbuf data, else
+ * copy the data in the buffer provided by the user and return its
+ * pointer.
+ *
+ * @param m
+ * The pointer to the mbuf.
+ * @param off
+ * The offset of the data in the mbuf.
+ * @param len
+ * The amount of bytes to read.
+ * @param buf
+ * The buffer where data is copied if it is not contiguous in mbuf
+ * data. Its length should be at least equal to the len parameter.
+ * @return
+ * The pointer to the data, either in the mbuf if it is contiguous,
+ * or in the user buffer. If mbuf is too small, NULL is returned.
+ */
+static inline const void *rte_pktmbuf_read(const struct rte_mbuf *m,
+ uint32_t off, uint32_t len, void *buf)
+{
+ if (likely(off + len <= rte_pktmbuf_data_len(m)))
+ return rte_pktmbuf_mtod_offset(m, char *, off);
+ else
+ return __rte_pktmbuf_read(m, off, len, buf);
+}
+
+/**
* Chain an mbuf to another, thereby creating a segmented packet.
*
* Note: The implementation will do a linear walk over the segments to find
@@ -1996,7 +1647,7 @@ static inline int rte_pktmbuf_chain(struct rte_mbuf *head, struct rte_mbuf *tail
}
/**
- * Dump an mbuf structure to the console.
+ * Dump an mbuf structure to a file.
*
* Dump all fields for the given packet mbuf and all its associated
* segments (in the case of a chained buffer).
diff --git a/lib/librte_mbuf/rte_mbuf_ptype.c b/lib/librte_mbuf/rte_mbuf_ptype.c
new file mode 100644
index 00000000..e5c4fae3
--- /dev/null
+++ b/lib/librte_mbuf/rte_mbuf_ptype.c
@@ -0,0 +1,227 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright 2016 6WIND S.A.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of 6WIND S.A. nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <stdint.h>
+
+#include <rte_mbuf.h>
+#include <rte_mbuf_ptype.h>
+
+/* get the name of the l2 packet type */
+const char *rte_get_ptype_l2_name(uint32_t ptype)
+{
+ switch (ptype & RTE_PTYPE_L2_MASK) {
+ case RTE_PTYPE_L2_ETHER: return "L2_ETHER";
+ case RTE_PTYPE_L2_ETHER_TIMESYNC: return "L2_ETHER_TIMESYNC";
+ case RTE_PTYPE_L2_ETHER_ARP: return "L2_ETHER_ARP";
+ case RTE_PTYPE_L2_ETHER_LLDP: return "L2_ETHER_LLDP";
+ case RTE_PTYPE_L2_ETHER_NSH: return "L2_ETHER_NSH";
+ case RTE_PTYPE_L2_ETHER_VLAN: return "L2_ETHER_VLAN";
+ case RTE_PTYPE_L2_ETHER_QINQ: return "L2_ETHER_QINQ";
+ default: return "L2_UNKNOWN";
+ }
+}
+
+/* get the name of the l3 packet type */
+const char *rte_get_ptype_l3_name(uint32_t ptype)
+{
+ switch (ptype & RTE_PTYPE_L3_MASK) {
+ case RTE_PTYPE_L3_IPV4: return "L3_IPV4";
+ case RTE_PTYPE_L3_IPV4_EXT: return "L3_IPV4_EXT";
+ case RTE_PTYPE_L3_IPV6: return "L3_IPV6";
+ case RTE_PTYPE_L3_IPV4_EXT_UNKNOWN: return "L3_IPV4_EXT_UNKNOWN";
+ case RTE_PTYPE_L3_IPV6_EXT: return "L3_IPV6_EXT";
+ case RTE_PTYPE_L3_IPV6_EXT_UNKNOWN: return "L3_IPV6_EXT_UNKNOWN";
+ default: return "L3_UNKNOWN";
+ }
+}
+
+/* get the name of the l4 packet type */
+const char *rte_get_ptype_l4_name(uint32_t ptype)
+{
+ switch (ptype & RTE_PTYPE_L4_MASK) {
+ case RTE_PTYPE_L4_TCP: return "L4_TCP";
+ case RTE_PTYPE_L4_UDP: return "L4_UDP";
+ case RTE_PTYPE_L4_FRAG: return "L4_FRAG";
+ case RTE_PTYPE_L4_SCTP: return "L4_SCTP";
+ case RTE_PTYPE_L4_ICMP: return "L4_ICMP";
+ case RTE_PTYPE_L4_NONFRAG: return "L4_NONFRAG";
+ default: return "L4_UNKNOWN";
+ }
+}
+
+/* get the name of the tunnel packet type */
+const char *rte_get_ptype_tunnel_name(uint32_t ptype)
+{
+ switch (ptype & RTE_PTYPE_TUNNEL_MASK) {
+ case RTE_PTYPE_TUNNEL_IP: return "TUNNEL_IP";
+ case RTE_PTYPE_TUNNEL_GRE: return "TUNNEL_GRE";
+ case RTE_PTYPE_TUNNEL_VXLAN: return "TUNNEL_VXLAN";
+ case RTE_PTYPE_TUNNEL_NVGRE: return "TUNNEL_NVGRE";
+ case RTE_PTYPE_TUNNEL_GENEVE: return "TUNNEL_GENEVE";
+ case RTE_PTYPE_TUNNEL_GRENAT: return "TUNNEL_GRENAT";
+ default: return "TUNNEL_UNKNOWN";
+ }
+}
+
+/* get the name of the inner_l2 packet type */
+const char *rte_get_ptype_inner_l2_name(uint32_t ptype)
+{
+ switch (ptype & RTE_PTYPE_INNER_L2_MASK) {
+ case RTE_PTYPE_INNER_L2_ETHER: return "INNER_L2_ETHER";
+ case RTE_PTYPE_INNER_L2_ETHER_VLAN: return "INNER_L2_ETHER_VLAN";
+ case RTE_PTYPE_INNER_L2_ETHER_QINQ: return "INNER_L2_ETHER_QINQ";
+ default: return "INNER_L2_UNKNOWN";
+ }
+}
+
+/* get the name of the inner_l3 packet type */
+const char *rte_get_ptype_inner_l3_name(uint32_t ptype)
+{
+ switch (ptype & RTE_PTYPE_INNER_L3_MASK) {
+ case RTE_PTYPE_INNER_L3_IPV4: return "INNER_L3_IPV4";
+ case RTE_PTYPE_INNER_L3_IPV4_EXT: return "INNER_L3_IPV4_EXT";
+ case RTE_PTYPE_INNER_L3_IPV6: return "INNER_L3_IPV6";
+ case RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN:
+ return "INNER_L3_IPV4_EXT_UNKNOWN";
+ case RTE_PTYPE_INNER_L3_IPV6_EXT: return "INNER_L3_IPV6_EXT";
+ case RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN:
+ return "INNER_L3_IPV6_EXT_UNKNOWN";
+ default: return "INNER_L3_UNKNOWN";
+ }
+}
+
+/* get the name of the inner_l4 packet type */
+const char *rte_get_ptype_inner_l4_name(uint32_t ptype)
+{
+ switch (ptype & RTE_PTYPE_INNER_L4_MASK) {
+ case RTE_PTYPE_INNER_L4_TCP: return "INNER_L4_TCP";
+ case RTE_PTYPE_INNER_L4_UDP: return "INNER_L4_UDP";
+ case RTE_PTYPE_INNER_L4_FRAG: return "INNER_L4_FRAG";
+ case RTE_PTYPE_INNER_L4_SCTP: return "INNER_L4_SCTP";
+ case RTE_PTYPE_INNER_L4_ICMP: return "INNER_L4_ICMP";
+ case RTE_PTYPE_INNER_L4_NONFRAG: return "INNER_L4_NONFRAG";
+ default: return "INNER_L4_UNKNOWN";
+ }
+}
+
+/* write the packet type name into the buffer */
+int rte_get_ptype_name(uint32_t ptype, char *buf, size_t buflen)
+{
+ int ret;
+
+ if (buflen == 0)
+ return -1;
+
+ buf[0] = '\0';
+ if ((ptype & RTE_PTYPE_ALL_MASK) == RTE_PTYPE_UNKNOWN) {
+ ret = snprintf(buf, buflen, "UNKNOWN");
+ if (ret < 0)
+ return -1;
+ if ((size_t)ret >= buflen)
+ return -1;
+ return 0;
+ }
+
+ if ((ptype & RTE_PTYPE_L2_MASK) != 0) {
+ ret = snprintf(buf, buflen, "%s ",
+ rte_get_ptype_l2_name(ptype));
+ if (ret < 0)
+ return -1;
+ if ((size_t)ret >= buflen)
+ return -1;
+ buf += ret;
+ buflen -= ret;
+ }
+ if ((ptype & RTE_PTYPE_L3_MASK) != 0) {
+ ret = snprintf(buf, buflen, "%s ",
+ rte_get_ptype_l3_name(ptype));
+ if (ret < 0)
+ return -1;
+ if ((size_t)ret >= buflen)
+ return -1;
+ buf += ret;
+ buflen -= ret;
+ }
+ if ((ptype & RTE_PTYPE_L4_MASK) != 0) {
+ ret = snprintf(buf, buflen, "%s ",
+ rte_get_ptype_l4_name(ptype));
+ if (ret < 0)
+ return -1;
+ if ((size_t)ret >= buflen)
+ return -1;
+ buf += ret;
+ buflen -= ret;
+ }
+ if ((ptype & RTE_PTYPE_TUNNEL_MASK) != 0) {
+ ret = snprintf(buf, buflen, "%s ",
+ rte_get_ptype_tunnel_name(ptype));
+ if (ret < 0)
+ return -1;
+ if ((size_t)ret >= buflen)
+ return -1;
+ buf += ret;
+ buflen -= ret;
+ }
+ if ((ptype & RTE_PTYPE_INNER_L2_MASK) != 0) {
+ ret = snprintf(buf, buflen, "%s ",
+ rte_get_ptype_inner_l2_name(ptype));
+ if (ret < 0)
+ return -1;
+ if ((size_t)ret >= buflen)
+ return -1;
+ buf += ret;
+ buflen -= ret;
+ }
+ if ((ptype & RTE_PTYPE_INNER_L3_MASK) != 0) {
+ ret = snprintf(buf, buflen, "%s ",
+ rte_get_ptype_inner_l3_name(ptype));
+ if (ret < 0)
+ return -1;
+ if ((size_t)ret >= buflen)
+ return -1;
+ buf += ret;
+ buflen -= ret;
+ }
+ if ((ptype & RTE_PTYPE_INNER_L4_MASK) != 0) {
+ ret = snprintf(buf, buflen, "%s ",
+ rte_get_ptype_inner_l4_name(ptype));
+ if (ret < 0)
+ return -1;
+ if ((size_t)ret >= buflen)
+ return -1;
+ buf += ret;
+ buflen -= ret;
+ }
+
+ return 0;
+}
diff --git a/lib/librte_mbuf/rte_mbuf_ptype.h b/lib/librte_mbuf/rte_mbuf_ptype.h
new file mode 100644
index 00000000..ff6de9d1
--- /dev/null
+++ b/lib/librte_mbuf/rte_mbuf_ptype.h
@@ -0,0 +1,668 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright(c) 2010-2016 Intel Corporation.
+ * Copyright 2014-2016 6WIND S.A.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef _RTE_MBUF_PTYPE_H_
+#define _RTE_MBUF_PTYPE_H_
+
+/**
+ * @file
+ * RTE Mbuf Packet Types
+ *
+ * This file contains declarations for features related to mbuf packet
+ * types. The packet type gives information about the data carried by the
+ * mbuf, and is stored in the mbuf in a 32-bit field.
+ *
+ * The 32 bits are divided into several fields to mark packet types. Note that
+ * each field is indexical.
+ * - Bit 3:0 is for L2 types.
+ * - Bit 7:4 is for L3 or outer L3 (for tunneling case) types.
+ * - Bit 11:8 is for L4 or outer L4 (for tunneling case) types.
+ * - Bit 15:12 is for tunnel types.
+ * - Bit 19:16 is for inner L2 types.
+ * - Bit 23:20 is for inner L3 types.
+ * - Bit 27:24 is for inner L4 types.
+ * - Bit 31:28 is reserved.
+ *
+ * To be compatible with Vector PMD, RTE_PTYPE_L3_IPV4, RTE_PTYPE_L3_IPV4_EXT,
+ * RTE_PTYPE_L3_IPV6, RTE_PTYPE_L3_IPV6_EXT, RTE_PTYPE_L4_TCP, RTE_PTYPE_L4_UDP
+ * and RTE_PTYPE_L4_SCTP should be kept as below in a contiguous 7 bits.
+ *
+ * Note that L3 types values are selected for checking IPV4/IPV6 header from
+ * performance point of view. Reading annotations of RTE_ETH_IS_IPV4_HDR and
+ * RTE_ETH_IS_IPV6_HDR is needed for any future changes of L3 type values.
+ *
+ * Note that the packet types of the same packet recognized by different
+ * hardware may be different, as different hardware may have different
+ * capability of packet type recognition.
+ *
+ * examples:
+ * <'ether type'=0x0800
+ * | 'version'=4, 'protocol'=0x29
+ * | 'version'=6, 'next header'=0x3A
+ * | 'ICMPv6 header'>
+ * will be recognized on i40e hardware as packet type combination of,
+ * RTE_PTYPE_L2_ETHER |
+ * RTE_PTYPE_L3_IPV4_EXT_UNKNOWN |
+ * RTE_PTYPE_TUNNEL_IP |
+ * RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN |
+ * RTE_PTYPE_INNER_L4_ICMP.
+ *
+ * <'ether type'=0x86DD
+ * | 'version'=6, 'next header'=0x2F
+ * | 'GRE header'
+ * | 'version'=6, 'next header'=0x11
+ * | 'UDP header'>
+ * will be recognized on i40e hardware as packet type combination of,
+ * RTE_PTYPE_L2_ETHER |
+ * RTE_PTYPE_L3_IPV6_EXT_UNKNOWN |
+ * RTE_PTYPE_TUNNEL_GRENAT |
+ * RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN |
+ * RTE_PTYPE_INNER_L4_UDP.
+ */
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/**
+ * No packet type information.
+ */
+#define RTE_PTYPE_UNKNOWN 0x00000000
+/**
+ * Ethernet packet type.
+ * It is used for outer packet for tunneling cases.
+ *
+ * Packet format:
+ * <'ether type'=[0x0800|0x86DD]>
+ */
+#define RTE_PTYPE_L2_ETHER 0x00000001
+/**
+ * Ethernet packet type for time sync.
+ *
+ * Packet format:
+ * <'ether type'=0x88F7>
+ */
+#define RTE_PTYPE_L2_ETHER_TIMESYNC 0x00000002
+/**
+ * ARP (Address Resolution Protocol) packet type.
+ *
+ * Packet format:
+ * <'ether type'=0x0806>
+ */
+#define RTE_PTYPE_L2_ETHER_ARP 0x00000003
+/**
+ * LLDP (Link Layer Discovery Protocol) packet type.
+ *
+ * Packet format:
+ * <'ether type'=0x88CC>
+ */
+#define RTE_PTYPE_L2_ETHER_LLDP 0x00000004
+/**
+ * NSH (Network Service Header) packet type.
+ *
+ * Packet format:
+ * <'ether type'=0x894F>
+ */
+#define RTE_PTYPE_L2_ETHER_NSH 0x00000005
+/**
+ * VLAN packet type.
+ *
+ * Packet format:
+ * <'ether type'=[0x8100]>
+ */
+#define RTE_PTYPE_L2_ETHER_VLAN 0x00000006
+/**
+ * QinQ packet type.
+ *
+ * Packet format:
+ * <'ether type'=[0x88A8]>
+ */
+#define RTE_PTYPE_L2_ETHER_QINQ 0x00000007
+/**
+ * Mask of layer 2 packet types.
+ * It is used for outer packet for tunneling cases.
+ */
+#define RTE_PTYPE_L2_MASK 0x0000000f
+/**
+ * IP (Internet Protocol) version 4 packet type.
+ * It is used for outer packet for tunneling cases, and does not contain any
+ * header option.
+ *
+ * Packet format:
+ * <'ether type'=0x0800
+ * | 'version'=4, 'ihl'=5>
+ */
+#define RTE_PTYPE_L3_IPV4 0x00000010
+/**
+ * IP (Internet Protocol) version 4 packet type.
+ * It is used for outer packet for tunneling cases, and contains header
+ * options.
+ *
+ * Packet format:
+ * <'ether type'=0x0800
+ * | 'version'=4, 'ihl'=[6-15], 'options'>
+ */
+#define RTE_PTYPE_L3_IPV4_EXT 0x00000030
+/**
+ * IP (Internet Protocol) version 6 packet type.
+ * It is used for outer packet for tunneling cases, and does not contain any
+ * extension header.
+ *
+ * Packet format:
+ * <'ether type'=0x86DD
+ * | 'version'=6, 'next header'=0x3B>
+ */
+#define RTE_PTYPE_L3_IPV6 0x00000040
+/**
+ * IP (Internet Protocol) version 4 packet type.
+ * It is used for outer packet for tunneling cases, and may or may not contain
+ * header options.
+ *
+ * Packet format:
+ * <'ether type'=0x0800
+ * | 'version'=4, 'ihl'=[5-15], <'options'>>
+ */
+#define RTE_PTYPE_L3_IPV4_EXT_UNKNOWN 0x00000090
+/**
+ * IP (Internet Protocol) version 6 packet type.
+ * It is used for outer packet for tunneling cases, and contains extension
+ * headers.
+ *
+ * Packet format:
+ * <'ether type'=0x86DD
+ * | 'version'=6, 'next header'=[0x0|0x2B|0x2C|0x32|0x33|0x3C|0x87],
+ * 'extension headers'>
+ */
+#define RTE_PTYPE_L3_IPV6_EXT 0x000000c0
+/**
+ * IP (Internet Protocol) version 6 packet type.
+ * It is used for outer packet for tunneling cases, and may or may not contain
+ * extension headers.
+ *
+ * Packet format:
+ * <'ether type'=0x86DD
+ * | 'version'=6, 'next header'=[0x3B|0x0|0x2B|0x2C|0x32|0x33|0x3C|0x87],
+ * <'extension headers'>>
+ */
+#define RTE_PTYPE_L3_IPV6_EXT_UNKNOWN 0x000000e0
+/**
+ * Mask of layer 3 packet types.
+ * It is used for outer packet for tunneling cases.
+ */
+#define RTE_PTYPE_L3_MASK 0x000000f0
+/**
+ * TCP (Transmission Control Protocol) packet type.
+ * It is used for outer packet for tunneling cases.
+ *
+ * Packet format:
+ * <'ether type'=0x0800
+ * | 'version'=4, 'protocol'=6, 'MF'=0, 'frag_offset'=0>
+ * or,
+ * <'ether type'=0x86DD
+ * | 'version'=6, 'next header'=6>
+ */
+#define RTE_PTYPE_L4_TCP 0x00000100
+/**
+ * UDP (User Datagram Protocol) packet type.
+ * It is used for outer packet for tunneling cases.
+ *
+ * Packet format:
+ * <'ether type'=0x0800
+ * | 'version'=4, 'protocol'=17, 'MF'=0, 'frag_offset'=0>
+ * or,
+ * <'ether type'=0x86DD
+ * | 'version'=6, 'next header'=17>
+ */
+#define RTE_PTYPE_L4_UDP 0x00000200
+/**
+ * Fragmented IP (Internet Protocol) packet type.
+ * It is used for outer packet for tunneling cases.
+ *
+ * It refers to those packets of any IP types, which can be recognized as
+ * fragmented. A fragmented packet cannot be recognized as any other L4 types
+ * (RTE_PTYPE_L4_TCP, RTE_PTYPE_L4_UDP, RTE_PTYPE_L4_SCTP, RTE_PTYPE_L4_ICMP,
+ * RTE_PTYPE_L4_NONFRAG).
+ *
+ * Packet format:
+ * <'ether type'=0x0800
+ * | 'version'=4, 'MF'=1>
+ * or,
+ * <'ether type'=0x0800
+ * | 'version'=4, 'frag_offset'!=0>
+ * or,
+ * <'ether type'=0x86DD
+ * | 'version'=6, 'next header'=44>
+ */
+#define RTE_PTYPE_L4_FRAG 0x00000300
+/**
+ * SCTP (Stream Control Transmission Protocol) packet type.
+ * It is used for outer packet for tunneling cases.
+ *
+ * Packet format:
+ * <'ether type'=0x0800
+ * | 'version'=4, 'protocol'=132, 'MF'=0, 'frag_offset'=0>
+ * or,
+ * <'ether type'=0x86DD
+ * | 'version'=6, 'next header'=132>
+ */
+#define RTE_PTYPE_L4_SCTP 0x00000400
+/**
+ * ICMP (Internet Control Message Protocol) packet type.
+ * It is used for outer packet for tunneling cases.
+ *
+ * Packet format:
+ * <'ether type'=0x0800
+ * | 'version'=4, 'protocol'=1, 'MF'=0, 'frag_offset'=0>
+ * or,
+ * <'ether type'=0x86DD
+ * | 'version'=6, 'next header'=1>
+ */
+#define RTE_PTYPE_L4_ICMP 0x00000500
+/**
+ * Non-fragmented IP (Internet Protocol) packet type.
+ * It is used for outer packet for tunneling cases.
+ *
+ * It refers to those packets of any IP types, while cannot be recognized as
+ * any of above L4 types (RTE_PTYPE_L4_TCP, RTE_PTYPE_L4_UDP,
+ * RTE_PTYPE_L4_FRAG, RTE_PTYPE_L4_SCTP, RTE_PTYPE_L4_ICMP).
+ *
+ * Packet format:
+ * <'ether type'=0x0800
+ * | 'version'=4, 'protocol'!=[6|17|132|1], 'MF'=0, 'frag_offset'=0>
+ * or,
+ * <'ether type'=0x86DD
+ * | 'version'=6, 'next header'!=[6|17|44|132|1]>
+ */
+#define RTE_PTYPE_L4_NONFRAG 0x00000600
+/**
+ * Mask of layer 4 packet types.
+ * It is used for outer packet for tunneling cases.
+ */
+#define RTE_PTYPE_L4_MASK 0x00000f00
+/**
+ * IP (Internet Protocol) in IP (Internet Protocol) tunneling packet type.
+ *
+ * Packet format:
+ * <'ether type'=0x0800
+ * | 'version'=4, 'protocol'=[4|41]>
+ * or,
+ * <'ether type'=0x86DD
+ * | 'version'=6, 'next header'=[4|41]>
+ */
+#define RTE_PTYPE_TUNNEL_IP 0x00001000
+/**
+ * GRE (Generic Routing Encapsulation) tunneling packet type.
+ *
+ * Packet format:
+ * <'ether type'=0x0800
+ * | 'version'=4, 'protocol'=47>
+ * or,
+ * <'ether type'=0x86DD
+ * | 'version'=6, 'next header'=47>
+ */
+#define RTE_PTYPE_TUNNEL_GRE 0x00002000
+/**
+ * VXLAN (Virtual eXtensible Local Area Network) tunneling packet type.
+ *
+ * Packet format:
+ * <'ether type'=0x0800
+ * | 'version'=4, 'protocol'=17
+ * | 'destination port'=4789>
+ * or,
+ * <'ether type'=0x86DD
+ * | 'version'=6, 'next header'=17
+ * | 'destination port'=4789>
+ */
+#define RTE_PTYPE_TUNNEL_VXLAN 0x00003000
+/**
+ * NVGRE (Network Virtualization using Generic Routing Encapsulation) tunneling
+ * packet type.
+ *
+ * Packet format:
+ * <'ether type'=0x0800
+ * | 'version'=4, 'protocol'=47
+ * | 'protocol type'=0x6558>
+ * or,
+ * <'ether type'=0x86DD
+ * | 'version'=6, 'next header'=47
+ * | 'protocol type'=0x6558>
+ */
+#define RTE_PTYPE_TUNNEL_NVGRE 0x00004000
+/**
+ * GENEVE (Generic Network Virtualization Encapsulation) tunneling packet type.
+ *
+ * Packet format:
+ * <'ether type'=0x0800
+ * | 'version'=4, 'protocol'=17
+ * | 'destination port'=6081>
+ * or,
+ * <'ether type'=0x86DD
+ * | 'version'=6, 'next header'=17
+ * | 'destination port'=6081>
+ */
+#define RTE_PTYPE_TUNNEL_GENEVE 0x00005000
+/**
+ * Tunneling packet type of Teredo, VXLAN (Virtual eXtensible Local Area
+ * Network) or GRE (Generic Routing Encapsulation) could be recognized as this
+ * packet type, if they cannot be recognized independently due to hardware
+ * capability.
+ */
+#define RTE_PTYPE_TUNNEL_GRENAT 0x00006000
+/**
+ * Mask of tunneling packet types.
+ */
+#define RTE_PTYPE_TUNNEL_MASK 0x0000f000
+/**
+ * Ethernet packet type.
+ * It is used for inner packet type only.
+ *
+ * Packet format (inner only):
+ * <'ether type'=[0x800|0x86DD]>
+ */
+#define RTE_PTYPE_INNER_L2_ETHER 0x00010000
+/**
+ * Ethernet packet type with VLAN (Virtual Local Area Network) tag.
+ *
+ * Packet format (inner only):
+ * <'ether type'=[0x800|0x86DD], vlan=[1-4095]>
+ */
+#define RTE_PTYPE_INNER_L2_ETHER_VLAN 0x00020000
+/**
+ * QinQ packet type.
+ *
+ * Packet format:
+ * <'ether type'=[0x88A8]>
+ */
+#define RTE_PTYPE_INNER_L2_ETHER_QINQ 0x00030000
+/**
+ * Mask of inner layer 2 packet types.
+ */
+#define RTE_PTYPE_INNER_L2_MASK 0x000f0000
+/**
+ * IP (Internet Protocol) version 4 packet type.
+ * It is used for inner packet only, and does not contain any header option.
+ *
+ * Packet format (inner only):
+ * <'ether type'=0x0800
+ * | 'version'=4, 'ihl'=5>
+ */
+#define RTE_PTYPE_INNER_L3_IPV4 0x00100000
+/**
+ * IP (Internet Protocol) version 4 packet type.
+ * It is used for inner packet only, and contains header options.
+ *
+ * Packet format (inner only):
+ * <'ether type'=0x0800
+ * | 'version'=4, 'ihl'=[6-15], 'options'>
+ */
+#define RTE_PTYPE_INNER_L3_IPV4_EXT 0x00200000
+/**
+ * IP (Internet Protocol) version 6 packet type.
+ * It is used for inner packet only, and does not contain any extension header.
+ *
+ * Packet format (inner only):
+ * <'ether type'=0x86DD
+ * | 'version'=6, 'next header'=0x3B>
+ */
+#define RTE_PTYPE_INNER_L3_IPV6 0x00300000
+/**
+ * IP (Internet Protocol) version 4 packet type.
+ * It is used for inner packet only, and may or may not contain header options.
+ *
+ * Packet format (inner only):
+ * <'ether type'=0x0800
+ * | 'version'=4, 'ihl'=[5-15], <'options'>>
+ */
+#define RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN 0x00400000
+/**
+ * IP (Internet Protocol) version 6 packet type.
+ * It is used for inner packet only, and contains extension headers.
+ *
+ * Packet format (inner only):
+ * <'ether type'=0x86DD
+ * | 'version'=6, 'next header'=[0x0|0x2B|0x2C|0x32|0x33|0x3C|0x87],
+ * 'extension headers'>
+ */
+#define RTE_PTYPE_INNER_L3_IPV6_EXT 0x00500000
+/**
+ * IP (Internet Protocol) version 6 packet type.
+ * It is used for inner packet only, and may or may not contain extension
+ * headers.
+ *
+ * Packet format (inner only):
+ * <'ether type'=0x86DD
+ * | 'version'=6, 'next header'=[0x3B|0x0|0x2B|0x2C|0x32|0x33|0x3C|0x87],
+ * <'extension headers'>>
+ */
+#define RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN 0x00600000
+/**
+ * Mask of inner layer 3 packet types.
+ */
+#define RTE_PTYPE_INNER_L3_MASK 0x00f00000
+/**
+ * TCP (Transmission Control Protocol) packet type.
+ * It is used for inner packet only.
+ *
+ * Packet format (inner only):
+ * <'ether type'=0x0800
+ * | 'version'=4, 'protocol'=6, 'MF'=0, 'frag_offset'=0>
+ * or,
+ * <'ether type'=0x86DD
+ * | 'version'=6, 'next header'=6>
+ */
+#define RTE_PTYPE_INNER_L4_TCP 0x01000000
+/**
+ * UDP (User Datagram Protocol) packet type.
+ * It is used for inner packet only.
+ *
+ * Packet format (inner only):
+ * <'ether type'=0x0800
+ * | 'version'=4, 'protocol'=17, 'MF'=0, 'frag_offset'=0>
+ * or,
+ * <'ether type'=0x86DD
+ * | 'version'=6, 'next header'=17>
+ */
+#define RTE_PTYPE_INNER_L4_UDP 0x02000000
+/**
+ * Fragmented IP (Internet Protocol) packet type.
+ * It is used for inner packet only, and may or may not have layer 4 packet.
+ *
+ * Packet format (inner only):
+ * <'ether type'=0x0800
+ * | 'version'=4, 'MF'=1>
+ * or,
+ * <'ether type'=0x0800
+ * | 'version'=4, 'frag_offset'!=0>
+ * or,
+ * <'ether type'=0x86DD
+ * | 'version'=6, 'next header'=44>
+ */
+#define RTE_PTYPE_INNER_L4_FRAG 0x03000000
+/**
+ * SCTP (Stream Control Transmission Protocol) packet type.
+ * It is used for inner packet only.
+ *
+ * Packet format (inner only):
+ * <'ether type'=0x0800
+ * | 'version'=4, 'protocol'=132, 'MF'=0, 'frag_offset'=0>
+ * or,
+ * <'ether type'=0x86DD
+ * | 'version'=6, 'next header'=132>
+ */
+#define RTE_PTYPE_INNER_L4_SCTP 0x04000000
+/**
+ * ICMP (Internet Control Message Protocol) packet type.
+ * It is used for inner packet only.
+ *
+ * Packet format (inner only):
+ * <'ether type'=0x0800
+ * | 'version'=4, 'protocol'=1, 'MF'=0, 'frag_offset'=0>
+ * or,
+ * <'ether type'=0x86DD
+ * | 'version'=6, 'next header'=1>
+ */
+#define RTE_PTYPE_INNER_L4_ICMP 0x05000000
+/**
+ * Non-fragmented IP (Internet Protocol) packet type.
+ * It is used for inner packet only, and may or may not have other unknown layer
+ * 4 packet types.
+ *
+ * Packet format (inner only):
+ * <'ether type'=0x0800
+ * | 'version'=4, 'protocol'!=[6|17|132|1], 'MF'=0, 'frag_offset'=0>
+ * or,
+ * <'ether type'=0x86DD
+ * | 'version'=6, 'next header'!=[6|17|44|132|1]>
+ */
+#define RTE_PTYPE_INNER_L4_NONFRAG 0x06000000
+/**
+ * Mask of inner layer 4 packet types.
+ */
+#define RTE_PTYPE_INNER_L4_MASK 0x0f000000
+/**
+ * All valid layer masks.
+ */
+#define RTE_PTYPE_ALL_MASK 0x0fffffff
+
+/**
+ * Check if the (outer) L3 header is IPv4. To avoid comparing IPv4 types one by
+ * one, bit 4 is selected to be used for IPv4 only. Then checking bit 4 can
+ * determine if it is an IPV4 packet.
+ */
+#define RTE_ETH_IS_IPV4_HDR(ptype) ((ptype) & RTE_PTYPE_L3_IPV4)
+
+/**
+ * Check if the (outer) L3 header is IPv6. To avoid comparing IPv6 types one by
+ * one, bit 6 is selected to be used for IPv6 only. Then checking bit 6 can
+ * determine if it is an IPV6 packet.
+ */
+#define RTE_ETH_IS_IPV6_HDR(ptype) ((ptype) & RTE_PTYPE_L3_IPV6)
+
+/* Check if it is a tunneling packet */
+#define RTE_ETH_IS_TUNNEL_PKT(ptype) ((ptype) & \
+ (RTE_PTYPE_TUNNEL_MASK | \
+ RTE_PTYPE_INNER_L2_MASK | \
+ RTE_PTYPE_INNER_L3_MASK | \
+ RTE_PTYPE_INNER_L4_MASK))
+
+/**
+ * Get the name of the l2 packet type
+ *
+ * @param ptype
+ * The packet type value.
+ * @return
+ * A non-null string describing the packet type.
+ */
+const char *rte_get_ptype_l2_name(uint32_t ptype);
+
+/**
+ * Get the name of the l3 packet type
+ *
+ * @param ptype
+ * The packet type value.
+ * @return
+ * A non-null string describing the packet type.
+ */
+const char *rte_get_ptype_l3_name(uint32_t ptype);
+
+/**
+ * Get the name of the l4 packet type
+ *
+ * @param ptype
+ * The packet type value.
+ * @return
+ * A non-null string describing the packet type.
+ */
+const char *rte_get_ptype_l4_name(uint32_t ptype);
+
+/**
+ * Get the name of the tunnel packet type
+ *
+ * @param ptype
+ * The packet type value.
+ * @return
+ * A non-null string describing the packet type.
+ */
+const char *rte_get_ptype_tunnel_name(uint32_t ptype);
+
+/**
+ * Get the name of the inner_l2 packet type
+ *
+ * @param ptype
+ * The packet type value.
+ * @return
+ * A non-null string describing the packet type.
+ */
+const char *rte_get_ptype_inner_l2_name(uint32_t ptype);
+
+/**
+ * Get the name of the inner_l3 packet type
+ *
+ * @param ptype
+ * The packet type value.
+ * @return
+ * A non-null string describing the packet type.
+ */
+const char *rte_get_ptype_inner_l3_name(uint32_t ptype);
+
+/**
+ * Get the name of the inner_l4 packet type
+ *
+ * @param ptype
+ * The packet type value.
+ * @return
+ * A non-null string describing the packet type.
+ */
+const char *rte_get_ptype_inner_l4_name(uint32_t ptype);
+
+/**
+ * Write the packet type name into the buffer
+ *
+ * @param ptype
+ * The packet type value.
+ * @param buf
+ * The buffer where the string is written.
+ * @param buflen
+ * The length of the buffer.
+ * @return
+ * - 0 on success
+ * - (-1) if the buffer is too small
+ */
+int rte_get_ptype_name(uint32_t ptype, char *buf, size_t buflen);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _RTE_MBUF_PTYPE_H_ */
diff --git a/lib/librte_mbuf/rte_mbuf_version.map b/lib/librte_mbuf/rte_mbuf_version.map
index e10f6bdc..6e2ea845 100644
--- a/lib/librte_mbuf/rte_mbuf_version.map
+++ b/lib/librte_mbuf/rte_mbuf_version.map
@@ -18,3 +18,20 @@ DPDK_2.1 {
rte_pktmbuf_pool_create;
} DPDK_2.0;
+
+DPDK_16.11 {
+ global:
+
+ __rte_pktmbuf_read;
+ rte_get_ptype_inner_l2_name;
+ rte_get_ptype_inner_l3_name;
+ rte_get_ptype_inner_l4_name;
+ rte_get_ptype_l2_name;
+ rte_get_ptype_l3_name;
+ rte_get_ptype_l4_name;
+ rte_get_ptype_name;
+ rte_get_ptype_tunnel_name;
+ rte_get_rx_ol_flag_list;
+ rte_get_tx_ol_flag_list;
+
+} DPDK_2.1;
diff --git a/lib/librte_mempool/rte_mempool.c b/lib/librte_mempool/rte_mempool.c
index ad7c470e..aa513b97 100644
--- a/lib/librte_mempool/rte_mempool.c
+++ b/lib/librte_mempool/rte_mempool.c
@@ -55,7 +55,6 @@
#include <rte_per_lcore.h>
#include <rte_lcore.h>
#include <rte_branch_prediction.h>
-#include <rte_ring.h>
#include <rte_errno.h>
#include <rte_string_fns.h>
#include <rte_spinlock.h>
@@ -911,9 +910,8 @@ rte_mempool_create(const char *name, unsigned n, unsigned elt_size,
/*
* Create the mempool over already allocated chunk of memory.
* That external memory buffer can consists of physically disjoint pages.
- * Setting vaddr to NULL, makes mempool to fallback to original behaviour
- * and allocate space for mempool and it's elements as one big chunk of
- * physically continuos memory.
+ * Setting vaddr to NULL, makes mempool to fallback to rte_mempool_create()
+ * behavior.
*/
struct rte_mempool *
rte_mempool_xmem_create(const char *name, unsigned n, unsigned elt_size,
diff --git a/lib/librte_mempool/rte_mempool.h b/lib/librte_mempool/rte_mempool.h
index 059ad9e5..440f3b1b 100644
--- a/lib/librte_mempool/rte_mempool.h
+++ b/lib/librte_mempool/rte_mempool.h
@@ -75,6 +75,7 @@
#include <rte_branch_prediction.h>
#include <rte_ring.h>
#include <rte_memcpy.h>
+#include <rte_common.h>
#ifdef __cplusplus
extern "C" {
@@ -216,6 +217,7 @@ struct rte_mempool {
* RTE_MEMPOOL_NAMESIZE next time the ABI changes
*/
char name[RTE_MEMZONE_NAMESIZE]; /**< Name of mempool. */
+ RTE_STD_C11
union {
void *pool_data; /**< Ring or pool to store objects. */
uint64_t pool_id; /**< External mempool identifier. */
@@ -587,10 +589,8 @@ typedef void (rte_mempool_ctor_t)(struct rte_mempool *, void *);
/**
* Create a new mempool named *name* in memory.
*
- * This function uses ``memzone_reserve()`` to allocate memory. The
+ * This function uses ``rte_memzone_reserve()`` to allocate memory. The
* pool contains n elements of elt_size. Its size is set to n.
- * All elements of the mempool are allocated together with the mempool header,
- * in one physically continuous chunk of memory.
*
* @param name
* The name of the mempool.
@@ -746,7 +746,7 @@ rte_mempool_xmem_create(const char *name, unsigned n, unsigned elt_size,
*
* The mempool is allocated and initialized, but it is not populated: no
* memory is allocated for the mempool elements. The user has to call
- * rte_mempool_populate_*() or to add memory chunks to the pool. Once
+ * rte_mempool_populate_*() to add memory chunks to the pool. Once
* populated, the user may also want to initialize each object with
* rte_mempool_obj_iter().
*
@@ -798,6 +798,10 @@ rte_mempool_free(struct rte_mempool *mp);
* Add a virtually and physically contiguous memory chunk in the pool
* where objects can be instanciated.
*
+ * If the given physical address is unknown (paddr = RTE_BAD_PHYS_ADDR),
+ * the chunk doesn't need to be physically contiguous (only virtually),
+ * and allocated objects may span two pages.
+ *
* @param mp
* A pointer to the mempool structure.
* @param vaddr
@@ -946,7 +950,7 @@ uint32_t rte_mempool_mem_iter(struct rte_mempool *mp,
rte_mempool_mem_cb_t *mem_cb, void *mem_cb_arg);
/**
- * Dump the status of the mempool to the console.
+ * Dump the status of the mempool to a file.
*
* @param f
* A pointer to a file for output
diff --git a/lib/librte_meter/rte_meter.h b/lib/librte_meter/rte_meter.h
index 2cd8d814..2ab71849 100644
--- a/lib/librte_meter/rte_meter.h
+++ b/lib/librte_meter/rte_meter.h
@@ -232,13 +232,15 @@ rte_meter_srtcm_color_blind_check(struct rte_meter_srtcm *m,
n_periods = time_diff / m->cir_period;
m->time += n_periods * m->cir_period;
+ /* Put the tokens overflowing from tc into te bucket */
tc = m->tc + n_periods * m->cir_bytes_per_period;
- if (tc > m->cbs)
+ te = m->te;
+ if (tc > m->cbs) {
+ te += (tc - m->cbs);
+ if (te > m->ebs)
+ te = m->ebs;
tc = m->cbs;
-
- te = m->te + n_periods * m->cir_bytes_per_period;
- if (te > m->ebs)
- te = m->ebs;
+ }
/* Color logic */
if (tc >= pkt_len) {
@@ -271,13 +273,15 @@ rte_meter_srtcm_color_aware_check(struct rte_meter_srtcm *m,
n_periods = time_diff / m->cir_period;
m->time += n_periods * m->cir_period;
+ /* Put the tokens overflowing from tc into te bucket */
tc = m->tc + n_periods * m->cir_bytes_per_period;
- if (tc > m->cbs)
+ te = m->te;
+ if (tc > m->cbs) {
+ te += (tc - m->cbs);
+ if (te > m->ebs)
+ te = m->ebs;
tc = m->cbs;
-
- te = m->te + n_periods * m->cir_bytes_per_period;
- if (te > m->ebs)
- te = m->ebs;
+ }
/* Color logic */
if ((pkt_color == e_RTE_METER_GREEN) && (tc >= pkt_len)) {
diff --git a/lib/librte_net/Makefile b/lib/librte_net/Makefile
index ad2e482d..20cf6644 100644
--- a/lib/librte_net/Makefile
+++ b/lib/librte_net/Makefile
@@ -31,10 +31,20 @@
include $(RTE_SDK)/mk/rte.vars.mk
+LIB = librte_net.a
+
CFLAGS += $(WERROR_FLAGS) -I$(SRCDIR) -O3
+EXPORT_MAP := rte_net_version.map
+LIBABIVER := 1
+
+SRCS-$(CONFIG_RTE_LIBRTE_NET) := rte_net.c
+
# install includes
-SYMLINK-$(CONFIG_RTE_LIBRTE_NET)-include := rte_ip.h rte_tcp.h rte_udp.h rte_sctp.h rte_icmp.h rte_arp.h
+SYMLINK-$(CONFIG_RTE_LIBRTE_NET)-include := rte_ip.h rte_tcp.h rte_udp.h
+SYMLINK-$(CONFIG_RTE_LIBRTE_NET)-include += rte_sctp.h rte_icmp.h rte_arp.h
+SYMLINK-$(CONFIG_RTE_LIBRTE_NET)-include += rte_ether.h rte_gre.h rte_net.h
+DEPDIRS-$(CONFIG_RTE_LIBRTE_NET) += lib/librte_eal lib/librte_mbuf
-include $(RTE_SDK)/mk/rte.install.mk
+include $(RTE_SDK)/mk/rte.lib.mk
diff --git a/lib/librte_ether/rte_ether.h b/lib/librte_net/rte_ether.h
index 1d62d8e5..ff3d0654 100644
--- a/lib/librte_ether/rte_ether.h
+++ b/lib/librte_net/rte_ether.h
@@ -84,7 +84,7 @@ extern "C" {
* See http://standards.ieee.org/regauth/groupmac/tutorial.html
*/
struct ether_addr {
- uint8_t addr_bytes[ETHER_ADDR_LEN]; /**< Address bytes in transmission order */
+ uint8_t addr_bytes[ETHER_ADDR_LEN]; /**< Addr bytes in tx order */
} __attribute__((__packed__));
#define ETHER_LOCAL_ADMIN_ADDR 0x02 /**< Locally assigned Eth. address. */
@@ -224,7 +224,7 @@ static inline int is_local_admin_ether_addr(const struct ether_addr *ea)
*/
static inline int is_valid_assigned_ether_addr(const struct ether_addr *ea)
{
- return is_unicast_ether_addr(ea) && (! is_zero_ether_addr(ea));
+ return is_unicast_ether_addr(ea) && (!is_zero_ether_addr(ea));
}
/**
@@ -236,7 +236,7 @@ static inline int is_valid_assigned_ether_addr(const struct ether_addr *ea)
static inline void eth_random_addr(uint8_t *addr)
{
uint64_t rand = rte_rand();
- uint8_t *p = (uint8_t*)&rand;
+ uint8_t *p = (uint8_t *)&rand;
rte_memcpy(addr, p, ETHER_ADDR_LEN);
addr[0] &= ~ETHER_GROUP_ADDR; /* clear multicast bit */
@@ -329,6 +329,7 @@ struct vxlan_hdr {
#define ETHER_TYPE_ARP 0x0806 /**< Arp Protocol. */
#define ETHER_TYPE_RARP 0x8035 /**< Reverse Arp Protocol. */
#define ETHER_TYPE_VLAN 0x8100 /**< IEEE 802.1Q VLAN tagging. */
+#define ETHER_TYPE_QINQ 0x88A8 /**< IEEE 802.1ad QinQ tagging. */
#define ETHER_TYPE_1588 0x88F7 /**< IEEE 802.1AS 1588 Precise Time Protocol. */
#define ETHER_TYPE_SLOW 0x8809 /**< Slow protocols (LACP and Marker). */
#define ETHER_TYPE_TEB 0x6558 /**< Transparent Ethernet Bridging. */
diff --git a/lib/librte_vhost/vhost_user/virtio-net-user.h b/lib/librte_net/rte_gre.h
index e1b967b8..46568ff5 100644
--- a/lib/librte_vhost/vhost_user/virtio-net-user.h
+++ b/lib/librte_net/rte_gre.h
@@ -1,8 +1,7 @@
/*-
* BSD LICENSE
*
- * Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
- * All rights reserved.
+ * Copyright 2016 6WIND S.A.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
@@ -31,32 +30,42 @@
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
-#ifndef _VIRTIO_NET_USER_H
-#define _VIRTIO_NET_USER_H
+#ifndef _RTE_GRE_H_
+#define _RTE_GRE_H_
-#include "vhost-net.h"
-#include "vhost-net-user.h"
+#include <stdint.h>
+#include <rte_byteorder.h>
-#define VHOST_USER_PROTOCOL_F_MQ 0
-#define VHOST_USER_PROTOCOL_F_LOG_SHMFD 1
-#define VHOST_USER_PROTOCOL_F_RARP 2
-
-#define VHOST_USER_PROTOCOL_FEATURES ((1ULL << VHOST_USER_PROTOCOL_F_MQ) | \
- (1ULL << VHOST_USER_PROTOCOL_F_LOG_SHMFD) |\
- (1ULL << VHOST_USER_PROTOCOL_F_RARP))
-
-int user_set_mem_table(int, struct VhostUserMsg *);
-
-void user_set_vring_call(int, struct VhostUserMsg *);
-
-void user_set_vring_kick(int, struct VhostUserMsg *);
-
-void user_set_protocol_features(int vid, uint64_t protocol_features);
-int user_set_log_base(int vid, struct VhostUserMsg *);
-int user_send_rarp(int vid, struct VhostUserMsg *);
-
-int user_get_vring_base(int, struct vhost_vring_state *);
+#ifdef __cplusplus
+extern "C" {
+#endif
-int user_set_vring_enable(int vid, struct vhost_vring_state *state);
+/**
+ * GRE Header
+ */
+struct gre_hdr {
+#if RTE_BYTE_ORDER == RTE_LITTLE_ENDIAN
+ uint16_t res2:4; /**< Reserved */
+ uint16_t s:1; /**< Sequence Number Present bit */
+ uint16_t k:1; /**< Key Present bit */
+ uint16_t res1:1; /**< Reserved */
+ uint16_t c:1; /**< Checksum Present bit */
+ uint16_t ver:3; /**< Version Number */
+ uint16_t res3:5; /**< Reserved */
+#elif RTE_BYTE_ORDER == RTE_BIG_ENDIAN
+ uint16_t c:1; /**< Checksum Present bit */
+ uint16_t res1:1; /**< Reserved */
+ uint16_t k:1; /**< Key Present bit */
+ uint16_t s:1; /**< Sequence Number Present bit */
+ uint16_t res2:4; /**< Reserved */
+ uint16_t res3:5; /**< Reserved */
+ uint16_t ver:3; /**< Version Number */
+#endif
+ uint16_t proto; /**< Protocol Type */
+} __attribute__((__packed__));
+#ifdef __cplusplus
+}
#endif
+
+#endif /* _RTE_GRE_H_ */
diff --git a/lib/librte_net/rte_ip.h b/lib/librte_net/rte_ip.h
index 5b7554ab..4491b86e 100644
--- a/lib/librte_net/rte_ip.h
+++ b/lib/librte_net/rte_ip.h
@@ -230,6 +230,77 @@ rte_raw_cksum(const void *buf, size_t len)
}
/**
+ * Compute the raw (non complemented) checksum of a packet.
+ *
+ * @param m
+ * The pointer to the mbuf.
+ * @param off
+ * The offset in bytes to start the checksum.
+ * @param len
+ * The length in bytes of the data to checksum.
+ * @param cksum
+ * A pointer to the checksum, filled on success.
+ * @return
+ * 0 on success, -1 on error (bad length or offset).
+ */
+static inline int
+rte_raw_cksum_mbuf(const struct rte_mbuf *m, uint32_t off, uint32_t len,
+ uint16_t *cksum)
+{
+ const struct rte_mbuf *seg;
+ const char *buf;
+ uint32_t sum, tmp;
+ uint32_t seglen, done;
+
+ /* easy case: all data in the first segment */
+ if (off + len <= rte_pktmbuf_data_len(m)) {
+ *cksum = rte_raw_cksum(rte_pktmbuf_mtod_offset(m,
+ const char *, off), len);
+ return 0;
+ }
+
+ if (unlikely(off + len > rte_pktmbuf_pkt_len(m)))
+ return -1; /* invalid params, return a dummy value */
+
+ /* else browse the segment to find offset */
+ seglen = 0;
+ for (seg = m; seg != NULL; seg = seg->next) {
+ seglen = rte_pktmbuf_data_len(seg);
+ if (off < seglen)
+ break;
+ off -= seglen;
+ }
+ seglen -= off;
+ buf = rte_pktmbuf_mtod_offset(seg, const char *, off);
+ if (seglen >= len) {
+ /* all in one segment */
+ *cksum = rte_raw_cksum(buf, len);
+ return 0;
+ }
+
+ /* hard case: process checksum of several segments */
+ sum = 0;
+ done = 0;
+ for (;;) {
+ tmp = __rte_raw_cksum(buf, seglen, 0);
+ if (done & 1)
+ tmp = rte_bswap16(tmp);
+ sum += tmp;
+ done += seglen;
+ if (done == len)
+ break;
+ seg = seg->next;
+ buf = rte_pktmbuf_mtod(seg, const char *);
+ seglen = rte_pktmbuf_data_len(seg);
+ if (seglen > len - done)
+ seglen = len - done;
+ }
+
+ *cksum = __rte_raw_cksum_reduce(sum);
+ return 0;
+}
+
+/**
* Process the IPv4 checksum of an IPv4 header.
*
* The checksum field must be set to 0 by the caller.
diff --git a/lib/librte_net/rte_net.c b/lib/librte_net/rte_net.c
new file mode 100644
index 00000000..a8c7aff9
--- /dev/null
+++ b/lib/librte_net/rte_net.c
@@ -0,0 +1,517 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright 2016 6WIND S.A.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of 6WIND S.A. nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <stdint.h>
+
+#include <rte_mbuf.h>
+#include <rte_mbuf_ptype.h>
+#include <rte_byteorder.h>
+#include <rte_ether.h>
+#include <rte_ip.h>
+#include <rte_tcp.h>
+#include <rte_udp.h>
+#include <rte_sctp.h>
+#include <rte_gre.h>
+#include <rte_net.h>
+
+/* get l3 packet type from ip6 next protocol */
+static uint32_t
+ptype_l3_ip6(uint8_t ip6_proto)
+{
+ static const uint32_t ip6_ext_proto_map[256] = {
+ [IPPROTO_HOPOPTS] = RTE_PTYPE_L3_IPV6_EXT - RTE_PTYPE_L3_IPV6,
+ [IPPROTO_ROUTING] = RTE_PTYPE_L3_IPV6_EXT - RTE_PTYPE_L3_IPV6,
+ [IPPROTO_FRAGMENT] = RTE_PTYPE_L3_IPV6_EXT - RTE_PTYPE_L3_IPV6,
+ [IPPROTO_ESP] = RTE_PTYPE_L3_IPV6_EXT - RTE_PTYPE_L3_IPV6,
+ [IPPROTO_AH] = RTE_PTYPE_L3_IPV6_EXT - RTE_PTYPE_L3_IPV6,
+ [IPPROTO_DSTOPTS] = RTE_PTYPE_L3_IPV6_EXT - RTE_PTYPE_L3_IPV6,
+ };
+
+ return RTE_PTYPE_L3_IPV6 + ip6_ext_proto_map[ip6_proto];
+}
+
+/* get l3 packet type from ip version and header length */
+static uint32_t
+ptype_l3_ip(uint8_t ipv_ihl)
+{
+ static const uint32_t ptype_l3_ip_proto_map[256] = {
+ [0x45] = RTE_PTYPE_L3_IPV4,
+ [0x46] = RTE_PTYPE_L3_IPV4_EXT,
+ [0x47] = RTE_PTYPE_L3_IPV4_EXT,
+ [0x48] = RTE_PTYPE_L3_IPV4_EXT,
+ [0x49] = RTE_PTYPE_L3_IPV4_EXT,
+ [0x4A] = RTE_PTYPE_L3_IPV4_EXT,
+ [0x4B] = RTE_PTYPE_L3_IPV4_EXT,
+ [0x4C] = RTE_PTYPE_L3_IPV4_EXT,
+ [0x4D] = RTE_PTYPE_L3_IPV4_EXT,
+ [0x4E] = RTE_PTYPE_L3_IPV4_EXT,
+ [0x4F] = RTE_PTYPE_L3_IPV4_EXT,
+ };
+
+ return ptype_l3_ip_proto_map[ipv_ihl];
+}
+
+/* get l4 packet type from proto */
+static uint32_t
+ptype_l4(uint8_t proto)
+{
+ static const uint32_t ptype_l4_proto[256] = {
+ [IPPROTO_UDP] = RTE_PTYPE_L4_UDP,
+ [IPPROTO_TCP] = RTE_PTYPE_L4_TCP,
+ [IPPROTO_SCTP] = RTE_PTYPE_L4_SCTP,
+ };
+
+ return ptype_l4_proto[proto];
+}
+
+/* get inner l3 packet type from ip6 next protocol */
+static uint32_t
+ptype_inner_l3_ip6(uint8_t ip6_proto)
+{
+ static const uint32_t ptype_inner_ip6_ext_proto_map[256] = {
+ [IPPROTO_HOPOPTS] = RTE_PTYPE_INNER_L3_IPV6_EXT -
+ RTE_PTYPE_INNER_L3_IPV6,
+ [IPPROTO_ROUTING] = RTE_PTYPE_INNER_L3_IPV6_EXT -
+ RTE_PTYPE_INNER_L3_IPV6,
+ [IPPROTO_FRAGMENT] = RTE_PTYPE_INNER_L3_IPV6_EXT -
+ RTE_PTYPE_INNER_L3_IPV6,
+ [IPPROTO_ESP] = RTE_PTYPE_INNER_L3_IPV6_EXT -
+ RTE_PTYPE_INNER_L3_IPV6,
+ [IPPROTO_AH] = RTE_PTYPE_INNER_L3_IPV6_EXT -
+ RTE_PTYPE_INNER_L3_IPV6,
+ [IPPROTO_DSTOPTS] = RTE_PTYPE_INNER_L3_IPV6_EXT -
+ RTE_PTYPE_INNER_L3_IPV6,
+ };
+
+ return RTE_PTYPE_INNER_L3_IPV6 +
+ ptype_inner_ip6_ext_proto_map[ip6_proto];
+}
+
+/* get inner l3 packet type from ip version and header length */
+static uint32_t
+ptype_inner_l3_ip(uint8_t ipv_ihl)
+{
+ static const uint32_t ptype_inner_l3_ip_proto_map[256] = {
+ [0x45] = RTE_PTYPE_INNER_L3_IPV4,
+ [0x46] = RTE_PTYPE_INNER_L3_IPV4_EXT,
+ [0x47] = RTE_PTYPE_INNER_L3_IPV4_EXT,
+ [0x48] = RTE_PTYPE_INNER_L3_IPV4_EXT,
+ [0x49] = RTE_PTYPE_INNER_L3_IPV4_EXT,
+ [0x4A] = RTE_PTYPE_INNER_L3_IPV4_EXT,
+ [0x4B] = RTE_PTYPE_INNER_L3_IPV4_EXT,
+ [0x4C] = RTE_PTYPE_INNER_L3_IPV4_EXT,
+ [0x4D] = RTE_PTYPE_INNER_L3_IPV4_EXT,
+ [0x4E] = RTE_PTYPE_INNER_L3_IPV4_EXT,
+ [0x4F] = RTE_PTYPE_INNER_L3_IPV4_EXT,
+ };
+
+ return ptype_inner_l3_ip_proto_map[ipv_ihl];
+}
+
+/* get inner l4 packet type from proto */
+static uint32_t
+ptype_inner_l4(uint8_t proto)
+{
+ static const uint32_t ptype_inner_l4_proto[256] = {
+ [IPPROTO_UDP] = RTE_PTYPE_INNER_L4_UDP,
+ [IPPROTO_TCP] = RTE_PTYPE_INNER_L4_TCP,
+ [IPPROTO_SCTP] = RTE_PTYPE_INNER_L4_SCTP,
+ };
+
+ return ptype_inner_l4_proto[proto];
+}
+
+/* get the tunnel packet type if any, update proto and off. */
+static uint32_t
+ptype_tunnel(uint16_t *proto, const struct rte_mbuf *m,
+ uint32_t *off)
+{
+ switch (*proto) {
+ case IPPROTO_GRE: {
+ static const uint8_t opt_len[16] = {
+ [0x0] = 4,
+ [0x1] = 8,
+ [0x2] = 8,
+ [0x8] = 8,
+ [0x3] = 12,
+ [0x9] = 12,
+ [0xa] = 12,
+ [0xb] = 16,
+ };
+ const struct gre_hdr *gh;
+ struct gre_hdr gh_copy;
+ uint16_t flags;
+
+ gh = rte_pktmbuf_read(m, *off, sizeof(*gh), &gh_copy);
+ if (unlikely(gh == NULL))
+ return 0;
+
+ flags = rte_be_to_cpu_16(*(const uint16_t *)gh);
+ flags >>= 12;
+ if (opt_len[flags] == 0)
+ return 0;
+
+ *off += opt_len[flags];
+ *proto = gh->proto;
+ if (*proto == rte_cpu_to_be_16(ETHER_TYPE_TEB))
+ return RTE_PTYPE_TUNNEL_NVGRE;
+ else
+ return RTE_PTYPE_TUNNEL_GRE;
+ }
+ case IPPROTO_IPIP:
+ *proto = rte_cpu_to_be_16(ETHER_TYPE_IPv4);
+ return RTE_PTYPE_TUNNEL_IP;
+ case IPPROTO_IPV6:
+ *proto = rte_cpu_to_be_16(ETHER_TYPE_IPv6);
+ return RTE_PTYPE_TUNNEL_IP; /* IP is also valid for IPv6 */
+ default:
+ return 0;
+ }
+}
+
+/* get the ipv4 header length */
+static uint8_t
+ip4_hlen(const struct ipv4_hdr *hdr)
+{
+ return (hdr->version_ihl & 0xf) * 4;
+}
+
+/* parse ipv6 extension headers, update offset and return next proto */
+static uint16_t
+skip_ip6_ext(uint16_t proto, const struct rte_mbuf *m, uint32_t *off,
+ int *frag)
+{
+ struct ext_hdr {
+ uint8_t next_hdr;
+ uint8_t len;
+ };
+ const struct ext_hdr *xh;
+ struct ext_hdr xh_copy;
+ unsigned int i;
+
+ *frag = 0;
+
+#define MAX_EXT_HDRS 5
+ for (i = 0; i < MAX_EXT_HDRS; i++) {
+ switch (proto) {
+ case IPPROTO_HOPOPTS:
+ case IPPROTO_ROUTING:
+ case IPPROTO_DSTOPTS:
+ xh = rte_pktmbuf_read(m, *off, sizeof(*xh),
+ &xh_copy);
+ if (xh == NULL)
+ return 0;
+ *off += (xh->len + 1) * 8;
+ proto = xh->next_hdr;
+ break;
+ case IPPROTO_FRAGMENT:
+ xh = rte_pktmbuf_read(m, *off, sizeof(*xh),
+ &xh_copy);
+ if (xh == NULL)
+ return 0;
+ *off += 8;
+ proto = xh->next_hdr;
+ *frag = 1;
+ return proto; /* this is always the last ext hdr */
+ case IPPROTO_NONE:
+ return 0;
+ default:
+ return proto;
+ }
+ }
+ return 0;
+}
+
+/* parse mbuf data to get packet type */
+uint32_t rte_net_get_ptype(const struct rte_mbuf *m,
+ struct rte_net_hdr_lens *hdr_lens, uint32_t layers)
+{
+ struct rte_net_hdr_lens local_hdr_lens;
+ const struct ether_hdr *eh;
+ struct ether_hdr eh_copy;
+ uint32_t pkt_type = RTE_PTYPE_L2_ETHER;
+ uint32_t off = 0;
+ uint16_t proto;
+
+ if (hdr_lens == NULL)
+ hdr_lens = &local_hdr_lens;
+
+ eh = rte_pktmbuf_read(m, off, sizeof(*eh), &eh_copy);
+ if (unlikely(eh == NULL))
+ return 0;
+ proto = eh->ether_type;
+ off = sizeof(*eh);
+ hdr_lens->l2_len = off;
+
+ if ((layers & RTE_PTYPE_L2_MASK) == 0)
+ return 0;
+
+ if (proto == rte_cpu_to_be_16(ETHER_TYPE_IPv4))
+ goto l3; /* fast path if packet is IPv4 */
+
+ if (proto == rte_cpu_to_be_16(ETHER_TYPE_VLAN)) {
+ const struct vlan_hdr *vh;
+ struct vlan_hdr vh_copy;
+
+ pkt_type = RTE_PTYPE_L2_ETHER_VLAN;
+ vh = rte_pktmbuf_read(m, off, sizeof(*vh), &vh_copy);
+ if (unlikely(vh == NULL))
+ return pkt_type;
+ off += sizeof(*vh);
+ hdr_lens->l2_len += sizeof(*vh);
+ proto = vh->eth_proto;
+ } else if (proto == rte_cpu_to_be_16(ETHER_TYPE_QINQ)) {
+ const struct vlan_hdr *vh;
+ struct vlan_hdr vh_copy;
+
+ pkt_type = RTE_PTYPE_L2_ETHER_QINQ;
+ vh = rte_pktmbuf_read(m, off + sizeof(*vh), sizeof(*vh),
+ &vh_copy);
+ if (unlikely(vh == NULL))
+ return pkt_type;
+ off += 2 * sizeof(*vh);
+ hdr_lens->l2_len += 2 * sizeof(*vh);
+ proto = vh->eth_proto;
+ }
+
+ l3:
+ if ((layers & RTE_PTYPE_L3_MASK) == 0)
+ return pkt_type;
+
+ if (proto == rte_cpu_to_be_16(ETHER_TYPE_IPv4)) {
+ const struct ipv4_hdr *ip4h;
+ struct ipv4_hdr ip4h_copy;
+
+ ip4h = rte_pktmbuf_read(m, off, sizeof(*ip4h), &ip4h_copy);
+ if (unlikely(ip4h == NULL))
+ return pkt_type;
+
+ pkt_type |= ptype_l3_ip(ip4h->version_ihl);
+ hdr_lens->l3_len = ip4_hlen(ip4h);
+ off += hdr_lens->l3_len;
+
+ if ((layers & RTE_PTYPE_L4_MASK) == 0)
+ return pkt_type;
+
+ if (ip4h->fragment_offset & rte_cpu_to_be_16(
+ IPV4_HDR_OFFSET_MASK | IPV4_HDR_MF_FLAG)) {
+ pkt_type |= RTE_PTYPE_L4_FRAG;
+ hdr_lens->l4_len = 0;
+ return pkt_type;
+ }
+ proto = ip4h->next_proto_id;
+ pkt_type |= ptype_l4(proto);
+ } else if (proto == rte_cpu_to_be_16(ETHER_TYPE_IPv6)) {
+ const struct ipv6_hdr *ip6h;
+ struct ipv6_hdr ip6h_copy;
+ int frag = 0;
+
+ ip6h = rte_pktmbuf_read(m, off, sizeof(*ip6h), &ip6h_copy);
+ if (unlikely(ip6h == NULL))
+ return pkt_type;
+
+ proto = ip6h->proto;
+ hdr_lens->l3_len = sizeof(*ip6h);
+ off += hdr_lens->l3_len;
+ pkt_type |= ptype_l3_ip6(proto);
+ if ((pkt_type & RTE_PTYPE_L3_MASK) == RTE_PTYPE_L3_IPV6_EXT) {
+ proto = skip_ip6_ext(proto, m, &off, &frag);
+ hdr_lens->l3_len = off - hdr_lens->l2_len;
+ }
+ if (proto == 0)
+ return pkt_type;
+
+ if ((layers & RTE_PTYPE_L4_MASK) == 0)
+ return pkt_type;
+
+ if (frag) {
+ pkt_type |= RTE_PTYPE_L4_FRAG;
+ hdr_lens->l4_len = 0;
+ return pkt_type;
+ }
+ pkt_type |= ptype_l4(proto);
+ }
+
+ if ((pkt_type & RTE_PTYPE_L4_MASK) == RTE_PTYPE_L4_UDP) {
+ hdr_lens->l4_len = sizeof(struct udp_hdr);
+ return pkt_type;
+ } else if ((pkt_type & RTE_PTYPE_L4_MASK) == RTE_PTYPE_L4_TCP) {
+ const struct tcp_hdr *th;
+ struct tcp_hdr th_copy;
+
+ th = rte_pktmbuf_read(m, off, sizeof(*th), &th_copy);
+ if (unlikely(th == NULL))
+ return pkt_type & (RTE_PTYPE_L2_MASK |
+ RTE_PTYPE_L3_MASK);
+ hdr_lens->l4_len = (th->data_off & 0xf0) >> 2;
+ return pkt_type;
+ } else if ((pkt_type & RTE_PTYPE_L4_MASK) == RTE_PTYPE_L4_SCTP) {
+ hdr_lens->l4_len = sizeof(struct sctp_hdr);
+ return pkt_type;
+ } else {
+ uint32_t prev_off = off;
+
+ hdr_lens->l4_len = 0;
+
+ if ((layers & RTE_PTYPE_TUNNEL_MASK) == 0)
+ return pkt_type;
+
+ pkt_type |= ptype_tunnel(&proto, m, &off);
+ hdr_lens->tunnel_len = off - prev_off;
+ }
+
+ /* same job for inner header: we need to duplicate the code
+ * because the packet types do not have the same value.
+ */
+ if ((layers & RTE_PTYPE_INNER_L2_MASK) == 0)
+ return pkt_type;
+
+ if (proto == rte_cpu_to_be_16(ETHER_TYPE_TEB)) {
+ eh = rte_pktmbuf_read(m, off, sizeof(*eh), &eh_copy);
+ if (unlikely(eh == NULL))
+ return pkt_type;
+ pkt_type |= RTE_PTYPE_INNER_L2_ETHER;
+ proto = eh->ether_type;
+ off += sizeof(*eh);
+ hdr_lens->inner_l2_len = sizeof(*eh);
+ }
+
+ if (proto == rte_cpu_to_be_16(ETHER_TYPE_VLAN)) {
+ const struct vlan_hdr *vh;
+ struct vlan_hdr vh_copy;
+
+ pkt_type &= ~RTE_PTYPE_INNER_L2_MASK;
+ pkt_type |= RTE_PTYPE_INNER_L2_ETHER_VLAN;
+ vh = rte_pktmbuf_read(m, off, sizeof(*vh), &vh_copy);
+ if (unlikely(vh == NULL))
+ return pkt_type;
+ off += sizeof(*vh);
+ hdr_lens->inner_l2_len += sizeof(*vh);
+ proto = vh->eth_proto;
+ } else if (proto == rte_cpu_to_be_16(ETHER_TYPE_QINQ)) {
+ const struct vlan_hdr *vh;
+ struct vlan_hdr vh_copy;
+
+ pkt_type &= ~RTE_PTYPE_INNER_L2_MASK;
+ pkt_type |= RTE_PTYPE_INNER_L2_ETHER_QINQ;
+ vh = rte_pktmbuf_read(m, off + sizeof(*vh), sizeof(*vh),
+ &vh_copy);
+ if (unlikely(vh == NULL))
+ return pkt_type;
+ off += 2 * sizeof(*vh);
+ hdr_lens->inner_l2_len += 2 * sizeof(*vh);
+ proto = vh->eth_proto;
+ }
+
+ if ((layers & RTE_PTYPE_INNER_L3_MASK) == 0)
+ return pkt_type;
+
+ if (proto == rte_cpu_to_be_16(ETHER_TYPE_IPv4)) {
+ const struct ipv4_hdr *ip4h;
+ struct ipv4_hdr ip4h_copy;
+
+ ip4h = rte_pktmbuf_read(m, off, sizeof(*ip4h), &ip4h_copy);
+ if (unlikely(ip4h == NULL))
+ return pkt_type;
+
+ pkt_type |= ptype_inner_l3_ip(ip4h->version_ihl);
+ hdr_lens->inner_l3_len = ip4_hlen(ip4h);
+ off += hdr_lens->inner_l3_len;
+
+ if ((layers & RTE_PTYPE_INNER_L4_MASK) == 0)
+ return pkt_type;
+ if (ip4h->fragment_offset &
+ rte_cpu_to_be_16(IPV4_HDR_OFFSET_MASK |
+ IPV4_HDR_MF_FLAG)) {
+ pkt_type |= RTE_PTYPE_INNER_L4_FRAG;
+ hdr_lens->inner_l4_len = 0;
+ return pkt_type;
+ }
+ proto = ip4h->next_proto_id;
+ pkt_type |= ptype_inner_l4(proto);
+ } else if (proto == rte_cpu_to_be_16(ETHER_TYPE_IPv6)) {
+ const struct ipv6_hdr *ip6h;
+ struct ipv6_hdr ip6h_copy;
+ int frag = 0;
+
+ ip6h = rte_pktmbuf_read(m, off, sizeof(*ip6h), &ip6h_copy);
+ if (unlikely(ip6h == NULL))
+ return pkt_type;
+
+ proto = ip6h->proto;
+ hdr_lens->inner_l3_len = sizeof(*ip6h);
+ off += hdr_lens->inner_l3_len;
+ pkt_type |= ptype_inner_l3_ip6(proto);
+ if ((pkt_type & RTE_PTYPE_INNER_L3_MASK) ==
+ RTE_PTYPE_INNER_L3_IPV6_EXT) {
+ uint32_t prev_off;
+
+ prev_off = off;
+ proto = skip_ip6_ext(proto, m, &off, &frag);
+ hdr_lens->inner_l3_len += off - prev_off;
+ }
+ if (proto == 0)
+ return pkt_type;
+
+ if ((layers & RTE_PTYPE_INNER_L4_MASK) == 0)
+ return pkt_type;
+
+ if (frag) {
+ pkt_type |= RTE_PTYPE_INNER_L4_FRAG;
+ hdr_lens->inner_l4_len = 0;
+ return pkt_type;
+ }
+ pkt_type |= ptype_inner_l4(proto);
+ }
+
+ if ((pkt_type & RTE_PTYPE_INNER_L4_MASK) == RTE_PTYPE_INNER_L4_UDP) {
+ hdr_lens->inner_l4_len = sizeof(struct udp_hdr);
+ } else if ((pkt_type & RTE_PTYPE_INNER_L4_MASK) ==
+ RTE_PTYPE_INNER_L4_TCP) {
+ const struct tcp_hdr *th;
+ struct tcp_hdr th_copy;
+
+ th = rte_pktmbuf_read(m, off, sizeof(*th), &th_copy);
+ if (unlikely(th == NULL))
+ return pkt_type & (RTE_PTYPE_INNER_L2_MASK |
+ RTE_PTYPE_INNER_L3_MASK);
+ hdr_lens->inner_l4_len = (th->data_off & 0xf0) >> 2;
+ } else if ((pkt_type & RTE_PTYPE_INNER_L4_MASK) ==
+ RTE_PTYPE_INNER_L4_SCTP) {
+ hdr_lens->inner_l4_len = sizeof(struct sctp_hdr);
+ } else {
+ hdr_lens->inner_l4_len = 0;
+ }
+
+ return pkt_type;
+}
diff --git a/lib/librte_vhost/vhost_cuse/eventfd_copy.h b/lib/librte_net/rte_net.h
index 5f446ca0..d4156aea 100644
--- a/lib/librte_vhost/vhost_cuse/eventfd_copy.h
+++ b/lib/librte_net/rte_net.h
@@ -1,7 +1,7 @@
/*-
* BSD LICENSE
*
- * Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+ * Copyright 2016 6WIND S.A.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -30,16 +30,65 @@
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
-#ifndef _EVENTFD_H
-#define _EVENTFD_H
-int
-eventfd_init(void);
+#ifndef _RTE_NET_PTYPE_H_
+#define _RTE_NET_PTYPE_H_
-int
-eventfd_free(void);
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/**
+ * Structure containing header lengths associated to a packet, filled
+ * by rte_net_get_ptype().
+ *
+ * A field is only meaningful when the matching layer is reported in the
+ * packet type returned by rte_net_get_ptype().
+ *
+ * NOTE(review): every length is stored in a uint8_t, so a value above 255
+ * cannot be represented -- confirm this is acceptable for long IPv6
+ * extension header chains.
+ */
+struct rte_net_hdr_lens {
+ uint8_t l2_len; /* outer L2 header length (presumably bytes) */
+ uint8_t l3_len; /* outer L3 header length (presumably bytes) */
+ uint8_t l4_len; /* outer L4 header length (presumably bytes) */
+ uint8_t tunnel_len; /* tunnel header length (presumably bytes) */
+ uint8_t inner_l2_len; /* inner L2 header length (presumably bytes) */
+ uint8_t inner_l3_len; /* inner L3 length in bytes, IPv6 extensions included */
+ uint8_t inner_l4_len; /* inner L4 length in bytes, 0 for fragments/unknown L4 */
+};
-int
-eventfd_copy(int target_fd, int target_pid);
+/**
+ * Parse an Ethernet packet to get its packet type.
+ *
+ * This function parses the network headers in mbuf data and returns the
+ * packet type.
+ *
+ * If it is provided by the user, it also fills a rte_net_hdr_lens
+ * structure that contains the lengths of the parsed network
+ * headers. Each length field is valid only if the associated packet
+ * type is set. For instance, hdr_lens->l2_len is valid only if
+ * (retval & RTE_PTYPE_L2_MASK) != RTE_PTYPE_UNKNOWN.
+ *
+ * Supported packet types are:
+ * L2: Ether, Vlan, QinQ
+ * L3: IPv4, IPv6
+ * L4: TCP, UDP, SCTP
+ * Tunnels: IPv4, IPv6, Gre, Nvgre
+ *
+ * @param m
+ * The packet mbuf to be parsed.
+ * @param hdr_lens
+ * A pointer to a structure where the header lengths will be returned,
+ * or NULL.
+ * @param layers
+ * List of layers to parse. The function will stop at the first
+ * empty layer. Examples:
+ * - To parse all known layers, use RTE_PTYPE_ALL_MASK.
+ * - To parse only L2 and L3, use RTE_PTYPE_L2_MASK | RTE_PTYPE_L3_MASK
+ * @return
+ * The packet type of the packet.
+ */
+uint32_t rte_net_get_ptype(const struct rte_mbuf *m,
+ struct rte_net_hdr_lens *hdr_lens, uint32_t layers);
+#ifdef __cplusplus
+}
#endif
+
+
+#endif /* _RTE_NET_PTYPE_H_ */
diff --git a/lib/librte_net/rte_net_version.map b/lib/librte_net/rte_net_version.map
new file mode 100644
index 00000000..3b15e651
--- /dev/null
+++ b/lib/librte_net/rte_net_version.map
@@ -0,0 +1,6 @@
+DPDK_16.11 {
+ global:
+ rte_net_get_ptype;
+
+ local: *;
+};
diff --git a/lib/librte_pdump/rte_pdump.c b/lib/librte_pdump/rte_pdump.c
index ea5ccd98..59686837 100644
--- a/lib/librte_pdump/rte_pdump.c
+++ b/lib/librte_pdump/rte_pdump.c
@@ -226,29 +226,6 @@ pdump_tx(uint8_t port __rte_unused, uint16_t qidx __rte_unused,
}
static int
-pdump_get_dombdf(char *device_id, char *domBDF, size_t len)
-{
- int ret;
- struct rte_pci_addr dev_addr = {0};
-
- /* identify if device_id is pci address or name */
- ret = eal_parse_pci_DomBDF(device_id, &dev_addr);
- if (ret < 0)
- return -1;
-
- if (dev_addr.domain)
- ret = snprintf(domBDF, len, "%u:%u:%u.%u", dev_addr.domain,
- dev_addr.bus, dev_addr.devid,
- dev_addr.function);
- else
- ret = snprintf(domBDF, len, "%u:%u.%u", dev_addr.bus,
- dev_addr.devid,
- dev_addr.function);
-
- return ret;
-}
-
-static int
pdump_regitser_rx_callbacks(uint16_t end_q, uint8_t port, uint16_t queue,
struct rte_ring *ring, struct rte_mempool *mp,
uint16_t operation)
@@ -292,7 +269,7 @@ pdump_regitser_rx_callbacks(uint16_t end_q, uint8_t port, uint16_t queue,
if (ret < 0) {
RTE_LOG(ERR, PDUMP,
"failed to remove rx callback, errno=%d\n",
- rte_errno);
+ -ret);
return ret;
}
cbs->cb = NULL;
@@ -347,7 +324,7 @@ pdump_regitser_tx_callbacks(uint16_t end_q, uint8_t port, uint16_t queue,
if (ret < 0) {
RTE_LOG(ERR, PDUMP,
"failed to remove tx callback, errno=%d\n",
- rte_errno);
+ -ret);
return ret;
}
cbs->cb = NULL;
@@ -885,7 +862,6 @@ rte_pdump_enable_by_deviceid(char *device_id, uint16_t queue,
void *filter)
{
int ret = 0;
- char domBDF[DEVICE_ID_SIZE];
ret = pdump_validate_ring_mp(ring, mp);
if (ret < 0)
@@ -894,11 +870,7 @@ rte_pdump_enable_by_deviceid(char *device_id, uint16_t queue,
if (ret < 0)
return ret;
- if (pdump_get_dombdf(device_id, domBDF, sizeof(domBDF)) > 0)
- ret = pdump_prepare_client_request(domBDF, queue, flags,
- ENABLE, ring, mp, filter);
- else
- ret = pdump_prepare_client_request(device_id, queue, flags,
+ ret = pdump_prepare_client_request(device_id, queue, flags,
ENABLE, ring, mp, filter);
return ret;
@@ -928,17 +900,12 @@ rte_pdump_disable_by_deviceid(char *device_id, uint16_t queue,
uint32_t flags)
{
int ret = 0;
- char domBDF[DEVICE_ID_SIZE];
ret = pdump_validate_flags(flags);
if (ret < 0)
return ret;
- if (pdump_get_dombdf(device_id, domBDF, sizeof(domBDF)) > 0)
- ret = pdump_prepare_client_request(domBDF, queue, flags,
- DISABLE, NULL, NULL, NULL);
- else
- ret = pdump_prepare_client_request(device_id, queue, flags,
+ ret = pdump_prepare_client_request(device_id, queue, flags,
DISABLE, NULL, NULL, NULL);
return ret;
diff --git a/lib/librte_pdump/rte_pdump.h b/lib/librte_pdump/rte_pdump.h
index b5f4e2f3..924b8043 100644
--- a/lib/librte_pdump/rte_pdump.h
+++ b/lib/librte_pdump/rte_pdump.h
@@ -41,6 +41,10 @@
* packet dump library to provide packet capturing support on dpdk.
*/
+#include <stdint.h>
+#include <rte_mempool.h>
+#include <rte_ring.h>
+
#ifdef __cplusplus
extern "C" {
#endif
diff --git a/lib/librte_pipeline/rte_pipeline.h b/lib/librte_pipeline/rte_pipeline.h
index 84d18025..f3663483 100644
--- a/lib/librte_pipeline/rte_pipeline.h
+++ b/lib/librte_pipeline/rte_pipeline.h
@@ -87,6 +87,7 @@ extern "C" {
#include <rte_port.h>
#include <rte_table.h>
+#include <rte_common.h>
struct rte_mbuf;
@@ -244,6 +245,7 @@ struct rte_pipeline_table_entry {
/** Reserved action */
enum rte_pipeline_action action;
+ RTE_STD_C11
union {
/** Output port ID (meta-data for "Send packet to output port"
action) */
@@ -252,7 +254,7 @@ struct rte_pipeline_table_entry {
uint32_t table_id;
};
/** Start of table entry area for user defined actions and meta-data */
- uint8_t action_data[0];
+ __extension__ uint8_t action_data[0];
};
/**
diff --git a/lib/librte_port/Makefile b/lib/librte_port/Makefile
index 3d84a0e4..44fa7352 100644
--- a/lib/librte_port/Makefile
+++ b/lib/librte_port/Makefile
@@ -56,6 +56,7 @@ SRCS-$(CONFIG_RTE_LIBRTE_PORT) += rte_port_frag.c
SRCS-$(CONFIG_RTE_LIBRTE_PORT) += rte_port_ras.c
endif
SRCS-$(CONFIG_RTE_LIBRTE_PORT) += rte_port_sched.c
+SRCS-$(CONFIG_RTE_LIBRTE_PORT) += rte_port_fd.c
ifeq ($(CONFIG_RTE_LIBRTE_KNI),y)
SRCS-$(CONFIG_RTE_LIBRTE_PORT) += rte_port_kni.c
endif
@@ -70,6 +71,7 @@ SYMLINK-$(CONFIG_RTE_LIBRTE_PORT)-include += rte_port_frag.h
SYMLINK-$(CONFIG_RTE_LIBRTE_PORT)-include += rte_port_ras.h
endif
SYMLINK-$(CONFIG_RTE_LIBRTE_PORT)-include += rte_port_sched.h
+SYMLINK-$(CONFIG_RTE_LIBRTE_PORT)-include += rte_port_fd.h
ifeq ($(CONFIG_RTE_LIBRTE_KNI),y)
SYMLINK-$(CONFIG_RTE_LIBRTE_PORT)-include += rte_port_kni.h
endif
diff --git a/lib/librte_port/rte_port_fd.c b/lib/librte_port/rte_port_fd.c
new file mode 100644
index 00000000..0d640f34
--- /dev/null
+++ b/lib/librte_port/rte_port_fd.c
@@ -0,0 +1,552 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright(c) 2016 Intel Corporation. All rights reserved.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+#include <string.h>
+#include <stdint.h>
+#include <unistd.h>
+
+#include <rte_mbuf.h>
+#include <rte_malloc.h>
+
+#include "rte_port_fd.h"
+
+/*
+ * Port FD Reader
+ */
+/*
+ * Statistics helpers for the fd reader port. They update the counters in
+ * port->stats when RTE_PORT_STATS_COLLECT is defined and expand to nothing
+ * otherwise. Arguments are parenthesized so that any expression can be
+ * passed safely.
+ */
+#ifdef RTE_PORT_STATS_COLLECT
+
+#define RTE_PORT_FD_READER_STATS_PKTS_IN_ADD(port, val) \
+	do { (port)->stats.n_pkts_in += (val); } while (0)
+#define RTE_PORT_FD_READER_STATS_PKTS_DROP_ADD(port, val) \
+	do { (port)->stats.n_pkts_drop += (val); } while (0)
+
+#else
+
+#define RTE_PORT_FD_READER_STATS_PKTS_IN_ADD(port, val)
+#define RTE_PORT_FD_READER_STATS_PKTS_DROP_ADD(port, val)
+
+#endif
+
+/* Run-time state of an fd reader port (private to this file). */
+struct rte_port_fd_reader {
+ struct rte_port_in_stats stats; /* updated only with RTE_PORT_STATS_COLLECT */
+ int fd; /* descriptor passed to read() */
+ uint32_t mtu; /* maximum number of bytes requested per read() */
+ struct rte_mempool *mempool; /* pool the rx mbufs are taken from */
+};
+
+/*
+ * Create an fd reader port.
+ *
+ * params points to a struct rte_port_fd_reader_params; the port object is
+ * allocated zeroed and cache-line aligned on socket_id.
+ *
+ * Returns the new port, or NULL when a parameter is invalid or the
+ * allocation fails (an error is logged in both cases).
+ */
+static void *
+rte_port_fd_reader_create(void *params, int socket_id)
+{
+	struct rte_port_fd_reader_params *cfg = params;
+	struct rte_port_fd_reader *reader;
+
+	/* Validate the configuration, one message per failing field */
+	if (!cfg) {
+		RTE_LOG(ERR, PORT, "%s: params is NULL\n", __func__);
+		return NULL;
+	}
+	if (cfg->fd < 0) {
+		RTE_LOG(ERR, PORT, "%s: Invalid file descriptor\n", __func__);
+		return NULL;
+	}
+	if (!cfg->mtu) {
+		RTE_LOG(ERR, PORT, "%s: Invalid MTU\n", __func__);
+		return NULL;
+	}
+	if (!cfg->mempool) {
+		RTE_LOG(ERR, PORT, "%s: Invalid mempool\n", __func__);
+		return NULL;
+	}
+
+	/* Allocate the port object */
+	reader = rte_zmalloc_socket("PORT", sizeof(*reader),
+			RTE_CACHE_LINE_SIZE, socket_id);
+	if (!reader) {
+		RTE_LOG(ERR, PORT, "%s: Failed to allocate port\n", __func__);
+		return NULL;
+	}
+
+	/* Copy the configuration into the port */
+	reader->mempool = cfg->mempool;
+	reader->mtu = cfg->mtu;
+	reader->fd = cfg->fd;
+
+	return reader;
+}
+
+/*
+ * Receive up to n_pkts packets from the file descriptor.
+ *
+ * n_pkts mbufs are taken from the mempool up front; each one is then
+ * filled by a single read() of at most p->mtu bytes. Reading stops at the
+ * first read() that returns 0 or an error, and the mbufs that were not
+ * filled are released.
+ *
+ * Returns the number of packets actually stored in pkts[] (0 when the
+ * mbufs cannot be allocated). Returning n_pkts unconditionally would hand
+ * already-freed mbufs to the caller.
+ *
+ * NOTE(review): p->mtu is not checked against the mbuf data room here --
+ * confirm that the mempool buffers can hold mtu bytes.
+ */
+static int
+rte_port_fd_reader_rx(void *port, struct rte_mbuf **pkts, uint32_t n_pkts)
+{
+	struct rte_port_fd_reader *p = (struct rte_port_fd_reader *) port;
+	uint32_t n_read, i;
+
+	if (rte_mempool_get_bulk(p->mempool, (void **) pkts, n_pkts) != 0)
+		return 0;
+
+	for (i = 0; i < n_pkts; i++) {
+		rte_mbuf_refcnt_set(pkts[i], 1);
+		rte_pktmbuf_reset(pkts[i]);
+	}
+
+	for (n_read = 0; n_read < n_pkts; n_read++) {
+		struct rte_mbuf *pkt = pkts[n_read];
+		void *pkt_data = rte_pktmbuf_mtod(pkt, void *);
+		ssize_t n_bytes;
+
+		n_bytes = read(p->fd, pkt_data, (size_t) p->mtu);
+		if (n_bytes <= 0)
+			break;
+
+		pkt->data_len = n_bytes;
+		pkt->pkt_len = n_bytes;
+	}
+
+	/* Release the mbufs that were allocated but never filled */
+	for (i = n_read; i < n_pkts; i++)
+		rte_pktmbuf_free(pkts[i]);
+
+	RTE_PORT_FD_READER_STATS_PKTS_IN_ADD(p, n_read);
+
+	return n_read;
+}
+
+/*
+ * Release an fd reader port.
+ *
+ * Returns 0 on success, -EINVAL when port is NULL. The file descriptor
+ * itself is not closed here.
+ */
+static int
+rte_port_fd_reader_free(void *port)
+{
+	if (port != NULL) {
+		rte_free(port);
+		return 0;
+	}
+
+	RTE_LOG(ERR, PORT, "%s: port is NULL\n", __func__);
+	return -EINVAL;
+}
+
+/*
+ * Read and/or reset the fd reader statistics.
+ *
+ * When stats is not NULL the current counters are copied into it; when
+ * clear is non-zero the counters are zeroed afterwards. Always returns 0.
+ */
+static int rte_port_fd_reader_stats_read(void *port,
+	struct rte_port_in_stats *stats, int clear)
+{
+	struct rte_port_fd_reader *reader = port;
+
+	if (stats != NULL)
+		memcpy(stats, &reader->stats, sizeof(reader->stats));
+
+	if (clear != 0)
+		memset(&reader->stats, 0, sizeof(reader->stats));
+
+	return 0;
+}
+
+/*
+ * Port FD Writer
+ */
+/*
+ * Statistics helpers for the fd writer port. They update the counters in
+ * port->stats when RTE_PORT_STATS_COLLECT is defined and expand to nothing
+ * otherwise. Arguments are parenthesized because callers pass expressions
+ * such as "p->tx_buf_count - i".
+ */
+#ifdef RTE_PORT_STATS_COLLECT
+
+#define RTE_PORT_FD_WRITER_STATS_PKTS_IN_ADD(port, val) \
+	do { (port)->stats.n_pkts_in += (val); } while (0)
+#define RTE_PORT_FD_WRITER_STATS_PKTS_DROP_ADD(port, val) \
+	do { (port)->stats.n_pkts_drop += (val); } while (0)
+
+#else
+
+#define RTE_PORT_FD_WRITER_STATS_PKTS_IN_ADD(port, val)
+#define RTE_PORT_FD_WRITER_STATS_PKTS_DROP_ADD(port, val)
+
+#endif
+
+/* Run-time state of an fd writer port (private to this file). */
+struct rte_port_fd_writer {
+	struct rte_port_out_stats stats; /* updated only with RTE_PORT_STATS_COLLECT */
+
+	/* Packets buffered until tx_burst_sz of them are pending */
+	struct rte_mbuf *tx_buf[2 * RTE_PORT_IN_BURST_SIZE_MAX];
+	uint32_t tx_burst_sz; /* buffered packet count that triggers a send */
+	uint16_t tx_buf_count; /* number of valid entries in tx_buf */
+	int fd; /* descriptor passed to write(); int as in the params struct */
+};
+
+/*
+ * Create an fd writer port.
+ *
+ * params points to a struct rte_port_fd_writer_params. The descriptor must
+ * be non-negative and tx_burst_sz must be a non-zero power of two no
+ * larger than RTE_PORT_IN_BURST_SIZE_MAX.
+ *
+ * Returns the new port, or NULL when a parameter is invalid or the
+ * allocation fails (an error is logged in both cases).
+ */
+static void *
+rte_port_fd_writer_create(void *params, int socket_id)
+{
+	struct rte_port_fd_writer_params *conf =
+		(struct rte_port_fd_writer_params *) params;
+	struct rte_port_fd_writer *port;
+
+	/* Check input parameters (fd is validated like in the other fd ports) */
+	if ((conf == NULL) ||
+		(conf->fd < 0) ||
+		(conf->tx_burst_sz == 0) ||
+		(conf->tx_burst_sz > RTE_PORT_IN_BURST_SIZE_MAX) ||
+		(!rte_is_power_of_2(conf->tx_burst_sz))) {
+		RTE_LOG(ERR, PORT, "%s: Invalid input parameters\n", __func__);
+		return NULL;
+	}
+
+	/* Memory allocation */
+	port = rte_zmalloc_socket("PORT", sizeof(*port),
+		RTE_CACHE_LINE_SIZE, socket_id);
+	if (port == NULL) {
+		RTE_LOG(ERR, PORT, "%s: Failed to allocate port\n", __func__);
+		return NULL;
+	}
+
+	/* Initialization */
+	port->fd = conf->fd;
+	port->tx_burst_sz = conf->tx_burst_sz;
+	port->tx_buf_count = 0;
+
+	return port;
+}
+
+/*
+ * Write every buffered packet to the descriptor, one write() per packet.
+ *
+ * Sending stops at the first write() that returns a negative value; the
+ * packets not attempted successfully are accounted as dropped. All
+ * buffered mbufs are freed and the buffer is emptied in every case.
+ */
+static inline void
+send_burst(struct rte_port_fd_writer *p)
+{
+	uint32_t n_done = 0;
+	uint32_t k;
+
+	while (n_done < p->tx_buf_count) {
+		struct rte_mbuf *m = p->tx_buf[n_done];
+		size_t len = rte_pktmbuf_data_len(m);
+
+		if (write(p->fd, rte_pktmbuf_mtod(m, void *), len) < 0)
+			break;
+		n_done++;
+	}
+
+	RTE_PORT_FD_WRITER_STATS_PKTS_DROP_ADD(p, p->tx_buf_count - n_done);
+
+	for (k = 0; k < p->tx_buf_count; k++)
+		rte_pktmbuf_free(p->tx_buf[k]);
+
+	p->tx_buf_count = 0;
+}
+
+/*
+ * Queue one packet for transmission; the buffer is sent as soon as it
+ * holds at least tx_burst_sz packets. Always returns 0.
+ */
+static int
+rte_port_fd_writer_tx(void *port, struct rte_mbuf *pkt)
+{
+	struct rte_port_fd_writer *writer = port;
+
+	writer->tx_buf[writer->tx_buf_count] = pkt;
+	writer->tx_buf_count++;
+	RTE_PORT_FD_WRITER_STATS_PKTS_IN_ADD(writer, 1);
+
+	if (writer->tx_buf_count < writer->tx_burst_sz)
+		return 0;
+
+	send_burst(writer);
+	return 0;
+}
+
+/*
+ * Queue the packets selected by pkts_mask (bit i selects pkts[i]) and send
+ * the buffer once it holds at least tx_burst_sz packets. Always returns 0.
+ */
+static int
+rte_port_fd_writer_tx_bulk(void *port,
+	struct rte_mbuf **pkts,
+	uint64_t pkts_mask)
+{
+	struct rte_port_fd_writer *writer = port;
+	uint32_t count = writer->tx_buf_count;
+
+	if ((pkts_mask & (pkts_mask + 1)) != 0) {
+		/* Mask has holes: visit the set bits from lowest to highest */
+		while (pkts_mask != 0) {
+			uint32_t idx = __builtin_ctzll(pkts_mask);
+
+			writer->tx_buf[count++] = pkts[idx];
+			RTE_PORT_FD_WRITER_STATS_PKTS_IN_ADD(writer, 1);
+			pkts_mask &= pkts_mask - 1; /* clear the lowest set bit */
+		}
+	} else {
+		/* Mask is a contiguous run starting at bit 0 (or empty) */
+		uint64_t n = __builtin_popcountll(pkts_mask);
+		uint32_t k;
+
+		for (k = 0; k < n; k++)
+			writer->tx_buf[count++] = pkts[k];
+		RTE_PORT_FD_WRITER_STATS_PKTS_IN_ADD(writer, n);
+	}
+
+	writer->tx_buf_count = count;
+	if (count >= writer->tx_burst_sz)
+		send_burst(writer);
+
+	return 0;
+}
+
+/* Send whatever is currently buffered, if anything. Always returns 0. */
+static int
+rte_port_fd_writer_flush(void *port)
+{
+	struct rte_port_fd_writer *writer = port;
+
+	if (writer->tx_buf_count != 0)
+		send_burst(writer);
+
+	return 0;
+}
+
+/*
+ * Flush the pending packets and release an fd writer port.
+ *
+ * Returns 0 on success, -EINVAL when port is NULL.
+ */
+static int
+rte_port_fd_writer_free(void *port)
+{
+	if (port != NULL) {
+		rte_port_fd_writer_flush(port);
+		rte_free(port);
+		return 0;
+	}
+
+	RTE_LOG(ERR, PORT, "%s: Port is NULL\n", __func__);
+	return -EINVAL;
+}
+
+/*
+ * Read and/or reset the fd writer statistics.
+ *
+ * When stats is not NULL the current counters are copied into it; when
+ * clear is non-zero the counters are zeroed afterwards. Always returns 0.
+ */
+static int rte_port_fd_writer_stats_read(void *port,
+	struct rte_port_out_stats *stats, int clear)
+{
+	struct rte_port_fd_writer *writer = port;
+
+	if (stats != NULL)
+		memcpy(stats, &writer->stats, sizeof(writer->stats));
+
+	if (clear != 0)
+		memset(&writer->stats, 0, sizeof(writer->stats));
+
+	return 0;
+}
+
+/*
+ * Port FD Writer Nodrop
+ */
+/*
+ * Statistics helpers for the fd writer nodrop port. They update the
+ * counters in port->stats when RTE_PORT_STATS_COLLECT is defined and
+ * expand to nothing otherwise. Arguments are parenthesized because callers
+ * pass expressions such as "p->tx_buf_count - i".
+ */
+#ifdef RTE_PORT_STATS_COLLECT
+
+#define RTE_PORT_FD_WRITER_NODROP_STATS_PKTS_IN_ADD(port, val) \
+	do { (port)->stats.n_pkts_in += (val); } while (0)
+#define RTE_PORT_FD_WRITER_NODROP_STATS_PKTS_DROP_ADD(port, val) \
+	do { (port)->stats.n_pkts_drop += (val); } while (0)
+
+#else
+
+#define RTE_PORT_FD_WRITER_NODROP_STATS_PKTS_IN_ADD(port, val)
+#define RTE_PORT_FD_WRITER_NODROP_STATS_PKTS_DROP_ADD(port, val)
+
+#endif
+
+/* Run-time state of an fd writer nodrop port (private to this file). */
+struct rte_port_fd_writer_nodrop {
+	struct rte_port_out_stats stats; /* updated only with RTE_PORT_STATS_COLLECT */
+
+	/* Packets buffered until tx_burst_sz of them are pending */
+	struct rte_mbuf *tx_buf[2 * RTE_PORT_IN_BURST_SIZE_MAX];
+	uint32_t tx_burst_sz; /* buffered packet count that triggers a send */
+	uint16_t tx_buf_count; /* number of valid entries in tx_buf */
+	uint64_t n_retries; /* retry budget per burst, UINT64_MAX for "no limit" */
+	int fd; /* descriptor passed to write(); int as in the params struct */
+};
+
+/*
+ * Create an fd writer nodrop port.
+ *
+ * params points to a struct rte_port_fd_writer_nodrop_params. The
+ * descriptor must be non-negative and tx_burst_sz must be a non-zero power
+ * of two no larger than RTE_PORT_IN_BURST_SIZE_MAX.
+ *
+ * Returns the new port, or NULL when a parameter is invalid or the
+ * allocation fails (an error is logged in both cases).
+ */
+static void *
+rte_port_fd_writer_nodrop_create(void *params, int socket_id)
+{
+	struct rte_port_fd_writer_nodrop_params *cfg = params;
+	struct rte_port_fd_writer_nodrop *writer;
+
+	/* Reject a missing or out-of-range configuration */
+	if (cfg == NULL ||
+		cfg->fd < 0 ||
+		cfg->tx_burst_sz == 0 ||
+		cfg->tx_burst_sz > RTE_PORT_IN_BURST_SIZE_MAX ||
+		!rte_is_power_of_2(cfg->tx_burst_sz)) {
+		RTE_LOG(ERR, PORT, "%s: Invalid input parameters\n", __func__);
+		return NULL;
+	}
+
+	/* Allocate the port object */
+	writer = rte_zmalloc_socket("PORT", sizeof(*writer),
+			RTE_CACHE_LINE_SIZE, socket_id);
+	if (writer == NULL) {
+		RTE_LOG(ERR, PORT, "%s: Failed to allocate port\n", __func__);
+		return NULL;
+	}
+
+	/* Copy the configuration into the port */
+	writer->tx_buf_count = 0;
+	writer->tx_burst_sz = cfg->tx_burst_sz;
+	writer->fd = cfg->fd;
+
+	/*
+	 * A configured value of 0 means "retry without limit". It is stored as
+	 * UINT64_MAX so that the fast path needs a single comparison and no
+	 * special case.
+	 */
+	if (cfg->n_retries == 0)
+		writer->n_retries = UINT64_MAX;
+	else
+		writer->n_retries = cfg->n_retries;
+
+	return writer;
+}
+
+/*
+ * Write every buffered packet to the descriptor, retrying failed writes.
+ *
+ * A write() that returns a positive value completes the packet. A write()
+ * that returns 0, or fails with EAGAIN or EINTR, is retried; each failed
+ * attempt consumes one unit of the per-burst budget p->n_retries. Any
+ * other error, or an exhausted budget, ends the burst. A negative return
+ * value is no longer mistaken for success.
+ *
+ * Only packets whose write() succeeded count as sent; the rest are
+ * accounted as dropped. All buffered mbufs are freed and the buffer is
+ * emptied in every case.
+ */
+static inline void
+send_burst_nodrop(struct rte_port_fd_writer_nodrop *p)
+{
+	uint64_t n_retries = 0;
+	uint32_t n_sent = 0;
+	uint32_t i;
+
+	while (n_sent < p->tx_buf_count) {
+		struct rte_mbuf *pkt = p->tx_buf[n_sent];
+		void *pkt_data = rte_pktmbuf_mtod(pkt, void *);
+		size_t n_bytes = rte_pktmbuf_data_len(pkt);
+		ssize_t ret;
+
+		ret = write(p->fd, pkt_data, n_bytes);
+		if (ret > 0) {
+			n_sent++;
+			continue;
+		}
+
+		/* Hard error: retrying cannot help, give up on the burst */
+		if ((ret < 0) && (errno != EAGAIN) && (errno != EINTR))
+			break;
+
+		/* Transient failure: retry unless the budget is exhausted */
+		n_retries++;
+		if (n_retries >= p->n_retries)
+			break;
+	}
+
+	RTE_PORT_FD_WRITER_NODROP_STATS_PKTS_DROP_ADD(p,
+		p->tx_buf_count - n_sent);
+
+	for (i = 0; i < p->tx_buf_count; i++)
+		rte_pktmbuf_free(p->tx_buf[i]);
+
+	p->tx_buf_count = 0;
+}
+
+/*
+ * Queue one packet for transmission; the buffer is sent as soon as it
+ * holds at least tx_burst_sz packets. Always returns 0.
+ */
+static int
+rte_port_fd_writer_nodrop_tx(void *port, struct rte_mbuf *pkt)
+{
+	struct rte_port_fd_writer_nodrop *writer = port;
+
+	writer->tx_buf[writer->tx_buf_count] = pkt;
+	writer->tx_buf_count++;
+	RTE_PORT_FD_WRITER_NODROP_STATS_PKTS_IN_ADD(writer, 1);
+
+	if (writer->tx_buf_count < writer->tx_burst_sz)
+		return 0;
+
+	send_burst_nodrop(writer);
+	return 0;
+}
+
+/*
+ * Queue the packets selected by pkts_mask (bit i selects pkts[i]) and send
+ * the buffer once it holds at least tx_burst_sz packets. Always returns 0.
+ */
+static int
+rte_port_fd_writer_nodrop_tx_bulk(void *port,
+	struct rte_mbuf **pkts,
+	uint64_t pkts_mask)
+{
+	struct rte_port_fd_writer_nodrop *writer = port;
+	uint32_t count = writer->tx_buf_count;
+
+	if ((pkts_mask & (pkts_mask + 1)) != 0) {
+		/* Mask has holes: visit the set bits from lowest to highest */
+		while (pkts_mask != 0) {
+			uint32_t idx = __builtin_ctzll(pkts_mask);
+
+			writer->tx_buf[count++] = pkts[idx];
+			RTE_PORT_FD_WRITER_NODROP_STATS_PKTS_IN_ADD(writer, 1);
+			pkts_mask &= pkts_mask - 1; /* clear the lowest set bit */
+		}
+	} else {
+		/* Mask is a contiguous run starting at bit 0 (or empty) */
+		uint64_t n = __builtin_popcountll(pkts_mask);
+		uint32_t k;
+
+		for (k = 0; k < n; k++)
+			writer->tx_buf[count++] = pkts[k];
+		RTE_PORT_FD_WRITER_NODROP_STATS_PKTS_IN_ADD(writer, n);
+	}
+
+	writer->tx_buf_count = count;
+	if (count >= writer->tx_burst_sz)
+		send_burst_nodrop(writer);
+
+	return 0;
+}
+
+/* Send whatever is currently buffered, if anything. Always returns 0. */
+static int
+rte_port_fd_writer_nodrop_flush(void *port)
+{
+	struct rte_port_fd_writer_nodrop *writer = port;
+
+	if (writer->tx_buf_count != 0)
+		send_burst_nodrop(writer);
+
+	return 0;
+}
+
+/*
+ * Flush the pending packets and release an fd writer nodrop port.
+ *
+ * Returns 0 on success, -EINVAL when port is NULL.
+ */
+static int
+rte_port_fd_writer_nodrop_free(void *port)
+{
+	if (port != NULL) {
+		rte_port_fd_writer_nodrop_flush(port);
+		rte_free(port);
+		return 0;
+	}
+
+	RTE_LOG(ERR, PORT, "%s: Port is NULL\n", __func__);
+	return -EINVAL;
+}
+
+/*
+ * Read and/or reset the fd writer nodrop statistics.
+ *
+ * When stats is not NULL the current counters are copied into it; when
+ * clear is non-zero the counters are zeroed afterwards. Always returns 0.
+ */
+static int rte_port_fd_writer_nodrop_stats_read(void *port,
+	struct rte_port_out_stats *stats, int clear)
+{
+	struct rte_port_fd_writer_nodrop *writer = port;
+
+	if (stats != NULL)
+		memcpy(stats, &writer->stats, sizeof(writer->stats));
+
+	if (clear != 0)
+		memset(&writer->stats, 0, sizeof(writer->stats));
+
+	return 0;
+}
+
+/*
+ * Summary of port operations
+ */
+/* Input port operations of the fd reader (declared in rte_port_fd.h). */
+struct rte_port_in_ops rte_port_fd_reader_ops = {
+ .f_create = rte_port_fd_reader_create,
+ .f_free = rte_port_fd_reader_free,
+ .f_rx = rte_port_fd_reader_rx,
+ .f_stats = rte_port_fd_reader_stats_read,
+};
+
+/* Output port operations of the fd writer (declared in rte_port_fd.h). */
+struct rte_port_out_ops rte_port_fd_writer_ops = {
+ .f_create = rte_port_fd_writer_create,
+ .f_free = rte_port_fd_writer_free,
+ .f_tx = rte_port_fd_writer_tx,
+ .f_tx_bulk = rte_port_fd_writer_tx_bulk,
+ .f_flush = rte_port_fd_writer_flush,
+ .f_stats = rte_port_fd_writer_stats_read,
+};
+
+/*
+ * Output port operations of the fd writer nodrop variant, which retries
+ * writes up to the configured budget (declared in rte_port_fd.h).
+ */
+struct rte_port_out_ops rte_port_fd_writer_nodrop_ops = {
+ .f_create = rte_port_fd_writer_nodrop_create,
+ .f_free = rte_port_fd_writer_nodrop_free,
+ .f_tx = rte_port_fd_writer_nodrop_tx,
+ .f_tx_bulk = rte_port_fd_writer_nodrop_tx_bulk,
+ .f_flush = rte_port_fd_writer_nodrop_flush,
+ .f_stats = rte_port_fd_writer_nodrop_stats_read,
+};
diff --git a/lib/librte_vhost/vhost_cuse/eventfd_copy.c b/lib/librte_port/rte_port_fd.h
index 154b32a4..77a2d31b 100644
--- a/lib/librte_vhost/vhost_cuse/eventfd_copy.c
+++ b/lib/librte_port/rte_port_fd.h
@@ -1,7 +1,7 @@
/*-
* BSD LICENSE
*
- * Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+ * Copyright(c) 2016 Intel Corporation. All rights reserved.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -31,74 +31,75 @@
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
-#include <unistd.h>
-#include <sys/eventfd.h>
-#include <sys/ioctl.h>
-#include <sys/types.h>
-#include <sys/stat.h>
-#include <fcntl.h>
+#ifndef __INCLUDE_RTE_PORT_FD_H__
+#define __INCLUDE_RTE_PORT_FD_H__
-#include <rte_log.h>
+#ifdef __cplusplus
+extern "C" {
+#endif
-#include "eventfd_link/eventfd_link.h"
-#include "eventfd_copy.h"
-#include "vhost-net.h"
+/**
+ * @file
+ * RTE Port FD Device
+ *
+ * fd_reader: input port built on top of valid non-blocking file descriptor
+ * fd_writer: output port built on top of valid non-blocking file descriptor
+ *
+ ***/
-static const char eventfd_cdev[] = "/dev/eventfd-link";
+#include <stdint.h>
-static int eventfd_link = -1;
+#include <rte_mempool.h>
+#include "rte_port.h"
-int
-eventfd_init(void)
-{
- if (eventfd_link >= 0)
- return 0;
+/** fd_reader port parameters */
+struct rte_port_fd_reader_params {
+ /** File descriptor */
+ int fd;
- eventfd_link = open(eventfd_cdev, O_RDWR);
- if (eventfd_link < 0) {
- RTE_LOG(ERR, VHOST_CONFIG,
- "eventfd_link module is not loaded\n");
- return -1;
- }
+ /** Maximum Transmission Unit (MTU) */
+ uint32_t mtu;
- return 0;
-}
+ /** Pre-initialized buffer pool */
+ struct rte_mempool *mempool;
+};
-int
-eventfd_free(void)
-{
- if (eventfd_link >= 0)
- close(eventfd_link);
- return 0;
-}
+/** fd_reader port operations */
+extern struct rte_port_in_ops rte_port_fd_reader_ops;
-/*
- * This function uses the eventfd_link kernel module to copy an eventfd file
- * descriptor provided by QEMU in to our process space.
- */
-int
-eventfd_copy(int target_fd, int target_pid)
-{
- int ret;
- struct eventfd_copy2 eventfd_copy2;
-
-
- /* Open the character device to the kernel module. */
- /* TODO: check this earlier rather than fail until VM boots! */
- if (eventfd_init() < 0)
- return -1;
-
- eventfd_copy2.fd = target_fd;
- eventfd_copy2.pid = target_pid;
- eventfd_copy2.flags = O_NONBLOCK | O_CLOEXEC;
- /* Call the IOCTL to copy the eventfd. */
- ret = ioctl(eventfd_link, EVENTFD_COPY2, &eventfd_copy2);
-
- if (ret < 0) {
- RTE_LOG(ERR, VHOST_CONFIG,
- "EVENTFD_COPY2 ioctl failed\n");
- return -1;
- }
-
- return ret;
+/** fd_writer port parameters */
+struct rte_port_fd_writer_params {
+ /** File descriptor */
+ int fd;
+
+ /** Recommended write burst size. The actual burst size can be
+ * bigger or smaller than this value.
+ */
+ uint32_t tx_burst_sz;
+};
+
+/** fd_writer port operations */
+extern struct rte_port_out_ops rte_port_fd_writer_ops;
+
+/** fd_writer_nodrop port parameters */
+struct rte_port_fd_writer_nodrop_params {
+ /** File descriptor */
+ int fd;
+
+ /** Recommended write burst size. The actual burst size can be
+ * bigger or smaller than this value.
+ */
+ uint32_t tx_burst_sz;
+
+ /** Maximum number of retries, 0 for no limit */
+ uint32_t n_retries;
+};
+
+/** fd_writer_nodrop port operations */
+extern struct rte_port_out_ops rte_port_fd_writer_nodrop_ops;
+
+#ifdef __cplusplus
}
+#endif
+
+#endif
diff --git a/lib/librte_port/rte_port_source_sink.h b/lib/librte_port/rte_port_source_sink.h
index 4db8a8a8..be585a77 100644
--- a/lib/librte_port/rte_port_source_sink.h
+++ b/lib/librte_port/rte_port_source_sink.h
@@ -55,7 +55,7 @@ struct rte_port_source_params {
struct rte_mempool *mempool;
/** The full path of the pcap file to read packets from */
- char *file_name;
+ const char *file_name;
/** The number of bytes to be read from each packet in the
* pcap file. If this value is 0, the whole packet is read;
* if it is bigger than packet size, the generated packets
@@ -69,7 +69,7 @@ extern struct rte_port_in_ops rte_port_source_ops;
/** sink port parameters */
struct rte_port_sink_params {
/** The full path of the pcap file to write the packets to */
- char *file_name;
+ const char *file_name;
/** The maximum number of packets write to the pcap file.
* If this value is 0, the "infinite" write will be carried
* out.
diff --git a/lib/librte_port/rte_port_version.map b/lib/librte_port/rte_port_version.map
index 048c20d7..6470629b 100644
--- a/lib/librte_port/rte_port_version.map
+++ b/lib/librte_port/rte_port_version.map
@@ -42,3 +42,12 @@ DPDK_16.07 {
rte_port_kni_writer_nodrop_ops;
} DPDK_2.2;
+
+DPDK_16.11 {
+ global:
+
+ rte_port_fd_reader_ops;
+ rte_port_fd_writer_ops;
+ rte_port_fd_writer_nodrop_ops;
+
+} DPDK_16.07;
diff --git a/lib/librte_reorder/rte_reorder.h b/lib/librte_reorder/rte_reorder.h
index c7a2934c..737e0554 100644
--- a/lib/librte_reorder/rte_reorder.h
+++ b/lib/librte_reorder/rte_reorder.h
@@ -44,6 +44,8 @@
*
*/
+#include <rte_mbuf.h>
+
#ifdef __cplusplus
extern "C" {
#endif
diff --git a/lib/librte_ring/rte_ring.h b/lib/librte_ring/rte_ring.h
index 0e22e694..32b8c8d2 100644
--- a/lib/librte_ring/rte_ring.h
+++ b/lib/librte_ring/rte_ring.h
@@ -187,7 +187,7 @@ struct rte_ring {
struct rte_ring_debug_stats stats[RTE_MAX_LCORE];
#endif
- void * ring[0] __rte_cache_aligned; /**< Memory space of ring starts here.
+ void *ring[] __rte_cache_aligned; /**< Memory space of ring starts here.
* not volatile so need to be careful
* about compiler re-ordering */
};
@@ -341,7 +341,7 @@ void rte_ring_free(struct rte_ring *r);
int rte_ring_set_water_mark(struct rte_ring *r, unsigned count);
/**
- * Dump the status of the ring to the console.
+ * Dump the status of the ring to a file.
*
* @param f
* A pointer to a file for output
diff --git a/lib/librte_sched/rte_bitmap.h b/lib/librte_sched/rte_bitmap.h
index ff675c58..010d752c 100644
--- a/lib/librte_sched/rte_bitmap.h
+++ b/lib/librte_sched/rte_bitmap.h
@@ -64,6 +64,7 @@ extern "C" {
*
***/
+#include <string.h>
#include <rte_common.h>
#include <rte_debug.h>
#include <rte_memory.h>
@@ -103,7 +104,7 @@ struct rte_bitmap {
uint32_t go2; /**< Bitmap scan: Go/stop condition for current array2 cache line */
/* Storage space for array1 and array2 */
- uint8_t memory[0];
+ uint8_t memory[];
};
static inline void
diff --git a/lib/librte_sched/rte_reciprocal.h b/lib/librte_sched/rte_reciprocal.h
index abd15251..5e21f096 100644
--- a/lib/librte_sched/rte_reciprocal.h
+++ b/lib/librte_sched/rte_reciprocal.h
@@ -22,6 +22,8 @@
#ifndef _RTE_RECIPROCAL_H_
#define _RTE_RECIPROCAL_H_
+#include <stdint.h>
+
struct rte_reciprocal {
uint32_t m;
uint8_t sh1, sh2;
diff --git a/lib/librte_sched/rte_sched_common.h b/lib/librte_sched/rte_sched_common.h
index 8920adec..aed144ba 100644
--- a/lib/librte_sched/rte_sched_common.h
+++ b/lib/librte_sched/rte_sched_common.h
@@ -38,6 +38,7 @@
extern "C" {
#endif
+#include <stdint.h>
#include <sys/types.h>
#define __rte_aligned_16 __attribute__((__aligned__(16)))
diff --git a/lib/librte_table/Makefile b/lib/librte_table/Makefile
index 7a8a3f3c..c82c7696 100644
--- a/lib/librte_table/Makefile
+++ b/lib/librte_table/Makefile
@@ -1,6 +1,6 @@
# BSD LICENSE
#
-# Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+# Copyright(c) 2010-2016 Intel Corporation. All rights reserved.
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
@@ -51,6 +51,7 @@ SRCS-$(CONFIG_RTE_LIBRTE_TABLE) += rte_table_lpm_ipv6.c
ifeq ($(CONFIG_RTE_LIBRTE_ACL),y)
SRCS-$(CONFIG_RTE_LIBRTE_TABLE) += rte_table_acl.c
endif
+SRCS-$(CONFIG_RTE_LIBRTE_TABLE) += rte_table_hash_cuckoo.c
SRCS-$(CONFIG_RTE_LIBRTE_TABLE) += rte_table_hash_key8.c
SRCS-$(CONFIG_RTE_LIBRTE_TABLE) += rte_table_hash_key16.c
SRCS-$(CONFIG_RTE_LIBRTE_TABLE) += rte_table_hash_key32.c
@@ -80,5 +81,6 @@ DEPDIRS-$(CONFIG_RTE_LIBRTE_TABLE) += lib/librte_lpm
ifeq ($(CONFIG_RTE_LIBRTE_ACL),y)
DEPDIRS-$(CONFIG_RTE_LIBRTE_TABLE) += lib/librte_acl
endif
+DEPDIRS-$(CONFIG_RTE_LIBRTE_TABLE) += lib/librte_hash
include $(RTE_SDK)/mk/rte.lib.mk
diff --git a/lib/librte_table/rte_table_hash.h b/lib/librte_table/rte_table_hash.h
index 9d17516a..57505a6f 100644
--- a/lib/librte_table/rte_table_hash.h
+++ b/lib/librte_table/rte_table_hash.h
@@ -1,7 +1,7 @@
/*-
* BSD LICENSE
*
- * Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+ * Copyright(c) 2010-2016 Intel Corporation. All rights reserved.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -363,6 +363,35 @@ struct rte_table_hash_key32_ext_params {
/** Extendible bucket hash table operations */
extern struct rte_table_ops rte_table_hash_key32_ext_ops;
+/** Cuckoo hash table parameters */
+struct rte_table_hash_cuckoo_params {
+ /** Key size (number of bytes) */
+ uint32_t key_size;
+
+ /** Maximum number of hash table entries */
+ uint32_t n_keys;
+
+ /** Hash function used to calculate hash */
+ rte_table_hash_op_hash f_hash;
+
+ /** Seed value or Init value used by f_hash */
+ uint32_t seed;
+
+ /** Byte offset within packet meta-data where the 4-byte key signature
+ is located. Valid for pre-computed key signature tables, ignored for
+ do-sig tables. */
+ uint32_t signature_offset;
+
+ /** Byte offset within packet meta-data where the key is located */
+ uint32_t key_offset;
+
+ /** Hash table name */
+ const char *name;
+};
+
+/** Cuckoo hash table operations */
+extern struct rte_table_ops rte_table_hash_cuckoo_dosig_ops;
+
#ifdef __cplusplus
}
#endif
diff --git a/lib/librte_table/rte_table_hash_cuckoo.c b/lib/librte_table/rte_table_hash_cuckoo.c
new file mode 100644
index 00000000..ff7baee3
--- /dev/null
+++ b/lib/librte_table/rte_table_hash_cuckoo.c
@@ -0,0 +1,382 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright(c) 2016 Intel Corporation. All rights reserved.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+#include <errno.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <string.h>
+
+#include <rte_common.h>
+#include <rte_mbuf.h>
+#include <rte_memory.h>
+#include <rte_malloc.h>
+#include <rte_log.h>
+
+#include <rte_hash.h>
+#include "rte_table_hash.h"
+
+/*
+ * Statistics helpers. When RTE_TABLE_STATS_COLLECT is defined they add
+ * val to the matching counter of table->stats; otherwise they expand to
+ * nothing, so their arguments are not evaluated at all.
+ */
+#ifdef RTE_TABLE_STATS_COLLECT
+
+/* Account for val packets presented to the lookup operation. */
+#define RTE_TABLE_HASH_CUCKOO_STATS_PKTS_IN_ADD(table, val) \
+ (table->stats.n_pkts_in += val)
+/* Account for val packets whose lookup did not hit. */
+#define RTE_TABLE_HASH_CUCKOO_STATS_PKTS_LOOKUP_MISS(table, val) \
+ (table->stats.n_pkts_lookup_miss += val)
+
+#else
+
+#define RTE_TABLE_HASH_CUCKOO_STATS_PKTS_IN_ADD(table, val)
+#define RTE_TABLE_HASH_CUCKOO_STATS_PKTS_LOOKUP_MISS(table, val)
+
+#endif
+
+
+/*
+ * Cuckoo hash table object: a header followed, in the same allocation,
+ * by the entry storage. Keys live in the rte_hash object; the position
+ * rte_hash reports for a key selects the entry in memory[].
+ */
+struct rte_table_hash {
+ /* Counters reported by the stats operation */
+ struct rte_table_stats stats;
+
+ /* Input parameters */
+ uint32_t key_size;
+ uint32_t entry_size;
+ uint32_t n_keys;
+ rte_table_hash_op_hash f_hash;
+ uint32_t seed;
+ uint32_t signature_offset;
+ uint32_t key_offset;
+ /* Pointer copied from the create parameters; the string is not duplicated */
+ const char *name;
+
+ /* cuckoo hash table object */
+ struct rte_hash *h_table;
+
+ /* Lookup table: entry for hash position pos starts at
+ * memory[pos * entry_size] */
+ uint8_t memory[0] __rte_cache_aligned; };
+
+/*
+ * Validate the creation parameters of a cuckoo hash table.
+ *
+ * The checks run in a fixed order and the first failing one is logged.
+ *
+ * Returns 0 when all parameters are acceptable, -EINVAL when params is
+ * NULL, key_size or n_keys is zero, or f_hash or name is NULL.
+ */
+static int
+check_params_create_hash_cuckoo(
+	const struct rte_table_hash_cuckoo_params *params)
+{
+	if (!params) {
+		RTE_LOG(ERR, TABLE, "NULL Input Parameters.\n");
+		return -EINVAL;
+	}
+
+	if (!params->key_size) {
+		RTE_LOG(ERR, TABLE, "Invalid key_size.\n");
+		return -EINVAL;
+	}
+
+	if (!params->n_keys) {
+		RTE_LOG(ERR, TABLE, "Invalid n_keys.\n");
+		return -EINVAL;
+	}
+
+	if (!params->f_hash) {
+		RTE_LOG(ERR, TABLE, "f_hash is NULL.\n");
+		return -EINVAL;
+	}
+
+	if (!params->name) {
+		RTE_LOG(ERR, TABLE, "Table name is NULL.\n");
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+/*
+ * Create a cuckoo hash table.
+ *
+ * One zeroed, cache-line aligned area is allocated for the table header
+ * followed by n_keys entries of entry_size bytes. An rte_hash object,
+ * looked up by name and created when absent, maps keys to entry positions.
+ *
+ * params:     pointer to struct rte_table_hash_cuckoo_params
+ * socket_id:  socket passed to rte_zmalloc_socket() and rte_hash_create()
+ * entry_size: size in bytes of one table entry
+ *
+ * Returns the table handle, or NULL on invalid parameters, when the table
+ * size does not fit in 32 bits, on allocation failure or when the rte_hash
+ * object cannot be created.
+ */
+static void *
+rte_table_hash_cuckoo_create(void *params,
+	int socket_id,
+	uint32_t entry_size)
+{
+	struct rte_hash *rte_hash_handle;
+	struct rte_table_hash *t;
+	uint64_t header_cl, entries_cl, total_bytes;
+	uint32_t total_size;
+
+	/* Check input parameters */
+	struct rte_table_hash_cuckoo_params *p =
+		(struct rte_table_hash_cuckoo_params *) params;
+
+	if (check_params_create_hash_cuckoo(params))
+		return NULL;
+
+	/*
+	 * Memory footprint, in cache lines. The arithmetic is done in 64 bits
+	 * because n_keys * entry_size can exceed 32 bits; a wrapped product
+	 * would yield an undersized allocation that later adds write past.
+	 */
+	header_cl = ((uint64_t)sizeof(struct rte_table_hash) +
+		RTE_CACHE_LINE_SIZE) / RTE_CACHE_LINE_SIZE;
+	entries_cl = ((uint64_t)p->n_keys * entry_size +
+		RTE_CACHE_LINE_SIZE) / RTE_CACHE_LINE_SIZE;
+	total_bytes = (header_cl + entries_cl) * RTE_CACHE_LINE_SIZE;
+	if (total_bytes > UINT32_MAX) {
+		RTE_LOG(ERR, TABLE,
+			"%s: table too large (n_keys %u, entry_size %u)\n",
+			__func__, p->n_keys, entry_size);
+		return NULL;
+	}
+	total_size = (uint32_t)total_bytes;
+
+	/* Memory allocation */
+	t = rte_zmalloc_socket("TABLE",
+		total_size,
+		RTE_CACHE_LINE_SIZE,
+		socket_id);
+	if (t == NULL) {
+		/* Report the size that was actually requested */
+		RTE_LOG(ERR, TABLE,
+			"%s: Cannot allocate %u bytes for Cuckoo hash table\n",
+			__func__,
+			total_size);
+		return NULL;
+	}
+
+	/* Create cuckoo hash table */
+	struct rte_hash_parameters hash_cuckoo_params = {
+		.entries = p->n_keys,
+		.key_len = p->key_size,
+		.hash_func = (rte_hash_function)(p->f_hash),
+		.hash_func_init_val = p->seed,
+		.socket_id = socket_id,
+		.name = p->name
+	};
+
+	/*
+	 * NOTE(review): an rte_hash already registered under this name is
+	 * reused as-is, without checking that its capacity or key length
+	 * match these parameters, and rte_table_hash_cuckoo_free() releases
+	 * it with rte_hash_free(). Confirm that sharing by name is intended.
+	 */
+	rte_hash_handle = rte_hash_find_existing(p->name);
+	if (rte_hash_handle == NULL) {
+		rte_hash_handle = rte_hash_create(&hash_cuckoo_params);
+		if (NULL == rte_hash_handle) {
+			RTE_LOG(ERR, TABLE,
+				"%s: failed to create cuckoo hash table. keysize: %u\n",
+				__func__, hash_cuckoo_params.key_len);
+			rte_free(t);
+			return NULL;
+		}
+	}
+
+	/* initialize the cuckoo hash parameters */
+	t->key_size = p->key_size;
+	t->entry_size = entry_size;
+	t->n_keys = p->n_keys;
+	t->f_hash = p->f_hash;
+	t->seed = p->seed;
+	t->signature_offset = p->signature_offset;
+	t->key_offset = p->key_offset;
+	t->name = p->name;
+	t->h_table = rte_hash_handle;
+
+	RTE_LOG(INFO, TABLE,
+		"%s: Cuckoo Hash table memory footprint is %u bytes\n",
+		__func__, total_size);
+	return t;
+}
+
+/*
+ * Release a cuckoo hash table: the rte_hash object first, then the
+ * table memory itself.
+ *
+ * Returns 0 on success, -EINVAL when table is NULL.
+ */
+static int
+rte_table_hash_cuckoo_free(void *table)
+{
+	struct rte_table_hash *tbl = (struct rte_table_hash *)table;
+
+	if (!tbl) {
+		RTE_LOG(ERR, TABLE, "%s: table parameter is NULL\n", __func__);
+		return -EINVAL;
+	}
+
+	rte_hash_free(tbl->h_table);
+	rte_free(tbl);
+
+	return 0;
+}
+
+/*
+ * Add an entry for key, or overwrite the entry already stored for it.
+ *
+ * table:     table handle returned by the create operation
+ * key:       key to add; handed to rte_hash_lookup()/rte_hash_add_key()
+ * entry:     entry data; entry_size bytes are copied into the table
+ * key_found: set to 1 when the key was already present, 0 when it was
+ *            newly added
+ * entry_ptr: set to the address of the entry inside the table
+ *
+ * Returns 0 on success, -EINVAL when any pointer argument is NULL,
+ * otherwise the negative status reported by rte_hash.
+ */
+static int
+rte_table_hash_cuckoo_entry_add(void *table, void *key, void *entry,
+	int *key_found, void **entry_ptr)
+{
+	struct rte_table_hash *t = (struct rte_table_hash *)table;
+	uint8_t *slot;
+	int pos;
+
+	if (table == NULL) {
+		RTE_LOG(ERR, TABLE, "%s: table parameter is NULL\n", __func__);
+		return -EINVAL;
+	}
+
+	if (key == NULL) {
+		RTE_LOG(ERR, TABLE, "%s: key parameter is NULL\n", __func__);
+		return -EINVAL;
+	}
+
+	if (entry == NULL) {
+		RTE_LOG(ERR, TABLE, "%s: entry parameter is NULL\n", __func__);
+		return -EINVAL;
+	}
+
+	/* Both output pointers are written unconditionally below */
+	if (key_found == NULL) {
+		RTE_LOG(ERR, TABLE, "%s: key_found parameter is NULL\n",
+			__func__);
+		return -EINVAL;
+	}
+
+	if (entry_ptr == NULL) {
+		RTE_LOG(ERR, TABLE, "%s: entry_ptr parameter is NULL\n",
+			__func__);
+		return -EINVAL;
+	}
+
+	/*
+	 * Look the key up first: this is what tells an update of an
+	 * existing entry apart from the insertion of a new one.
+	 */
+	pos = rte_hash_lookup(t->h_table, key);
+	if (pos >= 0) {
+		*key_found = 1;
+	} else if (pos == -ENOENT) {
+		/* Entry not found. Adding new entry */
+		pos = rte_hash_add_key(t->h_table, key);
+		if (pos < 0) {
+			/* pos is a negative errno value: print it signed */
+			RTE_LOG(ERR, TABLE,
+				"%s: Entry not added, status : %d\n",
+				__func__, pos);
+			return pos;
+		}
+		*key_found = 0;
+	} else {
+		/* Any other lookup failure is passed through unchanged */
+		return pos;
+	}
+
+	slot = &t->memory[(size_t)pos * t->entry_size];
+	memcpy(slot, entry, t->entry_size);
+	*entry_ptr = slot;
+
+	return 0;
+}
+
+/*
+ * Delete the entry stored for key.
+ *
+ * table:     table handle returned by the create operation
+ * key:       key to delete; handed to rte_hash_del_key()
+ * key_found: set to 1 when the key was present and removed, 0 otherwise
+ * entry:     optional; when not NULL it receives a copy (entry_size
+ *            bytes) of the entry that was removed
+ *
+ * Returns 0 when the key was removed, -EINVAL when table, key or
+ * key_found is NULL, otherwise the negative status reported by
+ * rte_hash_del_key(). Success is reported as 0, like the add operation,
+ * rather than as the hash position.
+ */
+static int
+rte_table_hash_cuckoo_entry_delete(void *table, void *key,
+	int *key_found, void *entry)
+{
+	struct rte_table_hash *t = (struct rte_table_hash *)table;
+	uint8_t *entry_ptr;
+	int pos;
+
+	if (table == NULL) {
+		RTE_LOG(ERR, TABLE, "%s: table parameter is NULL\n", __func__);
+		return -EINVAL;
+	}
+
+	if (key == NULL) {
+		RTE_LOG(ERR, TABLE, "%s: key parameter is NULL\n", __func__);
+		return -EINVAL;
+	}
+
+	if (key_found == NULL) {
+		RTE_LOG(ERR, TABLE, "%s: key_found parameter is NULL\n",
+			__func__);
+		return -EINVAL;
+	}
+
+	pos = rte_hash_del_key(t->h_table, key);
+	if (pos < 0) {
+		/* Nothing was removed: tell the caller explicitly */
+		*key_found = 0;
+		return pos;
+	}
+
+	entry_ptr = &t->memory[(size_t)pos * t->entry_size];
+
+	if (entry)
+		memcpy(entry, entry_ptr, t->entry_size);
+
+	/* Clear the slot so a later add at this position starts from zero */
+	memset(entry_ptr, 0, t->entry_size);
+	*key_found = 1;
+
+	return 0;
+}
+
+
+/*
+ * Look up a burst of packets.
+ *
+ * For every packet selected by pkts_mask the key is read from the packet
+ * meta-data at key_offset. On a hit, entries[i] is set to the entry in
+ * the table and bit i is set in *lookup_hit_mask; entries[i] is left
+ * untouched on a miss.
+ *
+ * When the selected packets are exactly the first n of the burst (the
+ * mask is a run of low bits, or empty), one bulk lookup is issued;
+ * otherwise each selected packet is looked up individually.
+ *
+ * Always returns 0.
+ */
+static int
+rte_table_hash_cuckoo_lookup_dosig(void *table,
+	struct rte_mbuf **pkts,
+	uint64_t pkts_mask,
+	uint64_t *lookup_hit_mask,
+	void **entries)
+{
+	struct rte_table_hash *tbl = (struct rte_table_hash *)table;
+	uint64_t hit_mask = 0;
+	uint32_t idx;
+
+	__rte_unused uint32_t n_in = __builtin_popcountll(pkts_mask);
+
+	RTE_TABLE_HASH_CUCKOO_STATS_PKTS_IN_ADD(tbl, n_in);
+
+	if ((pkts_mask & (pkts_mask + 1)) != 0) {
+		/* Mask has holes: walk up to its highest set bit */
+		uint32_t n_slots = (uint32_t)(RTE_PORT_IN_BURST_SIZE_MAX
+			- __builtin_clzll(pkts_mask));
+
+		for (idx = 0; idx < n_slots; idx++) {
+			uint64_t bit = 1LLU << idx;
+			uint8_t *pkt_key;
+			int pos;
+
+			if ((bit & pkts_mask) == 0)
+				continue;
+
+			pkt_key = RTE_MBUF_METADATA_UINT8_PTR(pkts[idx],
+				tbl->key_offset);
+			pos = rte_hash_lookup(tbl->h_table, pkt_key);
+			if (likely(pos >= 0)) {
+				entries[idx] = &tbl->memory[pos
+					* tbl->entry_size];
+				hit_mask |= bit;
+			}
+		}
+	} else {
+		/* Packets 0 .. n_in - 1 are all selected: bulk lookup */
+		const uint8_t *key_ptrs[64];
+		int32_t hash_pos[64], rc;
+
+		for (idx = 0; idx < n_in; idx++)
+			key_ptrs[idx] = RTE_MBUF_METADATA_UINT8_PTR(pkts[idx],
+				tbl->key_offset);
+
+		rc = rte_hash_lookup_bulk(tbl->h_table,
+			(const void **) key_ptrs,
+			n_in,
+			hash_pos);
+
+		/* A failed bulk lookup reports every packet as a miss */
+		for (idx = 0; rc == 0 && idx < n_in; idx++) {
+			if (likely(hash_pos[idx] >= 0)) {
+				entries[idx] = &tbl->memory[hash_pos[idx]
+					* tbl->entry_size];
+				hit_mask |= 1LLU << idx;
+			}
+		}
+	}
+
+	*lookup_hit_mask = hit_mask;
+	RTE_TABLE_HASH_CUCKOO_STATS_PKTS_LOOKUP_MISS(tbl,
+		n_in - __builtin_popcountll(hit_mask));
+
+	return 0;
+
+}
+
+/*
+ * Read the table counters and optionally reset them.
+ *
+ * stats: when not NULL, receives a copy of the current counters
+ * clear: when non-zero, the counters are zeroed after the copy
+ *
+ * Always returns 0.
+ */
+static int
+rte_table_hash_cuckoo_stats_read(void *table, struct rte_table_stats *stats,
+	int clear)
+{
+	struct rte_table_hash *tbl = (struct rte_table_hash *) table;
+
+	if (stats)
+		memcpy(stats, &tbl->stats, sizeof(tbl->stats));
+
+	if (clear != 0)
+		memset(&tbl->stats, 0, sizeof(tbl->stats));
+
+	return 0;
+}
+
+/*
+ * Operations of the cuckoo hash table. The lookup operation reads the
+ * key from the packet meta-data and hands it to rte_hash. Bulk add and
+ * bulk delete are not provided.
+ */
+struct rte_table_ops rte_table_hash_cuckoo_dosig_ops = {
+ .f_create = rte_table_hash_cuckoo_create,
+ .f_free = rte_table_hash_cuckoo_free,
+ .f_add = rte_table_hash_cuckoo_entry_add,
+ .f_delete = rte_table_hash_cuckoo_entry_delete,
+ .f_add_bulk = NULL,
+ .f_delete_bulk = NULL,
+ .f_lookup = rte_table_hash_cuckoo_lookup_dosig,
+ .f_stats = rte_table_hash_cuckoo_stats_read,
+};
diff --git a/lib/librte_table/rte_table_hash_key16.c b/lib/librte_table/rte_table_hash_key16.c
index b7e000fd..08d4d77e 100644
--- a/lib/librte_table/rte_table_hash_key16.c
+++ b/lib/librte_table/rte_table_hash_key16.c
@@ -130,7 +130,7 @@ rte_table_hash_create_key16_lru(void *params,
/* Check input parameters */
if ((check_params_create_lru(p) != 0) ||
((sizeof(struct rte_table_hash) % RTE_CACHE_LINE_SIZE) != 0) ||
- ((sizeof(struct rte_bucket_4_16) % RTE_CACHE_LINE_SIZE) != 0))
+ ((sizeof(struct rte_bucket_4_16) % 64) != 0))
return NULL;
n_entries_per_bucket = 4;
key_size = 16;
@@ -344,7 +344,7 @@ rte_table_hash_create_key16_ext(void *params,
/* Check input parameters */
if ((check_params_create_ext(p) != 0) ||
((sizeof(struct rte_table_hash) % RTE_CACHE_LINE_SIZE) != 0) ||
- ((sizeof(struct rte_bucket_4_16) % RTE_CACHE_LINE_SIZE) != 0))
+ ((sizeof(struct rte_bucket_4_16) % 64) != 0))
return NULL;
n_entries_per_bucket = 4;
diff --git a/lib/librte_table/rte_table_hash_key32.c b/lib/librte_table/rte_table_hash_key32.c
index a7aba492..161f6b7a 100644
--- a/lib/librte_table/rte_table_hash_key32.c
+++ b/lib/librte_table/rte_table_hash_key32.c
@@ -129,7 +129,7 @@ rte_table_hash_create_key32_lru(void *params,
/* Check input parameters */
if ((check_params_create_lru(p) != 0) ||
((sizeof(struct rte_table_hash) % RTE_CACHE_LINE_SIZE) != 0) ||
- ((sizeof(struct rte_bucket_4_32) % RTE_CACHE_LINE_SIZE) != 0)) {
+ ((sizeof(struct rte_bucket_4_32) % 64) != 0)) {
return NULL;
}
n_entries_per_bucket = 4;
@@ -337,7 +337,7 @@ rte_table_hash_create_key32_ext(void *params,
/* Check input parameters */
if ((check_params_create_ext(p) != 0) ||
((sizeof(struct rte_table_hash) % RTE_CACHE_LINE_SIZE) != 0) ||
- ((sizeof(struct rte_bucket_4_32) % RTE_CACHE_LINE_SIZE) != 0))
+ ((sizeof(struct rte_bucket_4_32) % 64) != 0))
return NULL;
n_entries_per_bucket = 4;
diff --git a/lib/librte_table/rte_table_hash_key8.c b/lib/librte_table/rte_table_hash_key8.c
index e2e2bdc4..b04f60dc 100644
--- a/lib/librte_table/rte_table_hash_key8.c
+++ b/lib/librte_table/rte_table_hash_key8.c
@@ -125,7 +125,7 @@ rte_table_hash_create_key8_lru(void *params, int socket_id, uint32_t entry_size)
/* Check input parameters */
if ((check_params_create_lru(p) != 0) ||
((sizeof(struct rte_table_hash) % RTE_CACHE_LINE_SIZE) != 0) ||
- ((sizeof(struct rte_bucket_4_8) % RTE_CACHE_LINE_SIZE) != 0)) {
+ ((sizeof(struct rte_bucket_4_8) % 64) != 0)) {
return NULL;
}
n_entries_per_bucket = 4;
@@ -332,7 +332,7 @@ rte_table_hash_create_key8_ext(void *params, int socket_id, uint32_t entry_size)
/* Check input parameters */
if ((check_params_create_ext(p) != 0) ||
((sizeof(struct rte_table_hash) % RTE_CACHE_LINE_SIZE) != 0) ||
- ((sizeof(struct rte_bucket_4_8) % RTE_CACHE_LINE_SIZE) != 0))
+ ((sizeof(struct rte_bucket_4_8) % 64) != 0))
return NULL;
n_entries_per_bucket = 4;
diff --git a/lib/librte_table/rte_table_version.map b/lib/librte_table/rte_table_version.map
index 459c2da3..e1eaa275 100644
--- a/lib/librte_table/rte_table_version.map
+++ b/lib/librte_table/rte_table_version.map
@@ -28,4 +28,11 @@ DPDK_2.2 {
rte_table_hash_key16_ext_dosig_ops;
rte_table_hash_key16_lru_dosig_ops;
+};
+
+DPDK_16.07 {
+ global:
+
+ rte_table_hash_cuckoo_dosig_ops;
+
} DPDK_2.0;
diff --git a/lib/librte_timer/rte_timer.h b/lib/librte_timer/rte_timer.h
index 77547c6b..a276a736 100644
--- a/lib/librte_timer/rte_timer.h
+++ b/lib/librte_timer/rte_timer.h
@@ -66,6 +66,7 @@
#include <stdio.h>
#include <stdint.h>
#include <stddef.h>
+#include <rte_common.h>
#ifdef __cplusplus
extern "C" {
@@ -91,6 +92,7 @@ enum rte_timer_type {
* config) and an owner (the id of the lcore that owns the timer).
*/
union rte_timer_status {
+ RTE_STD_C11
struct {
uint16_t state; /**< Stop, pending, running, config. */
int16_t owner; /**< The lcore that owns the timer. */
diff --git a/lib/librte_vhost/Makefile b/lib/librte_vhost/Makefile
index 538adb0b..415ffc6e 100644
--- a/lib/librte_vhost/Makefile
+++ b/lib/librte_vhost/Makefile
@@ -39,25 +39,16 @@ EXPORT_MAP := rte_vhost_version.map
LIBABIVER := 3
CFLAGS += $(WERROR_FLAGS) -I$(SRCDIR) -O3 -D_FILE_OFFSET_BITS=64
-ifeq ($(CONFIG_RTE_LIBRTE_VHOST_USER),y)
CFLAGS += -I vhost_user
LDLIBS += -lpthread
-else
-CFLAGS += -I vhost_cuse
-LDLIBS += -lfuse
-endif
ifeq ($(CONFIG_RTE_LIBRTE_VHOST_NUMA),y)
LDLIBS += -lnuma
endif
# all source are stored in SRCS-y
-SRCS-$(CONFIG_RTE_LIBRTE_VHOST) := virtio-net.c vhost_rxtx.c
-ifeq ($(CONFIG_RTE_LIBRTE_VHOST_USER),y)
-SRCS-$(CONFIG_RTE_LIBRTE_VHOST) += vhost_user/vhost-net-user.c vhost_user/virtio-net-user.c vhost_user/fd_man.c
-else
-SRCS-$(CONFIG_RTE_LIBRTE_VHOST) += vhost_cuse/vhost-net-cdev.c vhost_cuse/virtio-net-cdev.c vhost_cuse/eventfd_copy.c
-endif
+SRCS-$(CONFIG_RTE_LIBRTE_VHOST) := fd_man.c socket.c vhost.c vhost_user.c \
+ virtio_net.c
# install includes
SYMLINK-$(CONFIG_RTE_LIBRTE_VHOST)-include += rte_virtio_net.h
diff --git a/lib/librte_vhost/eventfd_link/Makefile b/lib/librte_vhost/eventfd_link/Makefile
deleted file mode 100644
index 3140e8bf..00000000
--- a/lib/librte_vhost/eventfd_link/Makefile
+++ /dev/null
@@ -1,41 +0,0 @@
-# BSD LICENSE
-#
-# Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
-# All rights reserved.
-#
-# Redistribution and use in source and binary forms, with or without
-# modification, are permitted provided that the following conditions
-# are met:
-#
-# * Redistributions of source code must retain the above copyright
-# notice, this list of conditions and the following disclaimer.
-# * Redistributions in binary form must reproduce the above copyright
-# notice, this list of conditions and the following disclaimer in
-# the documentation and/or other materials provided with the
-# distribution.
-# * Neither the name of Intel Corporation nor the names of its
-# contributors may be used to endorse or promote products derived
-# from this software without specific prior written permission.
-#
-# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
-# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
-# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
-# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
-# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
-# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
-# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
-# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
-# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-
-RTE_KERNELDIR ?= /lib/modules/$(shell uname -r)/build
-
-obj-m += eventfd_link.o
-
-
-all:
- make -C $(RTE_KERNELDIR) M=$(PWD) modules
-
-clean:
- make -C $(RTE_KERNELDIR) M=$(PWD) clean
diff --git a/lib/librte_vhost/eventfd_link/eventfd_link.c b/lib/librte_vhost/eventfd_link/eventfd_link.c
deleted file mode 100644
index 4b05b5a8..00000000
--- a/lib/librte_vhost/eventfd_link/eventfd_link.c
+++ /dev/null
@@ -1,277 +0,0 @@
-/*-
- * GPL LICENSE SUMMARY
- *
- * Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of version 2 of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
- * The full GNU General Public License is included in this distribution
- * in the file called LICENSE.GPL.
- *
- * Contact Information:
- * Intel Corporation
- */
-
-#include <linux/miscdevice.h>
-#include <linux/module.h>
-#include <linux/file.h>
-#include <linux/fdtable.h>
-#include <linux/syscalls.h>
-
-#include "eventfd_link.h"
-
-
-/*
- * get_files_struct is copied from fs/file.c
- */
-struct files_struct *
-get_files_struct(struct task_struct *task)
-{
- struct files_struct *files;
-
- task_lock(task);
- files = task->files;
- if (files)
- atomic_inc(&files->count);
- task_unlock(task);
-
- return files;
-}
-
-/*
- * put_files_struct is extracted from fs/file.c
- */
-void
-put_files_struct(struct files_struct *files)
-{
- if (atomic_dec_and_test(&files->count))
- BUG();
-}
-
-static struct file *
-fget_from_files(struct files_struct *files, unsigned fd)
-{
- struct file *file;
-
- rcu_read_lock();
- file = fcheck_files(files, fd);
- if (file) {
- if (file->f_mode & FMODE_PATH ||
- !atomic_long_inc_not_zero(&file->f_count)) {
-
- file = NULL;
- }
- }
- rcu_read_unlock();
-
- return file;
-}
-
-static long
-eventfd_link_ioctl_copy2(unsigned long arg)
-{
- void __user *argp = (void __user *) arg;
- struct task_struct *task_target = NULL;
- struct file *file;
- struct files_struct *files;
- struct eventfd_copy2 eventfd_copy2;
- long ret = -EFAULT;
-
- if (copy_from_user(&eventfd_copy2, argp, sizeof(struct eventfd_copy2)))
- goto out;
-
- /*
- * Find the task struct for the target pid
- */
- ret = -ESRCH;
-
- task_target =
- get_pid_task(find_vpid(eventfd_copy2.pid), PIDTYPE_PID);
- if (task_target == NULL) {
- pr_info("Unable to find pid %d\n", eventfd_copy2.pid);
- goto out;
- }
-
- ret = -ESTALE;
- files = get_files_struct(task_target);
- if (files == NULL) {
- pr_info("Failed to get target files struct\n");
- goto out_task;
- }
-
- ret = -EBADF;
- file = fget_from_files(files, eventfd_copy2.fd);
- put_files_struct(files);
-
- if (file == NULL) {
- pr_info("Failed to get fd %d from target\n", eventfd_copy2.fd);
- goto out_task;
- }
-
- /*
- * Install the file struct from the target process into the
- * newly allocated file desciptor of the source process.
- */
- ret = get_unused_fd_flags(eventfd_copy2.flags);
- if (ret < 0) {
- fput(file);
- goto out_task;
- }
- fd_install(ret, file);
-
-out_task:
- put_task_struct(task_target);
-out:
- return ret;
-}
-
-static long
-eventfd_link_ioctl_copy(unsigned long arg)
-{
- void __user *argp = (void __user *) arg;
- struct task_struct *task_target = NULL;
- struct file *file;
- struct files_struct *files;
- struct fdtable *fdt;
- struct eventfd_copy eventfd_copy;
- long ret = -EFAULT;
-
- if (copy_from_user(&eventfd_copy, argp, sizeof(struct eventfd_copy)))
- goto out;
-
- /*
- * Find the task struct for the target pid
- */
- ret = -ESRCH;
-
- task_target =
- get_pid_task(find_vpid(eventfd_copy.target_pid), PIDTYPE_PID);
- if (task_target == NULL) {
- pr_info("Unable to find pid %d\n", eventfd_copy.target_pid);
- goto out;
- }
-
- ret = -ESTALE;
- files = get_files_struct(current);
- if (files == NULL) {
- pr_info("Failed to get current files struct\n");
- goto out_task;
- }
-
- ret = -EBADF;
- file = fget_from_files(files, eventfd_copy.source_fd);
-
- if (file == NULL) {
- pr_info("Failed to get fd %d from source\n",
- eventfd_copy.source_fd);
- put_files_struct(files);
- goto out_task;
- }
-
- /*
- * Release the existing eventfd in the source process
- */
- spin_lock(&files->file_lock);
- fput(file);
- filp_close(file, files);
- fdt = files_fdtable(files);
- fdt->fd[eventfd_copy.source_fd] = NULL;
- spin_unlock(&files->file_lock);
-
- put_files_struct(files);
-
- /*
- * Find the file struct associated with the target fd.
- */
-
- ret = -ESTALE;
- files = get_files_struct(task_target);
- if (files == NULL) {
- pr_info("Failed to get target files struct\n");
- goto out_task;
- }
-
- ret = -EBADF;
- file = fget_from_files(files, eventfd_copy.target_fd);
- put_files_struct(files);
-
- if (file == NULL) {
- pr_info("Failed to get fd %d from target\n",
- eventfd_copy.target_fd);
- goto out_task;
- }
-
- /*
- * Install the file struct from the target process into the
- * file desciptor of the source process,
- */
-
- fd_install(eventfd_copy.source_fd, file);
- ret = 0;
-
-out_task:
- put_task_struct(task_target);
-out:
- return ret;
-}
-
-static long
-eventfd_link_ioctl(struct file *f, unsigned int ioctl, unsigned long arg)
-{
- long ret = -ENOIOCTLCMD;
-
- switch (ioctl) {
- case EVENTFD_COPY:
- ret = eventfd_link_ioctl_copy(arg);
- break;
- case EVENTFD_COPY2:
- ret = eventfd_link_ioctl_copy2(arg);
- break;
- }
-
- return ret;
-}
-
-static const struct file_operations eventfd_link_fops = {
- .owner = THIS_MODULE,
- .unlocked_ioctl = eventfd_link_ioctl,
-};
-
-
-static struct miscdevice eventfd_link_misc = {
- .minor = MISC_DYNAMIC_MINOR,
- .name = "eventfd-link",
- .fops = &eventfd_link_fops,
-};
-
-static int __init
-eventfd_link_init(void)
-{
- return misc_register(&eventfd_link_misc);
-}
-
-module_init(eventfd_link_init);
-
-static void __exit
-eventfd_link_exit(void)
-{
- misc_deregister(&eventfd_link_misc);
-}
-
-module_exit(eventfd_link_exit);
-
-MODULE_VERSION("0.0.1");
-MODULE_LICENSE("GPL v2");
-MODULE_AUTHOR("Anthony Fee");
-MODULE_DESCRIPTION("Link eventfd");
-MODULE_ALIAS("devname:eventfd-link");
diff --git a/lib/librte_vhost/eventfd_link/eventfd_link.h b/lib/librte_vhost/eventfd_link/eventfd_link.h
deleted file mode 100644
index 5ebc20b8..00000000
--- a/lib/librte_vhost/eventfd_link/eventfd_link.h
+++ /dev/null
@@ -1,94 +0,0 @@
-/*-
- * This file is provided under a dual BSD/GPLv2 license. When using or
- * redistributing this file, you may do so under either license.
- *
- * GPL LICENSE SUMMARY
- *
- * Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of version 2 of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
- * The full GNU General Public License is included in this distribution
- * in the file called LICENSE.GPL.
- *
- * Contact Information:
- * Intel Corporation
- *
- * BSD LICENSE
- *
- * Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- *
- * Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in
- * the documentation and/or other materials provided with the
- * distribution.
- * Neither the name of Intel Corporation nor the names of its
- * contributors may be used to endorse or promote products derived
- * from this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- *
- */
-
-#ifndef _EVENTFD_LINK_H_
-#define _EVENTFD_LINK_H_
-
-/*
- * arguements for the EVENTFD_COPY ioctl
- */
-struct eventfd_copy {
- unsigned target_fd; /* fd in the target pid */
- unsigned source_fd; /* fd in the calling pid */
- pid_t target_pid; /* pid of the target pid */
-};
-
-/*
- * ioctl to copy an fd entry in calling process to an fd in a target process
- * NOTE: this one should be
- * #define EVENTFD_COPY _IOWR('D', 1, struct eventfd_copy) actually
- */
-#define EVENTFD_COPY 1
-
-/*
- * arguments for the EVENTFD_COPY2 ioctl
- */
-struct eventfd_copy2 {
- unsigned fd; /* fd to steal */
- pid_t pid; /* pid of the process to steal from */
- unsigned flags; /* flags to allocate new fd with */
-};
-
-/*
- * ioctl to copy an fd entry from the target process into newly allocated
- * fd in the calling process
- */
-#define EVENTFD_COPY2 _IOW('D', 2, struct eventfd_copy2)
-
-#endif /* _EVENTFD_LINK_H_ */
diff --git a/lib/librte_vhost/vhost_user/fd_man.c b/lib/librte_vhost/fd_man.c
index 2d3eeb7d..2d3eeb7d 100644
--- a/lib/librte_vhost/vhost_user/fd_man.c
+++ b/lib/librte_vhost/fd_man.c
diff --git a/lib/librte_vhost/vhost_user/fd_man.h b/lib/librte_vhost/fd_man.h
index bd66ed1c..bd66ed1c 100644
--- a/lib/librte_vhost/vhost_user/fd_man.h
+++ b/lib/librte_vhost/fd_man.h
diff --git a/lib/librte_vhost/libvirt/qemu-wrap.py b/lib/librte_vhost/libvirt/qemu-wrap.py
deleted file mode 100755
index e6a2cc9d..00000000
--- a/lib/librte_vhost/libvirt/qemu-wrap.py
+++ /dev/null
@@ -1,387 +0,0 @@
-#!/usr/bin/python
-#/*
-# * BSD LICENSE
-# *
-# * Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
-# * All rights reserved.
-# *
-# * Redistribution and use in source and binary forms, with or without
-# * modification, are permitted provided that the following conditions
-# * are met:
-# *
-# * * Redistributions of source code must retain the above copyright
-# * notice, this list of conditions and the following disclaimer.
-# * * Redistributions in binary form must reproduce the above copyright
-# * notice, this list of conditions and the following disclaimer in
-# * the documentation and/or other materials provided with the
-# * distribution.
-# * * Neither the name of Intel Corporation nor the names of its
-# * contributors may be used to endorse or promote products derived
-# * from this software without specific prior written permission.
-# *
-# * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-# * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
-# * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
-# * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
-# * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
-# * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
-# * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
-# * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
-# * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-# * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
-# * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-# */
-
-#####################################################################
-# This script is designed to modify the call to the QEMU emulator
-# to support userspace vhost when starting a guest machine through
-# libvirt with vhost enabled. The steps to enable this are as follows
-# and should be run as root:
-#
-# 1. Place this script in a libvirtd's binary search PATH ($PATH)
-# A good location would be in the same directory that the QEMU
-# binary is located
-#
-# 2. Ensure that the script has the same owner/group and file
-# permissions as the QEMU binary
-#
-# 3. Update the VM xml file using "virsh edit VM.xml"
-#
-# 3.a) Set the VM to use the launch script
-#
-# Set the emulator path contained in the
-# <emulator><emulator/> tags
-#
-# e.g replace <emulator>/usr/bin/qemu-kvm<emulator/>
-# with <emulator>/usr/bin/qemu-wrap.py<emulator/>
-#
-# 3.b) Set the VM's device's to use vhost-net offload
-#
-# <interface type="network">
-# <model type="virtio"/>
-# <driver name="vhost"/>
-# <interface/>
-#
-# 4. Enable libvirt to access our userpace device file by adding it to
-# controllers cgroup for libvirtd using the following steps
-#
-# 4.a) In /etc/libvirt/qemu.conf add/edit the following lines:
-# 1) cgroup_controllers = [ ... "devices", ... ]
-# 2) clear_emulator_capabilities = 0
-# 3) user = "root"
-# 4) group = "root"
-# 5) cgroup_device_acl = [
-# "/dev/null", "/dev/full", "/dev/zero",
-# "/dev/random", "/dev/urandom",
-# "/dev/ptmx", "/dev/kvm", "/dev/kqemu",
-# "/dev/rtc", "/dev/hpet", "/dev/net/tun",
-# "/dev/<devbase-name>",
-# "/dev/hugepages",
-# ]
-#
-# 4.b) Disable SELinux or set to permissive mode
-#
-# 4.c) Mount cgroup device controller
-# "mkdir /dev/cgroup"
-# "mount -t cgroup none /dev/cgroup -o devices"
-#
-# 4.d) Set hugetlbfs_mount variable - ( Optional )
-# VMs using userspace vhost must use hugepage backed
-# memory. This can be enabled in the libvirt XML
-# config by adding a memory backing section to the
-# XML config e.g.
-# <memoryBacking>
-# <hugepages/>
-# </memoryBacking>
-# This memory backing section should be added after the
-# <memory> and <currentMemory> sections. This will add
-# flags "-mem-prealloc -mem-path <path>" to the QEMU
-# command line. The hugetlbfs_mount variable can be used
-# to override the default <path> passed through by libvirt.
-#
-# if "-mem-prealloc" or "-mem-path <path>" are not passed
-# through and a vhost device is detected then these options will
-# be automatically added by this script. This script will detect
-# the system hugetlbfs mount point to be used for <path>. The
-# default <path> for this script can be overidden by the
-# hugetlbfs_dir variable in the configuration section of this script.
-#
-#
-# 4.e) Restart the libvirtd system process
-# e.g. on Fedora "systemctl restart libvirtd.service"
-#
-#
-# 4.f) Edit the Configuration Parameters section of this script
-# to point to the correct emulator location and set any
-# addition options
-#
-# The script modifies the libvirtd Qemu call by modifying/adding
-# options based on the configuration parameters below.
-# NOTE:
-# emul_path and us_vhost_path must be set
-# All other parameters are optional
-#####################################################################
-
-
-#############################################
-# Configuration Parameters
-#############################################
-#Path to QEMU binary
-emul_path = "/usr/local/bin/qemu-system-x86_64"
-
-#Path to userspace vhost device file
-# This filename should match the --dev-basename parameters of
-# the command used to launch the userspace vhost sample application e.g.
-# if the sample app lauch command is:
-# ./build/vhost-switch ..... --dev-basename usvhost
-# then this variable should be set to:
-# us_vhost_path = "/dev/usvhost"
-us_vhost_path = "/dev/usvhost"
-
-#List of additional user defined emulation options. These options will
-#be added to all Qemu calls
-emul_opts_user = []
-
-#List of additional user defined emulation options for vhost only.
-#These options will only be added to vhost enabled guests
-emul_opts_user_vhost = []
-
-#For all VHOST enabled VMs, the VM memory is preallocated from hugetlbfs
-# Set this variable to one to enable this option for all VMs
-use_huge_all = 0
-
-#Instead of autodetecting, override the hugetlbfs directory by setting
-#this variable
-hugetlbfs_dir = ""
-
-#############################################
-
-
-#############################################
-# ****** Do Not Modify Below this Line ******
-#############################################
-
-import sys, os, subprocess
-import time
-import signal
-
-
-#List of open userspace vhost file descriptors
-fd_list = []
-
-#additional virtio device flags when using userspace vhost
-vhost_flags = [ "csum=off",
- "gso=off",
- "guest_tso4=off",
- "guest_tso6=off",
- "guest_ecn=off"
- ]
-
-#String of the path to the Qemu process pid
-qemu_pid = "/tmp/%d-qemu.pid" % os.getpid()
-
-#############################################
-# Signal haldler to kill Qemu subprocess
-#############################################
-def kill_qemu_process(signum, stack):
- pidfile = open(qemu_pid, 'r')
- pid = int(pidfile.read())
- os.killpg(pid, signal.SIGTERM)
- pidfile.close()
-
-
-#############################################
-# Find the system hugefile mount point.
-# Note:
-# if multiple hugetlbfs mount points exist
-# then the first one found will be used
-#############################################
-def find_huge_mount():
-
- if (len(hugetlbfs_dir)):
- return hugetlbfs_dir
-
- huge_mount = ""
-
- if (os.access("/proc/mounts", os.F_OK)):
- f = open("/proc/mounts", "r")
- line = f.readline()
- while line:
- line_split = line.split(" ")
- if line_split[2] == 'hugetlbfs':
- huge_mount = line_split[1]
- break
- line = f.readline()
- else:
- print "/proc/mounts not found"
- exit (1)
-
- f.close
- if len(huge_mount) == 0:
- print "Failed to find hugetlbfs mount point"
- exit (1)
-
- return huge_mount
-
-
-#############################################
-# Get a userspace Vhost file descriptor
-#############################################
-def get_vhost_fd():
-
- if (os.access(us_vhost_path, os.F_OK)):
- fd = os.open( us_vhost_path, os.O_RDWR)
- else:
- print ("US-Vhost file %s not found" %us_vhost_path)
- exit (1)
-
- return fd
-
-
-#############################################
-# Check for vhostfd. if found then replace
-# with our own vhost fd and append any vhost
-# flags onto the end
-#############################################
-def modify_netdev_arg(arg):
-
- global fd_list
- vhost_in_use = 0
- s = ''
- new_opts = []
- netdev_opts = arg.split(",")
-
- for opt in netdev_opts:
- #check if vhost is used
- if "vhost" == opt[:5]:
- vhost_in_use = 1
- else:
- new_opts.append(opt)
-
- #if using vhost append vhost options
- if vhost_in_use == 1:
- #append vhost on option
- new_opts.append('vhost=on')
- #append vhostfd ption
- new_fd = get_vhost_fd()
- new_opts.append('vhostfd=' + str(new_fd))
- fd_list.append(new_fd)
-
- #concatenate all options
- for opt in new_opts:
- if len(s) > 0:
- s+=','
-
- s+=opt
-
- return s
-
-
-#############################################
-# Main
-#############################################
-def main():
-
- global fd_list
- global vhost_in_use
- new_args = []
- num_cmd_args = len(sys.argv)
- emul_call = ''
- mem_prealloc_set = 0
- mem_path_set = 0
- num = 0;
-
- #parse the parameters
- while (num < num_cmd_args):
- arg = sys.argv[num]
-
- #Check netdev +1 parameter for vhostfd
- if arg == '-netdev':
- num_vhost_devs = len(fd_list)
- new_args.append(arg)
-
- num+=1
- arg = sys.argv[num]
- mod_arg = modify_netdev_arg(arg)
- new_args.append(mod_arg)
-
- #append vhost flags if this is a vhost device
- # and -device is the next arg
- # i.e -device -opt1,-opt2,...,-opt3,%vhost
- if (num_vhost_devs < len(fd_list)):
- num+=1
- arg = sys.argv[num]
- if arg == '-device':
- new_args.append(arg)
- num+=1
- new_arg = sys.argv[num]
- for flag in vhost_flags:
- new_arg = ''.join([new_arg,',',flag])
- new_args.append(new_arg)
- else:
- new_args.append(arg)
- elif arg == '-mem-prealloc':
- mem_prealloc_set = 1
- new_args.append(arg)
- elif arg == '-mem-path':
- mem_path_set = 1
- new_args.append(arg)
-
- else:
- new_args.append(arg)
-
- num+=1
-
- #Set Qemu binary location
- emul_call+=emul_path
- emul_call+=" "
-
- #Add prealloc mem options if using vhost and not already added
- if ((len(fd_list) > 0) and (mem_prealloc_set == 0)):
- emul_call += "-mem-prealloc "
-
- #Add mempath mem options if using vhost and not already added
- if ((len(fd_list) > 0) and (mem_path_set == 0)):
- #Detect and add hugetlbfs mount point
- mp = find_huge_mount()
- mp = "".join(["-mem-path ", mp])
- emul_call += mp
- emul_call += " "
-
- #add user options
- for opt in emul_opts_user:
- emul_call += opt
- emul_call += " "
-
- #Add add user vhost only options
- if len(fd_list) > 0:
- for opt in emul_opts_user_vhost:
- emul_call += opt
- emul_call += " "
-
- #Add updated libvirt options
- iter_args = iter(new_args)
- #skip 1st arg i.e. call to this script
- next(iter_args)
- for arg in iter_args:
- emul_call+=str(arg)
- emul_call+= " "
-
- emul_call += "-pidfile %s " % qemu_pid
- #Call QEMU
- process = subprocess.Popen(emul_call, shell=True, preexec_fn=os.setsid)
-
- for sig in [signal.SIGTERM, signal.SIGINT, signal.SIGHUP, signal.SIGQUIT]:
- signal.signal(sig, kill_qemu_process)
-
- process.wait()
-
- #Close usvhost files
- for fd in fd_list:
- os.close(fd)
- #Cleanup temporary files
- if os.access(qemu_pid, os.F_OK):
- os.remove(qemu_pid)
-
-if __name__ == "__main__":
- main()
diff --git a/lib/librte_vhost/rte_virtio_net.h b/lib/librte_vhost/rte_virtio_net.h
index 9caa6221..926039c5 100644
--- a/lib/librte_vhost/rte_virtio_net.h
+++ b/lib/librte_vhost/rte_virtio_net.h
@@ -53,16 +53,13 @@
#define RTE_VHOST_USER_CLIENT (1ULL << 0)
#define RTE_VHOST_USER_NO_RECONNECT (1ULL << 1)
+#define RTE_VHOST_USER_DEQUEUE_ZERO_COPY (1ULL << 2)
/* Enum for virtqueue management. */
enum {VIRTIO_RXQ, VIRTIO_TXQ, VIRTIO_QNUM};
/**
* Device and vring operations.
- *
- * Make sure to set VIRTIO_DEV_RUNNING to the device flags in new_device and
- * remove it in destroy_device.
- *
*/
struct virtio_net_device_ops {
int (*new_device)(int vid); /**< Add device. */
@@ -126,9 +123,8 @@ int rte_vhost_get_numa_node(int vid);
uint32_t rte_vhost_get_queue_num(int vid);
/**
- * Get the virtio net device's ifname. For vhost-cuse, ifname is the
- * path of the char device. For vhost-user, ifname is the vhost-user
- * socket file path.
+ * Get the virtio net device's ifname, which is the vhost-user socket
+ * file path.
*
* @param vid
* virtio-net device ID
diff --git a/lib/librte_vhost/vhost_user/vhost-net-user.c b/lib/librte_vhost/socket.c
index b35594d9..aaa9c270 100644
--- a/lib/librte_vhost/vhost_user/vhost-net-user.c
+++ b/lib/librte_vhost/socket.c
@@ -1,7 +1,7 @@
/*-
* BSD LICENSE
*
- * Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+ * Copyright(c) 2010-2016 Intel Corporation. All rights reserved.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -47,12 +47,10 @@
#include <pthread.h>
#include <rte_log.h>
-#include <rte_virtio_net.h>
#include "fd_man.h"
-#include "vhost-net-user.h"
-#include "vhost-net.h"
-#include "virtio-net-user.h"
+#include "vhost.h"
+#include "vhost_user.h"
/*
* Every time rte_vhost_driver_register() is invoked, an associated
@@ -64,6 +62,7 @@ struct vhost_user_socket {
int connfd;
bool is_server;
bool reconnect;
+ bool dequeue_zero_copy;
};
struct vhost_user_connection {
@@ -82,7 +81,7 @@ struct vhost_user {
#define MAX_VIRTIO_BACKLOG 128
static void vhost_user_server_new_connection(int fd, void *data, int *remove);
-static void vhost_user_msg_handler(int fd, void *dat, int *remove);
+static void vhost_user_read_cb(int fd, void *dat, int *remove);
static int vhost_user_create_client(struct vhost_user_socket *vsocket);
static struct vhost_user vhost_user = {
@@ -95,31 +94,8 @@ static struct vhost_user vhost_user = {
.mutex = PTHREAD_MUTEX_INITIALIZER,
};
-static const char *vhost_message_str[VHOST_USER_MAX] = {
- [VHOST_USER_NONE] = "VHOST_USER_NONE",
- [VHOST_USER_GET_FEATURES] = "VHOST_USER_GET_FEATURES",
- [VHOST_USER_SET_FEATURES] = "VHOST_USER_SET_FEATURES",
- [VHOST_USER_SET_OWNER] = "VHOST_USER_SET_OWNER",
- [VHOST_USER_RESET_OWNER] = "VHOST_USER_RESET_OWNER",
- [VHOST_USER_SET_MEM_TABLE] = "VHOST_USER_SET_MEM_TABLE",
- [VHOST_USER_SET_LOG_BASE] = "VHOST_USER_SET_LOG_BASE",
- [VHOST_USER_SET_LOG_FD] = "VHOST_USER_SET_LOG_FD",
- [VHOST_USER_SET_VRING_NUM] = "VHOST_USER_SET_VRING_NUM",
- [VHOST_USER_SET_VRING_ADDR] = "VHOST_USER_SET_VRING_ADDR",
- [VHOST_USER_SET_VRING_BASE] = "VHOST_USER_SET_VRING_BASE",
- [VHOST_USER_GET_VRING_BASE] = "VHOST_USER_GET_VRING_BASE",
- [VHOST_USER_SET_VRING_KICK] = "VHOST_USER_SET_VRING_KICK",
- [VHOST_USER_SET_VRING_CALL] = "VHOST_USER_SET_VRING_CALL",
- [VHOST_USER_SET_VRING_ERR] = "VHOST_USER_SET_VRING_ERR",
- [VHOST_USER_GET_PROTOCOL_FEATURES] = "VHOST_USER_GET_PROTOCOL_FEATURES",
- [VHOST_USER_SET_PROTOCOL_FEATURES] = "VHOST_USER_SET_PROTOCOL_FEATURES",
- [VHOST_USER_GET_QUEUE_NUM] = "VHOST_USER_GET_QUEUE_NUM",
- [VHOST_USER_SET_VRING_ENABLE] = "VHOST_USER_SET_VRING_ENABLE",
- [VHOST_USER_SEND_RARP] = "VHOST_USER_SEND_RARP",
-};
-
/* return bytes# of read on success or negative val on failure. */
-static int
+int
read_fd_message(int sockfd, char *buf, int buflen, int *fds, int fd_num)
{
struct iovec iov;
@@ -161,37 +137,7 @@ read_fd_message(int sockfd, char *buf, int buflen, int *fds, int fd_num)
return ret;
}
-/* return bytes# of read on success or negative val on failure. */
-static int
-read_vhost_message(int sockfd, struct VhostUserMsg *msg)
-{
- int ret;
-
- ret = read_fd_message(sockfd, (char *)msg, VHOST_USER_HDR_SIZE,
- msg->fds, VHOST_MEMORY_MAX_NREGIONS);
- if (ret <= 0)
- return ret;
-
- if (msg && msg->size) {
- if (msg->size > sizeof(msg->payload)) {
- RTE_LOG(ERR, VHOST_CONFIG,
- "invalid msg size: %d\n", msg->size);
- return -1;
- }
- ret = read(sockfd, &msg->payload, msg->size);
- if (ret <= 0)
- return ret;
- if (ret != (int)msg->size) {
- RTE_LOG(ERR, VHOST_CONFIG,
- "read control message failed\n");
- return -1;
- }
- }
-
- return ret;
-}
-
-static int
+int
send_fd_message(int sockfd, char *buf, int buflen, int *fds, int fd_num)
{
@@ -234,25 +180,6 @@ send_fd_message(int sockfd, char *buf, int buflen, int *fds, int fd_num)
return ret;
}
-static int
-send_vhost_message(int sockfd, struct VhostUserMsg *msg)
-{
- int ret;
-
- if (!msg)
- return 0;
-
- msg->flags &= ~VHOST_USER_VERSION_MASK;
- msg->flags |= VHOST_USER_VERSION;
- msg->flags |= VHOST_USER_REPLY_MASK;
-
- ret = send_fd_message(sockfd, (char *)msg,
- VHOST_USER_HDR_SIZE + msg->size, NULL, 0);
-
- return ret;
-}
-
-
static void
vhost_user_add_connection(int fd, struct vhost_user_socket *vsocket)
{
@@ -277,12 +204,15 @@ vhost_user_add_connection(int fd, struct vhost_user_socket *vsocket)
size = strnlen(vsocket->path, PATH_MAX);
vhost_set_ifname(vid, vsocket->path, size);
+ if (vsocket->dequeue_zero_copy)
+ vhost_enable_dequeue_zero_copy(vid);
+
RTE_LOG(INFO, VHOST_CONFIG, "new device, handle is %d\n", vid);
vsocket->connfd = fd;
conn->vsocket = vsocket;
conn->vid = vid;
- ret = fdset_add(&vhost_user.fdset, fd, vhost_user_msg_handler,
+ ret = fdset_add(&vhost_user.fdset, fd, vhost_user_read_cb,
NULL, conn);
if (ret < 0) {
vsocket->connfd = -1;
@@ -308,134 +238,23 @@ vhost_user_server_new_connection(int fd, void *dat, int *remove __rte_unused)
vhost_user_add_connection(fd, vsocket);
}
-/* callback when there is message on the connfd */
static void
-vhost_user_msg_handler(int connfd, void *dat, int *remove)
+vhost_user_read_cb(int connfd, void *dat, int *remove)
{
- int vid;
struct vhost_user_connection *conn = dat;
- struct VhostUserMsg msg;
- uint64_t features;
+ struct vhost_user_socket *vsocket = conn->vsocket;
int ret;
- vid = conn->vid;
- ret = read_vhost_message(connfd, &msg);
- if (ret <= 0 || msg.request >= VHOST_USER_MAX) {
- struct vhost_user_socket *vsocket = conn->vsocket;
-
- if (ret < 0)
- RTE_LOG(ERR, VHOST_CONFIG,
- "vhost read message failed\n");
- else if (ret == 0)
- RTE_LOG(INFO, VHOST_CONFIG,
- "vhost peer closed\n");
- else
- RTE_LOG(ERR, VHOST_CONFIG,
- "vhost read incorrect message\n");
-
+ ret = vhost_user_msg_handler(conn->vid, connfd);
+ if (ret < 0) {
vsocket->connfd = -1;
close(connfd);
*remove = 1;
+ vhost_destroy_device(conn->vid);
free(conn);
- vhost_destroy_device(vid);
if (vsocket->reconnect)
vhost_user_create_client(vsocket);
-
- return;
- }
-
- RTE_LOG(INFO, VHOST_CONFIG, "read message %s\n",
- vhost_message_str[msg.request]);
- switch (msg.request) {
- case VHOST_USER_GET_FEATURES:
- ret = vhost_get_features(vid, &features);
- msg.payload.u64 = features;
- msg.size = sizeof(msg.payload.u64);
- send_vhost_message(connfd, &msg);
- break;
- case VHOST_USER_SET_FEATURES:
- features = msg.payload.u64;
- vhost_set_features(vid, &features);
- break;
-
- case VHOST_USER_GET_PROTOCOL_FEATURES:
- msg.payload.u64 = VHOST_USER_PROTOCOL_FEATURES;
- msg.size = sizeof(msg.payload.u64);
- send_vhost_message(connfd, &msg);
- break;
- case VHOST_USER_SET_PROTOCOL_FEATURES:
- user_set_protocol_features(vid, msg.payload.u64);
- break;
-
- case VHOST_USER_SET_OWNER:
- vhost_set_owner(vid);
- break;
- case VHOST_USER_RESET_OWNER:
- vhost_reset_owner(vid);
- break;
-
- case VHOST_USER_SET_MEM_TABLE:
- user_set_mem_table(vid, &msg);
- break;
-
- case VHOST_USER_SET_LOG_BASE:
- user_set_log_base(vid, &msg);
-
- /* it needs a reply */
- msg.size = sizeof(msg.payload.u64);
- send_vhost_message(connfd, &msg);
- break;
- case VHOST_USER_SET_LOG_FD:
- close(msg.fds[0]);
- RTE_LOG(INFO, VHOST_CONFIG, "not implemented.\n");
- break;
-
- case VHOST_USER_SET_VRING_NUM:
- vhost_set_vring_num(vid, &msg.payload.state);
- break;
- case VHOST_USER_SET_VRING_ADDR:
- vhost_set_vring_addr(vid, &msg.payload.addr);
- break;
- case VHOST_USER_SET_VRING_BASE:
- vhost_set_vring_base(vid, &msg.payload.state);
- break;
-
- case VHOST_USER_GET_VRING_BASE:
- ret = user_get_vring_base(vid, &msg.payload.state);
- msg.size = sizeof(msg.payload.state);
- send_vhost_message(connfd, &msg);
- break;
-
- case VHOST_USER_SET_VRING_KICK:
- user_set_vring_kick(vid, &msg);
- break;
- case VHOST_USER_SET_VRING_CALL:
- user_set_vring_call(vid, &msg);
- break;
-
- case VHOST_USER_SET_VRING_ERR:
- if (!(msg.payload.u64 & VHOST_USER_VRING_NOFD_MASK))
- close(msg.fds[0]);
- RTE_LOG(INFO, VHOST_CONFIG, "not implemented\n");
- break;
-
- case VHOST_USER_GET_QUEUE_NUM:
- msg.payload.u64 = VHOST_MAX_QUEUE_PAIRS;
- msg.size = sizeof(msg.payload.u64);
- send_vhost_message(connfd, &msg);
- break;
-
- case VHOST_USER_SET_VRING_ENABLE:
- user_set_vring_enable(vid, &msg.payload.state);
- break;
- case VHOST_USER_SEND_RARP:
- user_send_rarp(vid, &msg);
- break;
-
- default:
- break;
-
}
}
@@ -684,6 +503,7 @@ rte_vhost_driver_register(const char *path, uint64_t flags)
memset(vsocket, 0, sizeof(struct vhost_user_socket));
vsocket->path = strdup(path);
vsocket->connfd = -1;
+ vsocket->dequeue_zero_copy = flags & RTE_VHOST_USER_DEQUEUE_ZERO_COPY;
if ((flags & RTE_VHOST_USER_CLIENT) != 0) {
vsocket->reconnect = !(flags & RTE_VHOST_USER_NO_RECONNECT);
diff --git a/lib/librte_vhost/vhost.c b/lib/librte_vhost/vhost.c
new file mode 100644
index 00000000..31825b82
--- /dev/null
+++ b/lib/librte_vhost/vhost.c
@@ -0,0 +1,430 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright(c) 2010-2016 Intel Corporation. All rights reserved.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <linux/vhost.h>
+#include <linux/virtio_net.h>
+#include <stddef.h>
+#include <stdint.h>
+#include <stdlib.h>
+#ifdef RTE_LIBRTE_VHOST_NUMA
+#include <numaif.h>
+#endif
+
+#include <rte_ethdev.h>
+#include <rte_log.h>
+#include <rte_string_fns.h>
+#include <rte_memory.h>
+#include <rte_malloc.h>
+#include <rte_virtio_net.h>
+
+#include "vhost.h"
+
+#define VHOST_USER_F_PROTOCOL_FEATURES 30
+
+/* Features supported by this lib. */
+#define VHOST_SUPPORTED_FEATURES ((1ULL << VIRTIO_NET_F_MRG_RXBUF) | \
+ (1ULL << VIRTIO_NET_F_CTRL_VQ) | \
+ (1ULL << VIRTIO_NET_F_CTRL_RX) | \
+ (1ULL << VIRTIO_NET_F_GUEST_ANNOUNCE) | \
+ (VHOST_SUPPORTS_MQ) | \
+ (1ULL << VIRTIO_F_VERSION_1) | \
+ (1ULL << VHOST_F_LOG_ALL) | \
+ (1ULL << VHOST_USER_F_PROTOCOL_FEATURES) | \
+ (1ULL << VIRTIO_NET_F_HOST_TSO4) | \
+ (1ULL << VIRTIO_NET_F_HOST_TSO6) | \
+ (1ULL << VIRTIO_NET_F_CSUM) | \
+ (1ULL << VIRTIO_NET_F_GUEST_CSUM) | \
+ (1ULL << VIRTIO_NET_F_GUEST_TSO4) | \
+ (1ULL << VIRTIO_NET_F_GUEST_TSO6) | \
+ (1ULL << VIRTIO_RING_F_INDIRECT_DESC))
+
+uint64_t VHOST_FEATURES = VHOST_SUPPORTED_FEATURES;
+
+struct virtio_net *vhost_devices[MAX_VHOST_DEVICE];
+
+/* device ops to add/remove device to/from data core. */
+struct virtio_net_device_ops const *notify_ops;
+
+/*
+ * Look up the virtio-net device registered under vid.
+ *
+ * Returns the device pointer, or NULL (after logging an error) when vid
+ * lies outside the device table or its slot is empty.
+ */
+struct virtio_net *
+get_device(int vid)
+{
+	struct virtio_net *dev = NULL;
+
+	/* vid is supplied through the public API: never index out of bounds. */
+	if (vid >= 0 && vid < MAX_VHOST_DEVICE)
+		dev = vhost_devices[vid];
+
+	if (unlikely(!dev)) {
+		RTE_LOG(ERR, VHOST_CONFIG,
+			"(%d) device not found.\n", vid);
+	}
+
+	return dev;
+}
+
+/*
+ * Close the eventfds held by a virtqueue. The kick fd is closed whenever
+ * it is valid; the call fd is closed only when destroy is non-zero.
+ */
+static void
+cleanup_vq(struct vhost_virtqueue *vq, int destroy)
+{
+	if (destroy && vq->callfd >= 0)
+		close(vq->callfd);
+
+	if (vq->kickfd >= 0)
+		close(vq->kickfd);
+}
+
+/*
+ * Run the backend cleanup for a device, then release the eventfds of
+ * every RX/TX virtqueue. destroy is forwarded to the per-queue cleanup.
+ */
+void
+cleanup_device(struct virtio_net *dev, int destroy)
+{
+	uint32_t qp;
+
+	vhost_backend_cleanup(dev);
+
+	for (qp = 0; qp < dev->virt_qp_nb; qp++) {
+		struct vhost_virtqueue **pair = &dev->virtqueue[qp * VIRTIO_QNUM];
+
+		cleanup_vq(pair[VIRTIO_RXQ], destroy);
+		cleanup_vq(pair[VIRTIO_TXQ], destroy);
+	}
+}
+
+/*
+ * Free the shadow used rings and the storage of every virtqueue pair,
+ * then the device structure itself.
+ */
+static void
+free_device(struct virtio_net *dev)
+{
+	uint32_t qp;
+
+	for (qp = 0; qp < dev->virt_qp_nb; qp++) {
+		struct vhost_virtqueue *rx =
+			dev->virtqueue[qp * VIRTIO_QNUM + VIRTIO_RXQ];
+		struct vhost_virtqueue *tx =
+			dev->virtqueue[qp * VIRTIO_QNUM + VIRTIO_TXQ];
+
+		rte_free(rx->shadow_used_ring);
+		rte_free(tx->shadow_used_ring);
+
+		/* One allocation holds both queues of a pair; rx is its base. */
+		rte_free(rx);
+	}
+
+	rte_free(dev);
+}
+
+/*
+ * Put a virtqueue into its initial state: everything zeroed, both
+ * eventfds marked uninitialized, the backend marked inactive and an
+ * empty zero-copy mbuf list. Only queues of pair 0 start out enabled.
+ */
+static void
+init_vring_queue(struct vhost_virtqueue *vq, int qp_idx)
+{
+	memset(vq, 0, sizeof(*vq));
+
+	/* Neither eventfd has been set up yet. */
+	vq->callfd = VIRTIO_UNINITIALIZED_EVENTFD;
+	vq->kickfd = VIRTIO_UNINITIALIZED_EVENTFD;
+
+	/* A backend of -1 indicates an inactive device. */
+	vq->backend = -1;
+
+	/* The default (first) queue pair is always enabled. */
+	vq->enabled = (qp_idx == 0);
+
+	TAILQ_INIT(&vq->zmbuf_list);
+}
+
+/* Initialize both the RX and the TX queue of queue pair qp_idx. */
+static void
+init_vring_queue_pair(struct virtio_net *dev, uint32_t qp_idx)
+{
+	struct vhost_virtqueue **pair = &dev->virtqueue[qp_idx * VIRTIO_QNUM];
+
+	init_vring_queue(pair[VIRTIO_RXQ], qp_idx);
+	init_vring_queue(pair[VIRTIO_TXQ], qp_idx);
+}
+
+/*
+ * Re-initialize a virtqueue while carrying its call fd over, so the
+ * descriptor stored in callfd survives the reset.
+ */
+static void
+reset_vring_queue(struct vhost_virtqueue *vq, int qp_idx)
+{
+	const int saved_callfd = vq->callfd;
+
+	init_vring_queue(vq, qp_idx);
+	vq->callfd = saved_callfd;
+}
+
+/* Reset both the RX and the TX queue of queue pair qp_idx. */
+static void
+reset_vring_queue_pair(struct virtio_net *dev, uint32_t qp_idx)
+{
+	struct vhost_virtqueue **pair = &dev->virtqueue[qp_idx * VIRTIO_QNUM];
+
+	reset_vring_queue(pair[VIRTIO_RXQ], qp_idx);
+	reset_vring_queue(pair[VIRTIO_TXQ], qp_idx);
+}
+
+/*
+ * Allocate the RX and TX virtqueues of queue pair qp_idx as a single
+ * block, register them in the device, initialize them and bump the
+ * device's queue-pair count.
+ *
+ * Returns 0 on success, -1 when the allocation fails.
+ */
+int
+alloc_vring_queue_pair(struct virtio_net *dev, uint32_t qp_idx)
+{
+	const uint32_t base_idx = qp_idx * VIRTIO_QNUM;
+	struct vhost_virtqueue *pair;
+
+	pair = rte_malloc(NULL, sizeof(*pair) * VIRTIO_QNUM, 0);
+	if (pair == NULL) {
+		RTE_LOG(ERR, VHOST_CONFIG,
+			"Failed to allocate memory for virt qp:%d.\n", qp_idx);
+		return -1;
+	}
+
+	/* RX sits at the start of the block, TX right behind it. */
+	dev->virtqueue[base_idx + VIRTIO_RXQ] = pair;
+	dev->virtqueue[base_idx + VIRTIO_TXQ] = pair + VIRTIO_TXQ;
+
+	init_vring_queue_pair(dev, qp_idx);
+	dev->virt_qp_nb++;
+
+	return 0;
+}
+
+/*
+ * Clear the negotiated features, protocol features and flags of a
+ * device and reset each of its queue pairs. Other fields (for example
+ * vid, ifname and virt_qp_nb) are not touched here.
+ */
+void
+reset_device(struct virtio_net *dev)
+{
+	uint32_t qp = 0;
+
+	dev->flags = 0;
+	dev->protocol_features = 0;
+	dev->features = 0;
+
+	while (qp < dev->virt_qp_nb)
+		reset_vring_queue_pair(dev, qp++);
+}
+
+/*
+ * Invoked when there is a new vhost-user connection established (when
+ * there is a new virtio device being attached).
+ *
+ * Allocates a zeroed device, stores it in the first free slot of the
+ * device table and uses the slot index as the device id.
+ *
+ * Returns the new vid, or -1 when the allocation fails or the table
+ * is full.
+ */
+int
+vhost_new_device(void)
+{
+	struct virtio_net *dev;
+	int i;
+
+	dev = rte_zmalloc(NULL, sizeof(struct virtio_net), 0);
+	if (dev == NULL) {
+		RTE_LOG(ERR, VHOST_CONFIG,
+			"Failed to allocate memory for new dev.\n");
+		return -1;
+	}
+
+	for (i = 0; i < MAX_VHOST_DEVICE; i++) {
+		if (vhost_devices[i] == NULL)
+			break;
+	}
+	if (i == MAX_VHOST_DEVICE) {
+		RTE_LOG(ERR, VHOST_CONFIG,
+			"Failed to find a free slot for new device.\n");
+		/* No slot took ownership of dev, so release it here. */
+		rte_free(dev);
+		return -1;
+	}
+
+	vhost_devices[i] = dev;
+	dev->vid = i;
+
+	return i;
+}
+
+/*
+ * Invoked when the vhost-user connection is broken (when the virtio
+ * device is being detached).
+ *
+ * A device still flagged as running is first reported to the registered
+ * destroy_device callback; afterwards its resources are cleaned up and
+ * freed and its slot in the device table is cleared.
+ */
+void
+vhost_destroy_device(int vid)
+{
+	struct virtio_net *dev = get_device(vid);
+
+	if (!dev)
+		return;
+
+	if ((dev->flags & VIRTIO_DEV_RUNNING) != 0) {
+		/* Drop the flag before handing control to the application. */
+		dev->flags &= ~VIRTIO_DEV_RUNNING;
+		notify_ops->destroy_device(vid);
+	}
+
+	cleanup_device(dev, 1);
+	free_device(dev);
+
+	vhost_devices[vid] = NULL;
+}
+
+/*
+ * Store the interface name of device vid.
+ *
+ * At most if_len bytes of if_name are copied; the name is truncated to
+ * fit the device's ifname buffer and is always NUL-terminated.
+ * Does nothing when vid does not name a device.
+ */
+void
+vhost_set_ifname(int vid, const char *if_name, unsigned int if_len)
+{
+	struct virtio_net *dev;
+	unsigned int len;
+
+	dev = get_device(vid);
+	if (dev == NULL)
+		return;
+
+	/* Keep the final byte of the buffer free for the terminator. */
+	len = if_len >= sizeof(dev->ifname) ?
+		sizeof(dev->ifname) - 1 : if_len;
+
+	strncpy(dev->ifname, if_name, len);
+	/*
+	 * strncpy() adds no terminator when it copies exactly len bytes,
+	 * so terminate explicitly instead of relying on a zeroed buffer.
+	 */
+	dev->ifname[len] = '\0';
+}
+
+/* Set the dequeue zero-copy flag of device vid, if the device exists. */
+void
+vhost_enable_dequeue_zero_copy(int vid)
+{
+	struct virtio_net *dev = get_device(vid);
+
+	if (dev != NULL)
+		dev->dequeue_zero_copy = 1;
+}
+
+/*
+ * Report the NUMA node on which the device structure of vid resides.
+ *
+ * Returns the node id, or -1 when the device is unknown, the query
+ * fails, or the library was built without RTE_LIBRTE_VHOST_NUMA.
+ */
+int
+rte_vhost_get_numa_node(int vid)
+{
+#ifdef RTE_LIBRTE_VHOST_NUMA
+	struct virtio_net *dev = get_device(vid);
+	int node;
+	int rc;
+
+	if (!dev)
+		return -1;
+
+	/* Ask for the node that backs the address of the device struct. */
+	rc = get_mempolicy(&node, NULL, 0, dev,
+			   MPOL_F_NODE | MPOL_F_ADDR);
+	if (rc >= 0)
+		return node;
+
+	RTE_LOG(ERR, VHOST_CONFIG,
+		"(%d) failed to query numa node: %d\n", vid, rc);
+	return -1;
+#else
+	RTE_SET_USED(vid);
+	return -1;
+#endif
+}
+
+/* Return the number of queue pairs of device vid, or 0 if it is unknown. */
+uint32_t
+rte_vhost_get_queue_num(int vid)
+{
+	struct virtio_net *dev = get_device(vid);
+
+	return dev ? dev->virt_qp_nb : 0;
+}
+
+/*
+ * Copy the interface name of device vid into buf.
+ *
+ * At most len bytes are written, including the terminating NUL; the
+ * name is truncated when buf is too small.
+ *
+ * Returns 0 on success, -1 when the device is unknown, buf is NULL or
+ * len is 0.
+ */
+int
+rte_vhost_get_ifname(int vid, char *buf, size_t len)
+{
+	struct virtio_net *dev = get_device(vid);
+
+	/*
+	 * len == 0 must be rejected: "len - 1" would wrap around and the
+	 * terminator below would be written far outside of buf.
+	 */
+	if (dev == NULL || buf == NULL || len == 0)
+		return -1;
+
+	len = RTE_MIN(len, sizeof(dev->ifname));
+
+	strncpy(buf, dev->ifname, len);
+	buf[len - 1] = '\0';
+
+	return 0;
+}
+
+/*
+ * Return the distance between the available ring index and the last
+ * used index of a virtqueue.
+ *
+ * Returns 0 when the device is unknown, queue_id does not name a set-up
+ * queue, or the queue is disabled.
+ */
+uint16_t
+rte_vhost_avail_entries(int vid, uint16_t queue_id)
+{
+	struct virtio_net *dev;
+	struct vhost_virtqueue *vq;
+
+	dev = get_device(vid);
+	if (!dev)
+		return 0;
+
+	/* queue_id comes from the application: stay inside the table. */
+	if (queue_id >= sizeof(dev->virtqueue) / sizeof(dev->virtqueue[0]))
+		return 0;
+
+	vq = dev->virtqueue[queue_id];
+	/* Unallocated queue, or one whose rings have not been set yet. */
+	if (vq == NULL || vq->avail == NULL)
+		return 0;
+	if (!vq->enabled)
+		return 0;
+
+	return *(volatile uint16_t *)&vq->avail->idx - vq->last_used_idx;
+}
+
+/*
+ * Disable guest notification on a virtqueue by writing
+ * VRING_USED_F_NO_NOTIFY to the flags of its used ring. Enabling is
+ * not supported and is rejected with an error log.
+ *
+ * Returns 0 on success, -1 when the device is unknown, enable is
+ * non-zero, or queue_id does not name a set-up queue.
+ */
+int
+rte_vhost_enable_guest_notification(int vid, uint16_t queue_id, int enable)
+{
+	struct virtio_net *dev = get_device(vid);
+	struct vhost_virtqueue *vq;
+
+	if (dev == NULL)
+		return -1;
+
+	if (enable) {
+		RTE_LOG(ERR, VHOST_CONFIG,
+			"guest notification isn't supported.\n");
+		return -1;
+	}
+
+	/* queue_id comes from the application: stay inside the table. */
+	if (queue_id >= sizeof(dev->virtqueue) / sizeof(dev->virtqueue[0]))
+		return -1;
+
+	vq = dev->virtqueue[queue_id];
+	/* Unallocated queue, or one whose rings have not been set yet. */
+	if (vq == NULL || vq->used == NULL)
+		return -1;
+
+	vq->used->flags = VRING_USED_F_NO_NOTIFY;
+	return 0;
+}
+
+/* Return the feature bits currently held in VHOST_FEATURES. */
+uint64_t
+rte_vhost_feature_get(void)
+{
+	return VHOST_FEATURES;
+}
+
+/* Clear the bits of feature_mask in VHOST_FEATURES. Always returns 0. */
+int
+rte_vhost_feature_disable(uint64_t feature_mask)
+{
+	VHOST_FEATURES &= ~feature_mask;
+
+	return 0;
+}
+
+/*
+ * Set the bits of feature_mask in VHOST_FEATURES.
+ *
+ * Returns 0 on success, or -1 (changing nothing) when the mask contains
+ * a bit outside VHOST_SUPPORTED_FEATURES.
+ */
+int
+rte_vhost_feature_enable(uint64_t feature_mask)
+{
+	/* Any bit outside the supported set rejects the whole request. */
+	if ((feature_mask & ~VHOST_SUPPORTED_FEATURES) != 0)
+		return -1;
+
+	VHOST_FEATURES |= feature_mask;
+	return 0;
+}
+
+/*
+ * Remember the application's device callbacks in notify_ops; they are
+ * what lets this library add a device to, or remove it from, the data
+ * core. Always returns 0.
+ */
+int
+rte_vhost_driver_callback_register(struct virtio_net_device_ops const * const ops)
+{
+	notify_ops = ops;
+	return 0;
+}
diff --git a/lib/librte_vhost/vhost-net.h b/lib/librte_vhost/vhost.h
index 38593a29..22564f1c 100644
--- a/lib/librte_vhost/vhost-net.h
+++ b/lib/librte_vhost/vhost.h
@@ -36,6 +36,7 @@
#include <stdint.h>
#include <stdio.h>
#include <sys/types.h>
+#include <sys/queue.h>
#include <unistd.h>
#include <linux/vhost.h>
@@ -61,6 +62,19 @@ struct buf_vector {
uint32_t desc_idx;
};
+/*
+ * A structure to hold some fields needed in zero copy code path,
+ * mainly for associating an mbuf with the right desc_idx.
+ */
+struct zcopy_mbuf {
+ struct rte_mbuf *mbuf;
+ uint32_t desc_idx;
+ uint16_t in_use;
+
+ TAILQ_ENTRY(zcopy_mbuf) next;
+};
+TAILQ_HEAD(zcopy_mbuf_list, zcopy_mbuf);
+
/**
* Structure contains variables relevant to RX/TX virtqueues.
*/
@@ -70,8 +84,8 @@ struct vhost_virtqueue {
struct vring_used *used;
uint32_t size;
- /* Last index used on the available ring */
- volatile uint16_t last_used_idx;
+ uint16_t last_avail_idx;
+ uint16_t last_used_idx;
#define VIRTIO_INVALID_EVENTFD (-1)
#define VIRTIO_UNINITIALIZED_EVENTFD (-2)
@@ -85,6 +99,15 @@ struct vhost_virtqueue {
/* Physical address of used ring, for logging */
uint64_t log_guest_addr;
+
+ uint16_t nr_zmbuf;
+ uint16_t zmbuf_size;
+ uint16_t last_zmbuf_idx;
+ struct zcopy_mbuf *zmbufs;
+ struct zcopy_mbuf_list zmbuf_list;
+
+ struct vring_used_elem *shadow_used_ring;
+ uint16_t shadow_used_idx;
} __rte_cache_aligned;
/* Old kernels have no such macro defined */
@@ -114,6 +137,12 @@ struct vhost_virtqueue {
#define VIRTIO_F_VERSION_1 32
#endif
+/*
+ * One guest-physical to host-physical address range, as searched by
+ * gpa_to_hpa().
+ */
+struct guest_page {
+ uint64_t guest_phys_addr; /* first guest physical address of the range */
+ uint64_t host_phys_addr; /* host physical address matching guest_phys_addr */
+ uint64_t size; /* length of the range */
+};
+
/**
* Device structure contains all configuration information relating
* to the device.
@@ -129,6 +158,7 @@ struct virtio_net {
/* to tell if we need broadcast rarp packet */
rte_atomic16_t broadcast_rarp;
uint32_t virt_qp_nb;
+ int dequeue_zero_copy;
struct vhost_virtqueue *virtqueue[VHOST_MAX_QUEUE_PAIRS * 2];
#define IF_NAME_SZ (PATH_MAX > IFNAMSIZ ? PATH_MAX : IFNAMSIZ)
char ifname[IF_NAME_SZ];
@@ -137,18 +167,23 @@ struct virtio_net {
uint64_t log_addr;
struct ether_addr mac;
+ uint32_t nr_guest_pages;
+ uint32_t max_guest_pages;
+ struct guest_page *guest_pages;
} __rte_cache_aligned;
/**
* Information relating to memory regions including offsets to
* addresses in QEMUs memory file.
*/
-struct virtio_memory_regions {
- uint64_t guest_phys_address;
- uint64_t guest_phys_address_end;
- uint64_t memory_size;
- uint64_t userspace_address;
- uint64_t address_offset;
+struct virtio_memory_region {
+ uint64_t guest_phys_addr; /* first guest physical address of the region */
+ uint64_t guest_user_addr; /* NOTE(review): presumably the region's address inside the guest-side (QEMU) process -- confirm */
+ uint64_t host_user_addr; /* address in this process matching guest_phys_addr; used by gpa_to_vva() */
+ uint64_t size; /* length of the region */
+ void *mmap_addr; /* NOTE(review): presumably the base returned by mmap() for this region -- confirm */
+ uint64_t mmap_size; /* NOTE(review): presumably the length of that mapping -- confirm */
+ int fd; /* NOTE(review): presumably the descriptor the region was mapped from -- confirm */
};
@@ -156,12 +191,8 @@ struct virtio_memory_regions {
* Memory structure includes region and mapping information.
*/
struct virtio_memory {
- /* Base QEMU userspace address of the memory file. */
- uint64_t base_address;
- uint64_t mapped_address;
- uint64_t mapped_size;
uint32_t nregions; /* number of entries in regions[] that lookups iterate over */
- struct virtio_memory_regions regions[0];
+ struct virtio_memory_region regions[0]; /* trailing array holding the region descriptors */
};
@@ -196,54 +227,66 @@ struct virtio_memory {
#define PRINT_PACKET(device, addr, size, header) do {} while (0)
#endif
-/**
- * Function to convert guest physical addresses to vhost virtual addresses.
- * This is used to convert guest virtio buffer addresses.
- */
+extern uint64_t VHOST_FEATURES;
+#define MAX_VHOST_DEVICE 1024
+extern struct virtio_net *vhost_devices[MAX_VHOST_DEVICE];
+
+/*
+ * Convert a guest physical address to a host virtual address.
+ *
+ * Scans dev->mem->regions[] in order and translates gpa through the first
+ * region whose [guest_phys_addr, guest_phys_addr + size) range contains it.
+ * Returns 0 when no region contains gpa.
+ */
static inline uint64_t __attribute__((always_inline))
-gpa_to_vva(struct virtio_net *dev, uint64_t guest_pa)
+gpa_to_vva(struct virtio_net *dev, uint64_t gpa)
{
- struct virtio_memory_regions *region;
- uint32_t regionidx;
- uint64_t vhost_va = 0;
-
- for (regionidx = 0; regionidx < dev->mem->nregions; regionidx++) {
- region = &dev->mem->regions[regionidx];
- if ((guest_pa >= region->guest_phys_address) &&
- (guest_pa <= region->guest_phys_address_end)) {
- vhost_va = region->address_offset + guest_pa;
- break;
+ struct virtio_memory_region *reg;
+ uint32_t i;
+
+ for (i = 0; i < dev->mem->nregions; i++) {
+ reg = &dev->mem->regions[i];
+ if (gpa >= reg->guest_phys_addr &&
+ gpa < reg->guest_phys_addr + reg->size) {
+ return gpa - reg->guest_phys_addr +
+ reg->host_user_addr;
}
}
- return vhost_va;
+
+ return 0;
}
-struct virtio_net_device_ops const *notify_ops;
-struct virtio_net *get_device(int vid);
+/*
+ * Convert guest physical address to host physical address.
+ *
+ * Returns the host physical address for gpa when the whole range
+ * [gpa, gpa + size) lies inside a single entry of dev->guest_pages[],
+ * or 0 when no single entry covers it.
+ */
+static inline phys_addr_t __attribute__((always_inline))
+gpa_to_hpa(struct virtio_net *dev, uint64_t gpa, uint64_t size)
+{
+	uint32_t i;
+	struct guest_page *page;
-int vhost_new_device(void);
-void vhost_destroy_device(int);
+	for (i = 0; i < dev->nr_guest_pages; i++) {
+		page = &dev->guest_pages[i];
-void vhost_set_ifname(int, const char *if_name, unsigned int if_len);
+		/* A range ending exactly at the end of the page is still inside it, hence <=. */
+		if (gpa >= page->guest_phys_addr &&
+		    gpa + size <= page->guest_phys_addr + page->size) {
+			return gpa - page->guest_phys_addr +
+			       page->host_phys_addr;
+		}
+	}
-int vhost_get_features(int, uint64_t *);
-int vhost_set_features(int, uint64_t *);
+	return 0;
+}
-int vhost_set_vring_num(int, struct vhost_vring_state *);
-int vhost_set_vring_addr(int, struct vhost_vring_addr *);
-int vhost_set_vring_base(int, struct vhost_vring_state *);
-int vhost_get_vring_base(int, uint32_t, struct vhost_vring_state *);
+struct virtio_net_device_ops const *notify_ops;
+struct virtio_net *get_device(int vid);
-int vhost_set_vring_kick(int, struct vhost_vring_file *);
-int vhost_set_vring_call(int, struct vhost_vring_file *);
+int vhost_new_device(void);
+void cleanup_device(struct virtio_net *dev, int destroy);
+void reset_device(struct virtio_net *dev);
+void vhost_destroy_device(int);
-int vhost_set_backend(int, struct vhost_vring_file *);
+int alloc_vring_queue_pair(struct virtio_net *dev, uint32_t qp_idx);
-int vhost_set_owner(int);
-int vhost_reset_owner(int);
+void vhost_set_ifname(int, const char *if_name, unsigned int if_len);
+void vhost_enable_dequeue_zero_copy(int vid);
/*
- * Backend-specific cleanup. Defined by vhost-cuse and vhost-user.
+ * Backend-specific cleanup.
+ *
+ * TODO: fix it; we have one backend now
*/
void vhost_backend_cleanup(struct virtio_net *dev);
diff --git a/lib/librte_vhost/vhost_cuse/vhost-net-cdev.c b/lib/librte_vhost/vhost_cuse/vhost-net-cdev.c
deleted file mode 100644
index 5d150116..00000000
--- a/lib/librte_vhost/vhost_cuse/vhost-net-cdev.c
+++ /dev/null
@@ -1,431 +0,0 @@
-/*-
- * BSD LICENSE
- *
- * Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- *
- * * Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in
- * the documentation and/or other materials provided with the
- * distribution.
- * * Neither the name of Intel Corporation nor the names of its
- * contributors may be used to endorse or promote products derived
- * from this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- */
-
-#include <errno.h>
-#include <fuse/cuse_lowlevel.h>
-#include <linux/limits.h>
-#include <linux/vhost.h>
-#include <stdint.h>
-#include <string.h>
-#include <unistd.h>
-
-#include <rte_ethdev.h>
-#include <rte_log.h>
-#include <rte_string_fns.h>
-#include <rte_virtio_net.h>
-
-#include "virtio-net-cdev.h"
-#include "vhost-net.h"
-#include "eventfd_copy.h"
-
-#define FUSE_OPT_DUMMY "\0\0"
-#define FUSE_OPT_FORE "-f\0\0"
-#define FUSE_OPT_NOMULTI "-s\0\0"
-
-static const uint32_t default_major = 231;
-static const uint32_t default_minor = 1;
-static const char cuse_device_name[] = "/dev/cuse";
-static const char default_cdev[] = "vhost-net";
-
-static struct fuse_session *session;
-
-/*
- * Returns vhost_cuse_device_ctx from given fuse_req_t. The
- * index is populated later when the device is added to the
- * device linked list.
- */
-static struct vhost_cuse_device_ctx
-fuse_req_to_vhost_ctx(fuse_req_t req, struct fuse_file_info *fi)
-{
- struct vhost_cuse_device_ctx ctx;
- struct fuse_ctx const *const req_ctx = fuse_req_ctx(req);
-
- ctx.pid = req_ctx->pid;
- ctx.vid = (int)fi->fh;
-
- return ctx;
-}
-
-/*
- * When the device is created in QEMU it gets initialised here and
- * added to the device linked list.
- */
-static void
-vhost_net_open(fuse_req_t req, struct fuse_file_info *fi)
-{
- int vid = 0;
-
- vid = vhost_new_device();
- if (vid == -1) {
- fuse_reply_err(req, EPERM);
- return;
- }
-
- fi->fh = vid;
-
- RTE_LOG(INFO, VHOST_CONFIG,
- "(%d) device configuration started\n", vid);
- fuse_reply_open(req, fi);
-}
-
-/*
- * When QEMU is shutdown or killed the device gets released.
- */
-static void
-vhost_net_release(fuse_req_t req, struct fuse_file_info *fi)
-{
- int err = 0;
- struct vhost_cuse_device_ctx ctx = fuse_req_to_vhost_ctx(req, fi);
-
- vhost_destroy_device(ctx.vid);
- RTE_LOG(INFO, VHOST_CONFIG, "(%d) device released\n", ctx.vid);
- fuse_reply_err(req, err);
-}
-
-/*
- * Boilerplate code for CUSE IOCTL
- * Implicit arguments: vid, req, result.
- */
-#define VHOST_IOCTL(func) do { \
- result = (func)(vid); \
- fuse_reply_ioctl(req, result, NULL, 0); \
-} while (0)
-
-/*
- * Boilerplate IOCTL RETRY
- * Implicit arguments: req.
- */
-#define VHOST_IOCTL_RETRY(size_r, size_w) do { \
- struct iovec iov_r = { arg, (size_r) }; \
- struct iovec iov_w = { arg, (size_w) }; \
- fuse_reply_ioctl_retry(req, &iov_r, \
- (size_r) ? 1 : 0, &iov_w, (size_w) ? 1 : 0);\
-} while (0)
-
-/*
- * Boilerplate code for CUSE Read IOCTL
- * Implicit arguments: vid, req, result, in_bufsz, in_buf.
- */
-#define VHOST_IOCTL_R(type, var, func) do { \
- if (!in_bufsz) { \
- VHOST_IOCTL_RETRY(sizeof(type), 0);\
- } else { \
- (var) = *(const type*)in_buf; \
- result = func(vid, &(var)); \
- fuse_reply_ioctl(req, result, NULL, 0);\
- } \
-} while (0)
-
-/*
- * Boilerplate code for CUSE Write IOCTL
- * Implicit arguments: vid, req, result, out_bufsz.
- */
-#define VHOST_IOCTL_W(type, var, func) do { \
- if (!out_bufsz) { \
- VHOST_IOCTL_RETRY(0, sizeof(type));\
- } else { \
- result = (func)(vid, &(var));\
- fuse_reply_ioctl(req, result, &(var), sizeof(type));\
- } \
-} while (0)
-
-/*
- * Boilerplate code for CUSE Read/Write IOCTL
- * Implicit arguments: vid, req, result, in_bufsz, in_buf.
- */
-#define VHOST_IOCTL_RW(type1, var1, type2, var2, func) do { \
- if (!in_bufsz) { \
- VHOST_IOCTL_RETRY(sizeof(type1), sizeof(type2));\
- } else { \
- (var1) = *(const type1*) (in_buf); \
- result = (func)(vid, (var1), &(var2)); \
- fuse_reply_ioctl(req, result, &(var2), sizeof(type2));\
- } \
-} while (0)
-
-/*
- * The IOCTLs are handled using CUSE/FUSE in userspace. Depending on the type
- * of IOCTL a buffer is requested to read or to write. This request is handled
- * by FUSE and the buffer is then given to CUSE.
- */
-static void
-vhost_net_ioctl(fuse_req_t req, int cmd, void *arg,
- struct fuse_file_info *fi, __rte_unused unsigned flags,
- const void *in_buf, size_t in_bufsz, size_t out_bufsz)
-{
- struct vhost_cuse_device_ctx ctx = fuse_req_to_vhost_ctx(req, fi);
- struct vhost_vring_file file;
- struct vhost_vring_state state;
- struct vhost_vring_addr addr;
- uint64_t features;
- uint32_t index;
- int result = 0;
- int vid = ctx.vid;
-
- switch (cmd) {
- case VHOST_NET_SET_BACKEND:
- LOG_DEBUG(VHOST_CONFIG,
- "(%d) IOCTL: VHOST_NET_SET_BACKEND\n", ctx.vid);
- if (!in_buf) {
- VHOST_IOCTL_RETRY(sizeof(file), 0);
- break;
- }
- file = *(const struct vhost_vring_file *)in_buf;
- result = cuse_set_backend(ctx, &file);
- fuse_reply_ioctl(req, result, NULL, 0);
- break;
-
- case VHOST_GET_FEATURES:
- LOG_DEBUG(VHOST_CONFIG,
- "(%d) IOCTL: VHOST_GET_FEATURES\n", vid);
- VHOST_IOCTL_W(uint64_t, features, vhost_get_features);
- break;
-
- case VHOST_SET_FEATURES:
- LOG_DEBUG(VHOST_CONFIG,
- "(%d) IOCTL: VHOST_SET_FEATURES\n", vid);
- VHOST_IOCTL_R(uint64_t, features, vhost_set_features);
- break;
-
- case VHOST_RESET_OWNER:
- LOG_DEBUG(VHOST_CONFIG,
- "(%d) IOCTL: VHOST_RESET_OWNER\n", vid);
- VHOST_IOCTL(vhost_reset_owner);
- break;
-
- case VHOST_SET_OWNER:
- LOG_DEBUG(VHOST_CONFIG,
- "(%d) IOCTL: VHOST_SET_OWNER\n", vid);
- VHOST_IOCTL(vhost_set_owner);
- break;
-
- case VHOST_SET_MEM_TABLE:
- /*TODO fix race condition.*/
- LOG_DEBUG(VHOST_CONFIG,
- "(%d) IOCTL: VHOST_SET_MEM_TABLE\n", vid);
- static struct vhost_memory mem_temp;
-
- switch (in_bufsz) {
- case 0:
- VHOST_IOCTL_RETRY(sizeof(struct vhost_memory), 0);
- break;
-
- case sizeof(struct vhost_memory):
- mem_temp = *(const struct vhost_memory *) in_buf;
-
- if (mem_temp.nregions > 0) {
- VHOST_IOCTL_RETRY(sizeof(struct vhost_memory) +
- (sizeof(struct vhost_memory_region) *
- mem_temp.nregions), 0);
- } else {
- result = -1;
- fuse_reply_ioctl(req, result, NULL, 0);
- }
- break;
-
- default:
- result = cuse_set_mem_table(ctx, in_buf,
- mem_temp.nregions);
- if (result)
- fuse_reply_err(req, EINVAL);
- else
- fuse_reply_ioctl(req, result, NULL, 0);
- }
- break;
-
- case VHOST_SET_VRING_NUM:
- LOG_DEBUG(VHOST_CONFIG,
- "(%d) IOCTL: VHOST_SET_VRING_NUM\n", vid);
- VHOST_IOCTL_R(struct vhost_vring_state, state,
- vhost_set_vring_num);
- break;
-
- case VHOST_SET_VRING_BASE:
- LOG_DEBUG(VHOST_CONFIG,
- "(%d) IOCTL: VHOST_SET_VRING_BASE\n", vid);
- VHOST_IOCTL_R(struct vhost_vring_state, state,
- vhost_set_vring_base);
- break;
-
- case VHOST_GET_VRING_BASE:
- LOG_DEBUG(VHOST_CONFIG,
- "(%d) IOCTL: VHOST_GET_VRING_BASE\n", vid);
- VHOST_IOCTL_RW(uint32_t, index,
- struct vhost_vring_state, state, vhost_get_vring_base);
- break;
-
- case VHOST_SET_VRING_ADDR:
- LOG_DEBUG(VHOST_CONFIG,
- "(%d) IOCTL: VHOST_SET_VRING_ADDR\n", vid);
- VHOST_IOCTL_R(struct vhost_vring_addr, addr,
- vhost_set_vring_addr);
- break;
-
- case VHOST_SET_VRING_KICK:
- case VHOST_SET_VRING_CALL:
- if (cmd == VHOST_SET_VRING_KICK)
- LOG_DEBUG(VHOST_CONFIG,
- "(%d) IOCTL: VHOST_SET_VRING_KICK\n", vid);
- else
- LOG_DEBUG(VHOST_CONFIG,
- "(%d) IOCTL: VHOST_SET_VRING_CALL\n", vid);
- if (!in_buf)
- VHOST_IOCTL_RETRY(sizeof(struct vhost_vring_file), 0);
- else {
- int fd;
- file = *(const struct vhost_vring_file *)in_buf;
- LOG_DEBUG(VHOST_CONFIG,
- "idx:%d fd:%d\n", file.index, file.fd);
- fd = eventfd_copy(file.fd, ctx.pid);
- if (fd < 0) {
- fuse_reply_ioctl(req, -1, NULL, 0);
- result = -1;
- break;
- }
- file.fd = fd;
- if (cmd == VHOST_SET_VRING_KICK) {
- result = vhost_set_vring_kick(vid, &file);
- fuse_reply_ioctl(req, result, NULL, 0);
- } else {
- result = vhost_set_vring_call(vid, &file);
- fuse_reply_ioctl(req, result, NULL, 0);
- }
- }
- break;
-
- default:
- RTE_LOG(ERR, VHOST_CONFIG,
- "(%d) IOCTL: DOESN NOT EXIST\n", vid);
- result = -1;
- fuse_reply_ioctl(req, result, NULL, 0);
- }
-
- if (result < 0)
- LOG_DEBUG(VHOST_CONFIG,
- "(%d) IOCTL: FAIL\n", vid);
- else
- LOG_DEBUG(VHOST_CONFIG,
- "(%d) IOCTL: SUCCESS\n", vid);
-}
-
-/*
- * Structure handling open, release and ioctl function pointers is populated.
- */
-static const struct cuse_lowlevel_ops vhost_net_ops = {
- .open = vhost_net_open,
- .release = vhost_net_release,
- .ioctl = vhost_net_ioctl,
-};
-
-/*
- * cuse_info is populated and used to register the cuse device.
- * vhost_net_device_ops are also passed when the device is registered in app.
- */
-int
-rte_vhost_driver_register(const char *dev_name, uint64_t flags)
-{
- struct cuse_info cuse_info;
- char device_name[PATH_MAX] = "";
- char char_device_name[PATH_MAX] = "";
- const char *device_argv[] = { device_name };
-
- char fuse_opt_dummy[] = FUSE_OPT_DUMMY;
- char fuse_opt_fore[] = FUSE_OPT_FORE;
- char fuse_opt_nomulti[] = FUSE_OPT_NOMULTI;
- char *fuse_argv[] = {fuse_opt_dummy, fuse_opt_fore, fuse_opt_nomulti};
-
- if (flags) {
- RTE_LOG(ERR, VHOST_CONFIG,
- "vhost-cuse does not support any flags so far\n");
- return -1;
- }
-
- if (access(cuse_device_name, R_OK | W_OK) < 0) {
- RTE_LOG(ERR, VHOST_CONFIG,
- "char device %s can't be accessed, maybe not exist\n",
- cuse_device_name);
- return -1;
- }
-
- if (eventfd_init() < 0)
- return -1;
-
- /*
- * The device name is created. This is passed to QEMU so that it can
- * register the device with our application.
- */
- snprintf(device_name, PATH_MAX, "DEVNAME=%s", dev_name);
- snprintf(char_device_name, PATH_MAX, "/dev/%s", dev_name);
-
- /* Check if device already exists. */
- if (access(char_device_name, F_OK) != -1) {
- RTE_LOG(ERR, VHOST_CONFIG,
- "char device %s already exists\n", char_device_name);
- return -1;
- }
-
- memset(&cuse_info, 0, sizeof(cuse_info));
- cuse_info.dev_major = default_major;
- cuse_info.dev_minor = default_minor;
- cuse_info.dev_info_argc = 1;
- cuse_info.dev_info_argv = device_argv;
- cuse_info.flags = CUSE_UNRESTRICTED_IOCTL;
-
- session = cuse_lowlevel_setup(3, fuse_argv,
- &cuse_info, &vhost_net_ops, 0, NULL);
- if (session == NULL)
- return -1;
-
- return 0;
-}
-
-/**
- * An empty function for unregister
- */
-int
-rte_vhost_driver_unregister(const char *dev_name __rte_unused)
-{
- return 0;
-}
-
-/**
- * The CUSE session is launched allowing the application to receive open,
- * release and ioctl calls.
- */
-int
-rte_vhost_driver_session_start(void)
-{
- fuse_session_loop(session);
-
- return 0;
-}
diff --git a/lib/librte_vhost/vhost_cuse/virtio-net-cdev.c b/lib/librte_vhost/vhost_cuse/virtio-net-cdev.c
deleted file mode 100644
index 552be7d4..00000000
--- a/lib/librte_vhost/vhost_cuse/virtio-net-cdev.c
+++ /dev/null
@@ -1,433 +0,0 @@
-/*-
- * BSD LICENSE
- *
- * Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- *
- * * Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in
- * the documentation and/or other materials provided with the
- * distribution.
- * * Neither the name of Intel Corporation nor the names of its
- * contributors may be used to endorse or promote products derived
- * from this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- */
-
-#include <stdint.h>
-#include <dirent.h>
-#include <linux/vhost.h>
-#include <linux/virtio_net.h>
-#include <fuse/cuse_lowlevel.h>
-#include <stddef.h>
-#include <string.h>
-#include <stdlib.h>
-#include <sys/eventfd.h>
-#include <sys/mman.h>
-#include <sys/types.h>
-#include <unistd.h>
-#include <sys/ioctl.h>
-#include <sys/socket.h>
-#include <linux/if_tun.h>
-#include <linux/if.h>
-#include <errno.h>
-
-#include <rte_log.h>
-
-#include "rte_virtio_net.h"
-#include "vhost-net.h"
-#include "virtio-net-cdev.h"
-#include "eventfd_copy.h"
-
-/* Line size for reading maps file. */
-static const uint32_t BUFSIZE = PATH_MAX;
-
-/* Size of prot char array in procmap. */
-#define PROT_SZ 5
-
-/* Number of elements in procmap struct. */
-#define PROCMAP_SZ 8
-
-/* Structure containing information gathered from maps file. */
-struct procmap {
- uint64_t va_start; /* Start virtual address in file. */
- uint64_t len; /* Size of file. */
- uint64_t pgoff; /* Not used. */
- uint32_t maj; /* Not used. */
- uint32_t min; /* Not used. */
- uint32_t ino; /* Not used. */
- char prot[PROT_SZ]; /* Not used. */
- char fname[PATH_MAX]; /* File name. */
-};
-
-/*
- * Locate the file containing QEMU's memory space and
- * map it to our address space.
- */
-static int
-host_memory_map(pid_t pid, uint64_t addr,
- uint64_t *mapped_address, uint64_t *mapped_size)
-{
- struct dirent *dptr = NULL;
- struct procmap procmap;
- DIR *dp = NULL;
- int fd;
- int i;
- char memfile[PATH_MAX];
- char mapfile[PATH_MAX];
- char procdir[PATH_MAX];
- char resolved_path[PATH_MAX];
- char *path = NULL;
- FILE *fmap;
- void *map;
- uint8_t found = 0;
- char line[BUFSIZE];
- char dlm[] = "- : ";
- char *str, *sp, *in[PROCMAP_SZ];
- char *end = NULL;
-
- /* Path where mem files are located. */
- snprintf(procdir, PATH_MAX, "/proc/%u/fd/", pid);
- /* Maps file used to locate mem file. */
- snprintf(mapfile, PATH_MAX, "/proc/%u/maps", pid);
-
- fmap = fopen(mapfile, "r");
- if (fmap == NULL) {
- RTE_LOG(ERR, VHOST_CONFIG,
- "Failed to open maps file for pid %d\n",
- pid);
- return -1;
- }
-
- /* Read through maps file until we find out base_address. */
- while (fgets(line, BUFSIZE, fmap) != 0) {
- str = line;
- errno = 0;
- /* Split line into fields. */
- for (i = 0; i < PROCMAP_SZ; i++) {
- in[i] = strtok_r(str, &dlm[i], &sp);
- if ((in[i] == NULL) || (errno != 0)) {
- fclose(fmap);
- return -1;
- }
- str = NULL;
- }
-
- /* Convert/Copy each field as needed. */
- procmap.va_start = strtoull(in[0], &end, 16);
- if ((in[0] == '\0') || (end == NULL) || (*end != '\0') ||
- (errno != 0)) {
- fclose(fmap);
- return -1;
- }
-
- procmap.len = strtoull(in[1], &end, 16);
- if ((in[1] == '\0') || (end == NULL) || (*end != '\0') ||
- (errno != 0)) {
- fclose(fmap);
- return -1;
- }
-
- procmap.pgoff = strtoull(in[3], &end, 16);
- if ((in[3] == '\0') || (end == NULL) || (*end != '\0') ||
- (errno != 0)) {
- fclose(fmap);
- return -1;
- }
-
- procmap.maj = strtoul(in[4], &end, 16);
- if ((in[4] == '\0') || (end == NULL) || (*end != '\0') ||
- (errno != 0)) {
- fclose(fmap);
- return -1;
- }
-
- procmap.min = strtoul(in[5], &end, 16);
- if ((in[5] == '\0') || (end == NULL) || (*end != '\0') ||
- (errno != 0)) {
- fclose(fmap);
- return -1;
- }
-
- procmap.ino = strtoul(in[6], &end, 16);
- if ((in[6] == '\0') || (end == NULL) || (*end != '\0') ||
- (errno != 0)) {
- fclose(fmap);
- return -1;
- }
-
- memcpy(&procmap.prot, in[2], PROT_SZ);
- memcpy(&procmap.fname, in[7], PATH_MAX);
-
- if (procmap.va_start == addr) {
- procmap.len = procmap.len - procmap.va_start;
- found = 1;
- break;
- }
- }
- fclose(fmap);
-
- if (!found) {
- RTE_LOG(ERR, VHOST_CONFIG,
- "Failed to find memory file in pid %d maps file\n",
- pid);
- return -1;
- }
-
- /* Find the guest memory file among the process fds. */
- dp = opendir(procdir);
- if (dp == NULL) {
- RTE_LOG(ERR, VHOST_CONFIG,
- "Cannot open pid %d process directory\n",
- pid);
- return -1;
- }
-
- found = 0;
-
- /* Read the fd directory contents. */
- while (NULL != (dptr = readdir(dp))) {
- snprintf(memfile, PATH_MAX, "/proc/%u/fd/%s",
- pid, dptr->d_name);
- path = realpath(memfile, resolved_path);
- if ((path == NULL) && (strlen(resolved_path) == 0)) {
- RTE_LOG(ERR, VHOST_CONFIG,
- "Failed to resolve fd directory\n");
- closedir(dp);
- return -1;
- }
- if (strncmp(resolved_path, procmap.fname,
- strnlen(procmap.fname, PATH_MAX)) == 0) {
- found = 1;
- break;
- }
- }
-
- closedir(dp);
-
- if (found == 0) {
- RTE_LOG(ERR, VHOST_CONFIG,
- "Failed to find memory file for pid %d\n",
- pid);
- return -1;
- }
- /* Open the shared memory file and map the memory into this process. */
- fd = open(memfile, O_RDWR);
-
- if (fd == -1) {
- RTE_LOG(ERR, VHOST_CONFIG,
- "Failed to open %s for pid %d\n",
- memfile, pid);
- return -1;
- }
-
- map = mmap(0, (size_t)procmap.len, PROT_READ|PROT_WRITE,
- MAP_POPULATE|MAP_SHARED, fd, 0);
- close(fd);
-
- if (map == MAP_FAILED) {
- RTE_LOG(ERR, VHOST_CONFIG,
- "Error mapping the file %s for pid %d\n",
- memfile, pid);
- return -1;
- }
-
- /* Store the memory address and size in the device data structure */
- *mapped_address = (uint64_t)(uintptr_t)map;
- *mapped_size = procmap.len;
-
- LOG_DEBUG(VHOST_CONFIG,
- "Mem File: %s->%s - Size: %llu - VA: %p\n",
- memfile, resolved_path,
- (unsigned long long)*mapped_size, map);
-
- return 0;
-}
-
-int
-cuse_set_mem_table(struct vhost_cuse_device_ctx ctx,
- const struct vhost_memory *mem_regions_addr, uint32_t nregions)
-{
- uint64_t size = offsetof(struct vhost_memory, regions);
- uint32_t idx, valid_regions;
- struct virtio_memory_regions *pregion;
- struct vhost_memory_region *mem_regions = (void *)(uintptr_t)
- ((uint64_t)(uintptr_t)mem_regions_addr + size);
- uint64_t base_address = 0, mapped_address, mapped_size;
- struct virtio_net *dev;
-
- dev = get_device(ctx.vid);
- if (dev == NULL)
- return -1;
-
- if (dev->mem && dev->mem->mapped_address) {
- munmap((void *)(uintptr_t)dev->mem->mapped_address,
- (size_t)dev->mem->mapped_size);
- free(dev->mem);
- dev->mem = NULL;
- }
-
- dev->mem = calloc(1, sizeof(struct virtio_memory) +
- sizeof(struct virtio_memory_regions) * nregions);
- if (dev->mem == NULL) {
- RTE_LOG(ERR, VHOST_CONFIG,
- "(%d) failed to allocate memory for dev->mem\n",
- dev->vid);
- return -1;
- }
-
- pregion = &dev->mem->regions[0];
-
- for (idx = 0; idx < nregions; idx++) {
- pregion[idx].guest_phys_address =
- mem_regions[idx].guest_phys_addr;
- pregion[idx].guest_phys_address_end =
- pregion[idx].guest_phys_address +
- mem_regions[idx].memory_size;
- pregion[idx].memory_size =
- mem_regions[idx].memory_size;
- pregion[idx].userspace_address =
- mem_regions[idx].userspace_addr;
-
- LOG_DEBUG(VHOST_CONFIG,
- "REGION: %u - GPA: %p - QVA: %p - SIZE (%"PRIu64")\n",
- idx,
- (void *)(uintptr_t)pregion[idx].guest_phys_address,
- (void *)(uintptr_t)pregion[idx].userspace_address,
- pregion[idx].memory_size);
-
- /*set the base address mapping*/
- if (pregion[idx].guest_phys_address == 0x0) {
- base_address =
- pregion[idx].userspace_address;
- /* Map VM memory file */
- if (host_memory_map(ctx.pid, base_address,
- &mapped_address, &mapped_size) != 0) {
- free(dev->mem);
- dev->mem = NULL;
- return -1;
- }
- dev->mem->mapped_address = mapped_address;
- dev->mem->base_address = base_address;
- dev->mem->mapped_size = mapped_size;
- }
- }
-
- /* Check that we have a valid base address. */
- if (base_address == 0) {
- RTE_LOG(ERR, VHOST_CONFIG,
- "Failed to find base address of qemu memory file.\n");
- free(dev->mem);
- dev->mem = NULL;
- return -1;
- }
-
- valid_regions = nregions;
- for (idx = 0; idx < nregions; idx++) {
- if ((pregion[idx].userspace_address < base_address) ||
- (pregion[idx].userspace_address >
- (base_address + mapped_size)))
- valid_regions--;
- }
-
-
- if (valid_regions != nregions) {
- valid_regions = 0;
- for (idx = nregions; 0 != idx--; ) {
- if ((pregion[idx].userspace_address < base_address) ||
- (pregion[idx].userspace_address >
- (base_address + mapped_size))) {
- memmove(&pregion[idx], &pregion[idx + 1],
- sizeof(struct virtio_memory_regions) *
- valid_regions);
- } else
- valid_regions++;
- }
- }
-
- for (idx = 0; idx < valid_regions; idx++) {
- pregion[idx].address_offset =
- mapped_address - base_address +
- pregion[idx].userspace_address -
- pregion[idx].guest_phys_address;
- }
- dev->mem->nregions = valid_regions;
-
- return 0;
-}
-
-/*
- * Function to get the tap device name from the provided file descriptor and
- * save it in the device structure.
- */
-static int
-get_ifname(int vid, int tap_fd, int pid)
-{
- int fd_tap;
- struct ifreq ifr;
- uint32_t ifr_size;
- int ret;
-
- fd_tap = eventfd_copy(tap_fd, pid);
- if (fd_tap < 0)
- return -1;
-
- ret = ioctl(fd_tap, TUNGETIFF, &ifr);
-
- if (close(fd_tap) < 0)
- RTE_LOG(ERR, VHOST_CONFIG, "(%d) fd close failed\n", vid);
-
- if (ret >= 0) {
- ifr_size = strnlen(ifr.ifr_name, sizeof(ifr.ifr_name));
- vhost_set_ifname(vid, ifr.ifr_name, ifr_size);
- } else
- RTE_LOG(ERR, VHOST_CONFIG,
- "(%d) TUNGETIFF ioctl failed\n", vid);
-
- return 0;
-}
-
-int
-cuse_set_backend(struct vhost_cuse_device_ctx ctx,
- struct vhost_vring_file *file)
-{
- struct virtio_net *dev;
-
- dev = get_device(ctx.vid);
- if (dev == NULL)
- return -1;
-
- if (!(dev->flags & VIRTIO_DEV_RUNNING) && file->fd != VIRTIO_DEV_STOPPED)
- get_ifname(ctx.vid, file->fd, ctx.pid);
-
- return vhost_set_backend(ctx.vid, file);
-}
-
-void
-vhost_backend_cleanup(struct virtio_net *dev)
-{
- /* Unmap QEMU memory file if mapped. */
- if (dev->mem) {
- munmap((void *)(uintptr_t)dev->mem->mapped_address,
- (size_t)dev->mem->mapped_size);
- free(dev->mem);
- dev->mem = NULL;
- }
-}
diff --git a/lib/librte_vhost/vhost_user.c b/lib/librte_vhost/vhost_user.c
new file mode 100644
index 00000000..6b83c15f
--- /dev/null
+++ b/lib/librte_vhost/vhost_user.c
@@ -0,0 +1,1033 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright(c) 2010-2016 Intel Corporation. All rights reserved.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+#include <sys/mman.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <assert.h>
+#ifdef RTE_LIBRTE_VHOST_NUMA
+#include <numaif.h>
+#endif
+
+#include <rte_common.h>
+#include <rte_malloc.h>
+#include <rte_log.h>
+
+#include "vhost.h"
+#include "vhost_user.h"
+
+/*
+ * Printable names of the vhost-user requests, indexed by request code.
+ * vhost_user_msg_handler() uses the table to log every message it reads.
+ */
+static const char *vhost_message_str[VHOST_USER_MAX] = {
+ [VHOST_USER_NONE] = "VHOST_USER_NONE",
+ [VHOST_USER_GET_FEATURES] = "VHOST_USER_GET_FEATURES",
+ [VHOST_USER_SET_FEATURES] = "VHOST_USER_SET_FEATURES",
+ [VHOST_USER_SET_OWNER] = "VHOST_USER_SET_OWNER",
+ [VHOST_USER_RESET_OWNER] = "VHOST_USER_RESET_OWNER",
+ [VHOST_USER_SET_MEM_TABLE] = "VHOST_USER_SET_MEM_TABLE",
+ [VHOST_USER_SET_LOG_BASE] = "VHOST_USER_SET_LOG_BASE",
+ [VHOST_USER_SET_LOG_FD] = "VHOST_USER_SET_LOG_FD",
+ [VHOST_USER_SET_VRING_NUM] = "VHOST_USER_SET_VRING_NUM",
+ [VHOST_USER_SET_VRING_ADDR] = "VHOST_USER_SET_VRING_ADDR",
+ [VHOST_USER_SET_VRING_BASE] = "VHOST_USER_SET_VRING_BASE",
+ [VHOST_USER_GET_VRING_BASE] = "VHOST_USER_GET_VRING_BASE",
+ [VHOST_USER_SET_VRING_KICK] = "VHOST_USER_SET_VRING_KICK",
+ [VHOST_USER_SET_VRING_CALL] = "VHOST_USER_SET_VRING_CALL",
+ [VHOST_USER_SET_VRING_ERR] = "VHOST_USER_SET_VRING_ERR",
+ [VHOST_USER_GET_PROTOCOL_FEATURES] = "VHOST_USER_GET_PROTOCOL_FEATURES",
+ [VHOST_USER_SET_PROTOCOL_FEATURES] = "VHOST_USER_SET_PROTOCOL_FEATURES",
+ [VHOST_USER_GET_QUEUE_NUM] = "VHOST_USER_GET_QUEUE_NUM",
+ [VHOST_USER_SET_VRING_ENABLE] = "VHOST_USER_SET_VRING_ENABLE",
+ [VHOST_USER_SEND_RARP] = "VHOST_USER_SEND_RARP",
+};
+
+/*
+ * Return the block size (st_blksize) that fstat() reports for fd, or
+ * (uint64_t)-1 when fstat() fails.
+ */
+static uint64_t
+get_blk_size(int fd)
+{
+	struct stat sb;
+
+	if (fstat(fd, &sb) == -1)
+		return (uint64_t)-1;
+
+	return (uint64_t)sb.st_blksize;
+}
+
+/*
+ * Unmap every guest memory region of the device that has a host mapping
+ * and close the file descriptor backing it. The region table itself
+ * (dev->mem) is not freed here.
+ */
+static void
+free_mem_region(struct virtio_net *dev)
+{
+	struct virtio_memory_region *region;
+	uint32_t idx;
+
+	if (dev == NULL || dev->mem == NULL)
+		return;
+
+	for (idx = 0; idx < dev->mem->nregions; idx++) {
+		region = &dev->mem->regions[idx];
+
+		/* A zero host address means the region was never mapped. */
+		if (!region->host_user_addr)
+			continue;
+
+		munmap(region->mmap_addr, region->mmap_size);
+		close(region->fd);
+	}
+}
+
+/*
+ * Release the backend resources attached to the device: the guest memory
+ * regions together with their table, and the log mapping if one exists.
+ */
+void
+vhost_backend_cleanup(struct virtio_net *dev)
+{
+	if (dev->mem != NULL) {
+		free_mem_region(dev);
+		rte_free(dev->mem);
+		dev->mem = NULL;
+	}
+
+	if (dev->log_addr != 0) {
+		void *log_map = (void *)(uintptr_t)dev->log_addr;
+
+		munmap(log_map, dev->log_size);
+		dev->log_addr = 0;
+	}
+}
+
+/*
+ * Handler for VHOST_USER_SET_OWNER. It takes no arguments and does no
+ * work at the moment: it unconditionally reports success (0).
+ */
+static int
+vhost_user_set_owner(void)
+{
+ return 0;
+}
+
+/*
+ * Handle VHOST_USER_RESET_OWNER: take a running device out of service
+ * (the application is told through destroy_device()), then clean up and
+ * reset the device state. Always returns 0.
+ */
+static int
+vhost_user_reset_owner(struct virtio_net *dev)
+{
+	const int running = (dev->flags & VIRTIO_DEV_RUNNING) != 0;
+
+	if (running) {
+		dev->flags &= ~VIRTIO_DEV_RUNNING;
+		notify_ops->destroy_device(dev->vid);
+	}
+
+	cleanup_device(dev, 0);
+	reset_device(dev);
+
+	return 0;
+}
+
+/*
+ * Answer a feature query: report the feature bits we support, i.e.
+ * VHOST_FEATURES.
+ */
+static uint64_t
+vhost_user_get_features(void)
+{
+	const uint64_t supported = VHOST_FEATURES;
+
+	return supported;
+}
+
+/*
+ * Store the feature set negotiated between us and the virtio device.
+ *
+ * Returns -1, leaving the device untouched, when the request contains a
+ * bit outside VHOST_FEATURES; returns 0 otherwise. The virtio-net header
+ * length follows from the result: the mergeable-RX-buffer header is used
+ * when either VIRTIO_NET_F_MRG_RXBUF or VIRTIO_F_VERSION_1 was negotiated,
+ * the plain header otherwise.
+ */
+static int
+vhost_user_set_features(struct virtio_net *dev, uint64_t features)
+{
+	/*
+	 * Build both masks with 1ULL so that every feature test is done in
+	 * 64 bits; the original mixed a plain int shift with a 64-bit one.
+	 */
+	const uint64_t mrg_rxbuf = 1ULL << VIRTIO_NET_F_MRG_RXBUF;
+	const uint64_t version_1 = 1ULL << VIRTIO_F_VERSION_1;
+
+	if (features & ~VHOST_FEATURES)
+		return -1;
+
+	dev->features = features;
+	if (dev->features & (mrg_rxbuf | version_1)) {
+		dev->vhost_hlen = sizeof(struct virtio_net_hdr_mrg_rxbuf);
+	} else {
+		dev->vhost_hlen = sizeof(struct virtio_net_hdr);
+	}
+	LOG_DEBUG(VHOST_CONFIG,
+		"(%d) mergeable RX buffers %s, virtio 1 %s\n",
+		dev->vid,
+		(dev->features & mrg_rxbuf) ? "on" : "off",
+		(dev->features & version_1) ? "on" : "off");
+
+	return 0;
+}
+
+/*
+ * Handle VHOST_USER_SET_VRING_NUM: record the descriptor ring size sent by
+ * the virtio device and allocate the per-queue arrays sized from it.
+ *
+ * Returns 0 on success and -1 when the shadow used ring cannot be
+ * allocated. A failed zero-copy allocation is not fatal: dequeue zero
+ * copy is switched off for the device instead.
+ */
+static int
+vhost_user_set_vring_num(struct virtio_net *dev,
+			 struct vhost_vring_state *state)
+{
+	/*
+	 * NOTE(review): state->index is taken from the message and is not
+	 * range-checked in this function -- confirm it is validated elsewhere.
+	 */
+	struct vhost_virtqueue *vq = dev->virtqueue[state->index];
+
+	vq->size = state->num;
+
+	if (dev->dequeue_zero_copy) {
+		vq->nr_zmbuf = 0;
+		vq->last_zmbuf_idx = 0;
+		vq->zmbuf_size = vq->size;
+		vq->zmbufs = rte_zmalloc(NULL,
+				vq->zmbuf_size * sizeof(struct zcopy_mbuf), 0);
+		if (vq->zmbufs == NULL) {
+			RTE_LOG(WARNING, VHOST_CONFIG,
+				"failed to allocate mem for zero copy; "
+				"zero copy is force disabled\n");
+			dev->dequeue_zero_copy = 0;
+		}
+	}
+
+	vq->shadow_used_ring = rte_malloc(NULL,
+			vq->size * sizeof(struct vring_used_elem),
+			RTE_CACHE_LINE_SIZE);
+	if (vq->shadow_used_ring != NULL)
+		return 0;
+
+	RTE_LOG(ERR, VHOST_CONFIG,
+		"failed to allocate memory for shadow used ring.\n");
+	return -1;
+}
+
+/*
+ * Reallocate the virtio_net device and its vhost_virtqueue pair so that
+ * they live on the same NUMA node as the memory of the vring descriptor
+ * table.
+ *
+ * Only acts when index is the first queue of a queue pair. Returns the
+ * device pointer the caller must use from now on: a new one if the device
+ * was moved, otherwise the one passed in. On any failure the structures
+ * that could not be moved simply stay where they are. When
+ * RTE_LIBRTE_VHOST_NUMA is not defined this is a no-op returning dev.
+ */
+#ifdef RTE_LIBRTE_VHOST_NUMA
+static struct virtio_net*
+numa_realloc(struct virtio_net *dev, int index)
+{
+ int oldnode, newnode;
+ struct virtio_net *old_dev;
+ struct vhost_virtqueue *old_vq, *vq;
+ int ret;
+
+ /*
+ * vq is allocated on pairs, we should try to do realloc
+ * on first queue of one queue pair only.
+ */
+ if (index % VIRTIO_QNUM != 0)
+ return dev;
+
+ old_dev = dev;
+ vq = old_vq = dev->virtqueue[index];
+
+ /* newnode: the node backing the descriptor ring (the target node) */
+ ret = get_mempolicy(&newnode, NULL, 0, old_vq->desc,
+ MPOL_F_NODE | MPOL_F_ADDR);
+
+ /* check if we need to reallocate vq */
+ ret |= get_mempolicy(&oldnode, NULL, 0, old_vq,
+ MPOL_F_NODE | MPOL_F_ADDR);
+ if (ret) {
+ RTE_LOG(ERR, VHOST_CONFIG,
+ "Unable to get vq numa information.\n");
+ return dev;
+ }
+ if (oldnode != newnode) {
+ RTE_LOG(INFO, VHOST_CONFIG,
+ "reallocate vq from %d to %d node\n", oldnode, newnode);
+ vq = rte_malloc_socket(NULL, sizeof(*vq) * VIRTIO_QNUM, 0,
+ newnode);
+ if (!vq)
+ return dev;
+
+ /* move the whole pair: VIRTIO_QNUM queues are copied in one go */
+ memcpy(vq, old_vq, sizeof(*vq) * VIRTIO_QNUM);
+ rte_free(old_vq);
+ }
+
+ /* check if we need to reallocate dev */
+ ret = get_mempolicy(&oldnode, NULL, 0, old_dev,
+ MPOL_F_NODE | MPOL_F_ADDR);
+ if (ret) {
+ RTE_LOG(ERR, VHOST_CONFIG,
+ "Unable to get dev numa information.\n");
+ goto out;
+ }
+ if (oldnode != newnode) {
+ RTE_LOG(INFO, VHOST_CONFIG,
+ "reallocate dev from %d to %d node\n",
+ oldnode, newnode);
+ dev = rte_malloc_socket(NULL, sizeof(*dev), 0, newnode);
+ if (!dev) {
+ dev = old_dev;
+ goto out;
+ }
+
+ memcpy(dev, old_dev, sizeof(*dev));
+ rte_free(old_dev);
+ }
+
+out:
+ /*
+ * Publish the (possibly new) queue pair in the (possibly new) device,
+ * and the device itself in the global vhost_devices table.
+ */
+ dev->virtqueue[index] = vq;
+ dev->virtqueue[index + 1] = vq + 1;
+ vhost_devices[dev->vid] = dev;
+
+ return dev;
+}
+#else
+static struct virtio_net*
+numa_realloc(struct virtio_net *dev, int index __rte_unused)
+{
+ return dev;
+}
+#endif
+
+/*
+ * Translate a QEMU virtual address into a vhost (host) virtual address by
+ * locating the guest memory region that contains it. Used to bring the
+ * ring addresses into our address space.
+ *
+ * Returns 0 when no region covers qva.
+ */
+static uint64_t
+qva_to_vva(struct virtio_net *dev, uint64_t qva)
+{
+	const struct virtio_memory_region *region;
+	uint32_t idx;
+
+	for (idx = 0; idx < dev->mem->nregions; idx++) {
+		region = &dev->mem->regions[idx];
+
+		/* Skip regions that start above or end at/below qva. */
+		if (qva < region->guest_user_addr)
+			continue;
+		if (qva >= region->guest_user_addr + region->size)
+			continue;
+
+		return qva - region->guest_user_addr +
+			region->host_user_addr;
+	}
+
+	return 0;
+}
+
+/*
+ * The virtio device sends us the desc, used and avail ring addresses.
+ * This function then converts these to our address space.
+ *
+ * Returns 0 on success, -1 when no memory table has been set yet or when
+ * one of the three ring addresses does not fall inside a known region.
+ */
+static int
+vhost_user_set_vring_addr(struct virtio_net *dev, struct vhost_vring_addr *addr)
+{
+ struct vhost_virtqueue *vq;
+
+ if (dev->mem == NULL)
+ return -1;
+
+ /* addr->index refers to the queue index. The txq is 1, rxq is 0. */
+ vq = dev->virtqueue[addr->index];
+
+ /* The addresses are converted from QEMU virtual to Vhost virtual. */
+ vq->desc = (struct vring_desc *)(uintptr_t)qva_to_vva(dev,
+ addr->desc_user_addr);
+ if (vq->desc == 0) {
+ RTE_LOG(ERR, VHOST_CONFIG,
+ "(%d) failed to find desc ring address.\n",
+ dev->vid);
+ return -1;
+ }
+
+ /*
+ * numa_realloc() may move both the device and the queue, so the two
+ * pointers are re-read from its result before being used again.
+ */
+ dev = numa_realloc(dev, addr->index);
+ vq = dev->virtqueue[addr->index];
+
+ vq->avail = (struct vring_avail *)(uintptr_t)qva_to_vva(dev,
+ addr->avail_user_addr);
+ if (vq->avail == 0) {
+ RTE_LOG(ERR, VHOST_CONFIG,
+ "(%d) failed to find avail ring address.\n",
+ dev->vid);
+ return -1;
+ }
+
+ vq->used = (struct vring_used *)(uintptr_t)qva_to_vva(dev,
+ addr->used_user_addr);
+ if (vq->used == 0) {
+ RTE_LOG(ERR, VHOST_CONFIG,
+ "(%d) failed to find used ring address.\n",
+ dev->vid);
+ return -1;
+ }
+
+ /* Resynchronise both local indexes with the used ring index in guest memory. */
+ if (vq->last_used_idx != vq->used->idx) {
+ RTE_LOG(WARNING, VHOST_CONFIG,
+ "last_used_idx (%u) and vq->used->idx (%u) mismatches; "
+ "some packets maybe resent for Tx and dropped for Rx\n",
+ vq->last_used_idx, vq->used->idx);
+ vq->last_used_idx = vq->used->idx;
+ vq->last_avail_idx = vq->used->idx;
+ }
+
+ vq->log_guest_addr = addr->log_guest_addr;
+
+ LOG_DEBUG(VHOST_CONFIG, "(%d) mapped address desc: %p\n",
+ dev->vid, vq->desc);
+ LOG_DEBUG(VHOST_CONFIG, "(%d) mapped address avail: %p\n",
+ dev->vid, vq->avail);
+ LOG_DEBUG(VHOST_CONFIG, "(%d) mapped address used: %p\n",
+ dev->vid, vq->used);
+ LOG_DEBUG(VHOST_CONFIG, "(%d) log_guest_addr: %" PRIx64 "\n",
+ dev->vid, vq->log_guest_addr);
+
+ return 0;
+}
+
+/*
+ * Handle VHOST_USER_SET_VRING_BASE: the virtio device sends the ring base
+ * index, which becomes both the last used and the last avail index of the
+ * addressed queue. Always returns 0.
+ */
+static int
+vhost_user_set_vring_base(struct virtio_net *dev,
+			  struct vhost_vring_state *state)
+{
+	struct vhost_virtqueue *vq = dev->virtqueue[state->index];
+
+	vq->last_used_idx = state->num;
+	vq->last_avail_idx = state->num;
+
+	return 0;
+}
+
+/*
+ * Record one guest page (a guest-physical to host-physical mapping of
+ * 'size' bytes) in dev->guest_pages, doubling the array when it is full.
+ * A page whose host physical address directly follows the last recorded
+ * entry is merged into that entry instead of taking a new slot.
+ *
+ * If the array cannot be grown, the page is dropped with an error log and
+ * the existing array is left intact.
+ */
+static void
+add_one_guest_page(struct virtio_net *dev, uint64_t guest_phys_addr,
+		   uint64_t host_phys_addr, uint64_t size)
+{
+	struct guest_page *page, *last_page;
+
+	if (dev->nr_guest_pages == dev->max_guest_pages) {
+		/*
+		 * Grow through a temporary: assigning realloc()'s result
+		 * straight to dev->guest_pages would leak the old array on
+		 * failure and leave NULL to be indexed below.
+		 */
+		struct guest_page *grown;
+
+		grown = realloc(dev->guest_pages,
+				dev->max_guest_pages * 2 * sizeof(*page));
+		if (grown == NULL) {
+			RTE_LOG(ERR, VHOST_CONFIG,
+				"cannot realloc guest_pages\n");
+			return;
+		}
+		dev->guest_pages = grown;
+		dev->max_guest_pages *= 2;
+	}
+
+	if (dev->nr_guest_pages > 0) {
+		last_page = &dev->guest_pages[dev->nr_guest_pages - 1];
+		/* merge if the two pages are continuous */
+		if (host_phys_addr == last_page->host_phys_addr +
+				      last_page->size) {
+			last_page->size += size;
+			return;
+		}
+	}
+
+	page = &dev->guest_pages[dev->nr_guest_pages++];
+	page->guest_phys_addr = guest_phys_addr;
+	page->host_phys_addr = host_phys_addr;
+	page->size = size;
+}
+
+/*
+ * Walk a mapped guest memory region page by page and record each page's
+ * guest-physical to host-physical translation with add_one_guest_page().
+ *
+ * The first chunk is shortened so that the following ones start on a
+ * page_size boundary of the guest physical address; the last chunk is
+ * shortened to whatever is left of the region.
+ */
+static void
+add_guest_pages(struct virtio_net *dev, struct virtio_memory_region *reg,
+		uint64_t page_size)
+{
+	uint64_t reg_size = reg->size;
+	uint64_t host_user_addr = reg->host_user_addr;
+	uint64_t guest_phys_addr = reg->guest_phys_addr;
+	uint64_t host_phys_addr;
+	uint64_t size;
+
+	host_phys_addr = rte_mem_virt2phy((void *)(uintptr_t)host_user_addr);
+	size = page_size - (guest_phys_addr & (page_size - 1));
+	size = RTE_MIN(size, reg_size);
+
+	add_one_guest_page(dev, guest_phys_addr, host_phys_addr, size);
+	host_user_addr += size;
+	guest_phys_addr += size;
+	reg_size -= size;
+
+	while (reg_size > 0) {
+		/*
+		 * Clamp to the bytes that remain. Subtracting a full
+		 * page_size from a smaller remainder would wrap the unsigned
+		 * counter and keep the loop running past the region.
+		 */
+		size = RTE_MIN(reg_size, page_size);
+		host_phys_addr = rte_mem_virt2phy((void *)(uintptr_t)
+						  host_user_addr);
+		add_one_guest_page(dev, guest_phys_addr, host_phys_addr,
+				   size);
+
+		host_user_addr += size;
+		guest_phys_addr += size;
+		reg_size -= size;
+	}
+}
+
+#ifdef RTE_LIBRTE_VHOST_DEBUG
+/*
+ * Log every entry of dev->guest_pages. Only built when
+ * RTE_LIBRTE_VHOST_DEBUG is defined; otherwise the call expands to nothing.
+ */
+static void
+dump_guest_pages(struct virtio_net *dev)
+{
+ uint32_t i;
+ struct guest_page *page;
+
+ for (i = 0; i < dev->nr_guest_pages; i++) {
+ page = &dev->guest_pages[i];
+
+ RTE_LOG(INFO, VHOST_CONFIG,
+ "guest physical page region %u\n"
+ "\t guest_phys_addr: %" PRIx64 "\n"
+ "\t host_phys_addr : %" PRIx64 "\n"
+ "\t size : %" PRIx64 "\n",
+ i,
+ page->guest_phys_addr,
+ page->host_phys_addr,
+ page->size);
+ }
+}
+#else
+#define dump_guest_pages(dev)
+#endif
+
+/*
+ * Handle VHOST_USER_SET_MEM_TABLE: replace the device's guest memory map
+ * with the regions described in the message. Each region's file
+ * descriptor is mmap()ed and its pages are recorded in dev->guest_pages.
+ *
+ * A running device is stopped first (destroy_device() is called) and any
+ * previous memory table is released.
+ *
+ * Returns 0 on success and -1 on failure. After a mapping failure the
+ * device is left without a memory table (dev->mem == NULL), and the file
+ * descriptors of the regions that were not mapped are closed.
+ */
+static int
+vhost_user_set_mem_table(struct virtio_net *dev, struct VhostUserMsg *pmsg)
+{
+	struct VhostUserMemory memory = pmsg->payload.memory;
+	struct virtio_memory_region *reg;
+	void *mmap_addr;
+	uint64_t mmap_size;
+	uint64_t mmap_offset;
+	uint64_t alignment;
+	uint32_t i;
+	int fd;
+
+	/*
+	 * nregions comes from the peer and is used below to index
+	 * pmsg->fds[], which is filled with at most
+	 * VHOST_MEMORY_MAX_NREGIONS descriptors. Refuse anything larger
+	 * before any device state is touched.
+	 */
+	if (memory.nregions > VHOST_MEMORY_MAX_NREGIONS) {
+		RTE_LOG(ERR, VHOST_CONFIG,
+			"(%d) too many memory regions (%u)\n",
+			dev->vid, (unsigned int)memory.nregions);
+		return -1;
+	}
+
+	/* Remove from the data plane. */
+	if (dev->flags & VIRTIO_DEV_RUNNING) {
+		dev->flags &= ~VIRTIO_DEV_RUNNING;
+		notify_ops->destroy_device(dev->vid);
+	}
+
+	if (dev->mem) {
+		free_mem_region(dev);
+		rte_free(dev->mem);
+		dev->mem = NULL;
+	}
+
+	dev->nr_guest_pages = 0;
+	if (!dev->guest_pages) {
+		dev->max_guest_pages = 8;
+		dev->guest_pages = malloc(dev->max_guest_pages *
+					  sizeof(struct guest_page));
+		/* add_one_guest_page() indexes this array unconditionally. */
+		if (dev->guest_pages == NULL) {
+			RTE_LOG(ERR, VHOST_CONFIG,
+				"(%d) failed to allocate memory "
+				"for dev->guest_pages\n",
+				dev->vid);
+			return -1;
+		}
+	}
+
+	dev->mem = rte_zmalloc("vhost-mem-table", sizeof(struct virtio_memory) +
+		sizeof(struct virtio_memory_region) * memory.nregions, 0);
+	if (dev->mem == NULL) {
+		RTE_LOG(ERR, VHOST_CONFIG,
+			"(%d) failed to allocate memory for dev->mem\n",
+			dev->vid);
+		return -1;
+	}
+	dev->mem->nregions = memory.nregions;
+
+	for (i = 0; i < memory.nregions; i++) {
+		fd = pmsg->fds[i];
+		reg = &dev->mem->regions[i];
+
+		reg->guest_phys_addr = memory.regions[i].guest_phys_addr;
+		reg->guest_user_addr = memory.regions[i].userspace_addr;
+		reg->size = memory.regions[i].memory_size;
+		reg->fd = fd;
+
+		mmap_offset = memory.regions[i].mmap_offset;
+		mmap_size = reg->size + mmap_offset;
+
+		/* mmap() without flag of MAP_ANONYMOUS, should be called
+		 * with length argument aligned with hugepagesz at older
+		 * longterm version Linux, like 2.6.32 and 3.2.72, or
+		 * mmap() will fail with EINVAL.
+		 *
+		 * to avoid failure, make sure in caller to keep length
+		 * aligned.
+		 */
+		alignment = get_blk_size(fd);
+		if (alignment == (uint64_t)-1) {
+			RTE_LOG(ERR, VHOST_CONFIG,
+				"couldn't get hugepage size through fstat\n");
+			goto err_mmap;
+		}
+		mmap_size = RTE_ALIGN_CEIL(mmap_size, alignment);
+
+		/* Map from file offset 0; mmap_offset is added to the result. */
+		mmap_addr = mmap(NULL, mmap_size, PROT_READ | PROT_WRITE,
+				 MAP_SHARED | MAP_POPULATE, fd, 0);
+
+		if (mmap_addr == MAP_FAILED) {
+			RTE_LOG(ERR, VHOST_CONFIG,
+				"mmap region %u failed.\n", i);
+			goto err_mmap;
+		}
+
+		reg->mmap_addr = mmap_addr;
+		reg->mmap_size = mmap_size;
+		reg->host_user_addr = (uint64_t)(uintptr_t)mmap_addr +
+				      mmap_offset;
+
+		add_guest_pages(dev, reg, alignment);
+
+		RTE_LOG(INFO, VHOST_CONFIG,
+			"guest memory region %u, size: 0x%" PRIx64 "\n"
+			"\t guest physical addr: 0x%" PRIx64 "\n"
+			"\t guest virtual addr: 0x%" PRIx64 "\n"
+			"\t host virtual addr: 0x%" PRIx64 "\n"
+			"\t mmap addr : 0x%" PRIx64 "\n"
+			"\t mmap size : 0x%" PRIx64 "\n"
+			"\t mmap align: 0x%" PRIx64 "\n"
+			"\t mmap off : 0x%" PRIx64 "\n",
+			i, reg->size,
+			reg->guest_phys_addr,
+			reg->guest_user_addr,
+			reg->host_user_addr,
+			(uint64_t)(uintptr_t)mmap_addr,
+			mmap_size,
+			alignment,
+			mmap_offset);
+	}
+
+	dump_guest_pages(dev);
+
+	return 0;
+
+err_mmap:
+	/*
+	 * free_mem_region() only closes the descriptors of regions that got
+	 * mapped (host_user_addr != 0). Region i failed before that point, so
+	 * close its descriptor and those of the regions never reached.
+	 */
+	for (; i < memory.nregions; i++)
+		close(pmsg->fds[i]);
+	free_mem_region(dev);
+	rte_free(dev->mem);
+	dev->mem = NULL;
+	return -1;
+}
+
+/*
+ * A virtqueue is ready once it exists, its descriptor ring address is set
+ * and both its kick and call eventfds have been initialised.
+ * Returns 1 when ready, 0 otherwise.
+ */
+static int
+vq_is_ready(struct vhost_virtqueue *vq)
+{
+	if (vq == NULL || vq->desc == NULL)
+		return 0;
+	if (vq->kickfd == VIRTIO_UNINITIALIZED_EVENTFD)
+		return 0;
+
+	return vq->callfd != VIRTIO_UNINITIALIZED_EVENTFD;
+}
+
+/*
+ * Check that the RX and TX queue of every queue pair of the device are
+ * ready (see vq_is_ready()). Logs the outcome and returns 1 when all of
+ * them are, 0 as soon as one is not.
+ */
+static int
+virtio_is_ready(struct virtio_net *dev)
+{
+	uint32_t qp;
+
+	for (qp = 0; qp < dev->virt_qp_nb; qp++) {
+		struct vhost_virtqueue *rx_vq =
+			dev->virtqueue[qp * VIRTIO_QNUM + VIRTIO_RXQ];
+		struct vhost_virtqueue *tx_vq =
+			dev->virtqueue[qp * VIRTIO_QNUM + VIRTIO_TXQ];
+
+		if (vq_is_ready(rx_vq) && vq_is_ready(tx_vq))
+			continue;
+
+		RTE_LOG(INFO, VHOST_CONFIG,
+			"virtio is not ready for processing.\n");
+		return 0;
+	}
+
+	RTE_LOG(INFO, VHOST_CONFIG,
+		"virtio is now ready for processing.\n");
+	return 1;
+}
+
+/*
+ * Handle VHOST_USER_SET_VRING_CALL: install the call eventfd carried by
+ * the message (or VIRTIO_INVALID_EVENTFD when the NOFD flag is set) on the
+ * addressed queue, closing the descriptor it replaces. The queue pair is
+ * allocated here if the index lies beyond the pairs allocated so far.
+ */
+static void
+vhost_user_set_vring_call(struct virtio_net *dev, struct VhostUserMsg *pmsg)
+{
+	const int no_fd =
+		(pmsg->payload.u64 & VHOST_USER_VRING_NOFD_MASK) != 0;
+	struct vhost_vring_file file;
+	struct vhost_virtqueue *vq;
+	uint32_t cur_qp_idx;
+
+	file.index = pmsg->payload.u64 & VHOST_USER_VRING_IDX_MASK;
+	file.fd = no_fd ? VIRTIO_INVALID_EVENTFD : pmsg->fds[0];
+	RTE_LOG(INFO, VHOST_CONFIG,
+		"vring call idx:%d file:%d\n", file.index, file.fd);
+
+	/*
+	 * FIXME: VHOST_SET_VRING_CALL is the first per-vring message
+	 * we get, so we do vring queue pair allocation here.
+	 */
+	cur_qp_idx = file.index / VIRTIO_QNUM;
+	if (cur_qp_idx + 1 > dev->virt_qp_nb &&
+	    alloc_vring_queue_pair(dev, cur_qp_idx) < 0)
+		return;
+
+	vq = dev->virtqueue[file.index];
+	assert(vq != NULL);
+
+	/* Drop the eventfd being replaced, if there is one. */
+	if (vq->callfd >= 0)
+		close(vq->callfd);
+
+	vq->callfd = file.fd;
+}
+
+/*
+ * Handle VHOST_USER_SET_VRING_KICK: install the kick eventfd on the
+ * addressed queue, closing the one it replaces. In vhost-user the kick
+ * message is also the point where we test whether the virtio device is
+ * ready for packet processing; if it is and the device is not running
+ * yet, the application is notified through new_device().
+ */
+static void
+vhost_user_set_vring_kick(struct virtio_net *dev, struct VhostUserMsg *pmsg)
+{
+	const int no_fd =
+		(pmsg->payload.u64 & VHOST_USER_VRING_NOFD_MASK) != 0;
+	struct vhost_vring_file file;
+	struct vhost_virtqueue *vq;
+
+	file.index = pmsg->payload.u64 & VHOST_USER_VRING_IDX_MASK;
+	file.fd = no_fd ? VIRTIO_INVALID_EVENTFD : pmsg->fds[0];
+	RTE_LOG(INFO, VHOST_CONFIG,
+		"vring kick idx:%d file:%d\n", file.index, file.fd);
+
+	vq = dev->virtqueue[file.index];
+	if (vq->kickfd >= 0)
+		close(vq->kickfd);
+	vq->kickfd = file.fd;
+
+	/* Nothing more to do unless the device just became startable. */
+	if (!virtio_is_ready(dev) || (dev->flags & VIRTIO_DEV_RUNNING))
+		return;
+
+	if (dev->dequeue_zero_copy) {
+		RTE_LOG(INFO, VHOST_CONFIG,
+			"dequeue zero copy is enabled\n");
+	}
+
+	/* Only mark the device running if the application accepted it. */
+	if (notify_ops->new_device(dev->vid) == 0)
+		dev->flags |= VIRTIO_DEV_RUNNING;
+}
+
+/*
+ * Release the zero-copy state of a queue: free every mbuf still linked on
+ * vq->zmbuf_list, unlink the entries, then free the zmbufs array.
+ */
+static void
+free_zmbufs(struct vhost_virtqueue *vq)
+{
+	struct zcopy_mbuf *zmbuf, *next;
+
+	for (zmbuf = TAILQ_FIRST(&vq->zmbuf_list);
+	     zmbuf != NULL; zmbuf = next) {
+		/* Fetch the successor before the entry is unlinked. */
+		next = TAILQ_NEXT(zmbuf, next);
+
+		rte_pktmbuf_free(zmbuf->mbuf);
+		TAILQ_REMOVE(&vq->zmbuf_list, zmbuf, next);
+	}
+
+	rte_free(vq->zmbufs);
+	/*
+	 * Do not keep a dangling pointer: a second call for the same queue
+	 * would otherwise free the array twice.
+	 */
+	vq->zmbufs = NULL;
+}
+
+/*
+ * Handle VHOST_USER_GET_VRING_BASE, which qemu sends when virtio is
+ * stopped. A running device is stopped first (destroy_device() is called),
+ * then the queue's last used index is returned in state->num and the
+ * queue's kick eventfd, zero-copy state and shadow used ring are released.
+ * Always returns 0.
+ */
+static int
+vhost_user_get_vring_base(struct virtio_net *dev,
+			  struct vhost_vring_state *state)
+{
+	struct vhost_virtqueue *vq = dev->virtqueue[state->index];
+
+	/* We have to stop the queue (virtio) if it is running. */
+	if (dev->flags & VIRTIO_DEV_RUNNING) {
+		dev->flags &= ~VIRTIO_DEV_RUNNING;
+		notify_ops->destroy_device(dev->vid);
+	}
+
+	/* Here we are safe to get the last used index */
+	state->num = vq->last_used_idx;
+
+	/*
+	 * The second value is the ring base (state->num), not a file
+	 * descriptor; both fields of vhost_vring_state are unsigned.
+	 */
+	RTE_LOG(INFO, VHOST_CONFIG,
+		"vring base idx:%u num:%u\n", state->index, state->num);
+	/*
+	 * Based on current qemu vhost-user implementation, this message is
+	 * sent and only sent in vhost_vring_stop.
+	 * TODO: cleanup the vring, it isn't usable since here.
+	 */
+	if (vq->kickfd >= 0)
+		close(vq->kickfd);
+
+	vq->kickfd = VIRTIO_UNINITIALIZED_EVENTFD;
+
+	if (dev->dequeue_zero_copy)
+		free_zmbufs(vq);
+	rte_free(vq->shadow_used_ring);
+	vq->shadow_used_ring = NULL;
+
+	return 0;
+}
+
+/*
+ * Handle VHOST_USER_SET_VRING_ENABLE, sent by qemu when the virtio queues
+ * are ready to work: tell the application about the new state (if it
+ * registered vring_state_changed) and record it on the queue.
+ * Always returns 0.
+ */
+static int
+vhost_user_set_vring_enable(struct virtio_net *dev,
+			    struct vhost_vring_state *state)
+{
+	const int enable = (int)state->num;
+
+	RTE_LOG(INFO, VHOST_CONFIG,
+		"set queue enable: %d to qp idx: %d\n",
+		enable, state->index);
+
+	/* The callback is optional; it runs before the flag is stored. */
+	if (notify_ops->vring_state_changed != NULL)
+		notify_ops->vring_state_changed(dev->vid, state->index,
+						enable);
+
+	dev->virtqueue[state->index]->enabled = enable;
+
+	return 0;
+}
+
+/*
+ * Store the protocol features chosen by the peer. The request is silently
+ * ignored when it contains a bit outside VHOST_USER_PROTOCOL_FEATURES.
+ */
+static void
+vhost_user_set_protocol_features(struct virtio_net *dev,
+				 uint64_t protocol_features)
+{
+	const uint64_t unsupported =
+		protocol_features & ~VHOST_USER_PROTOCOL_FEATURES;
+
+	if (unsupported == 0)
+		dev->protocol_features = protocol_features;
+}
+
+/*
+ * Handle VHOST_USER_SET_LOG_BASE: map the log area described by the
+ * message (fd, size, offset) and remember it in the device, unmapping any
+ * log area installed by an earlier request.
+ *
+ * The descriptor received with the message is consumed here: it is closed
+ * on every path once it has been accepted as valid (fd >= 0).
+ *
+ * Returns 0 on success, -1 on an invalid fd, an unexpected message size or
+ * an mmap() failure.
+ */
+static int
+vhost_user_set_log_base(struct virtio_net *dev, struct VhostUserMsg *msg)
+{
+	int fd = msg->fds[0];
+	uint64_t size, off;
+	void *addr;
+
+	if (fd < 0) {
+		RTE_LOG(ERR, VHOST_CONFIG, "invalid log fd: %d\n", fd);
+		return -1;
+	}
+
+	if (msg->size != sizeof(VhostUserLog)) {
+		RTE_LOG(ERR, VHOST_CONFIG,
+			"invalid log base msg size: %"PRId32" != %d\n",
+			msg->size, (int)sizeof(VhostUserLog));
+		/* The fd is ours by now; do not leak it on rejection. */
+		close(fd);
+		return -1;
+	}
+
+	size = msg->payload.log.mmap_size;
+	off = msg->payload.log.mmap_offset;
+	RTE_LOG(INFO, VHOST_CONFIG,
+		"log mmap size: %"PRId64", offset: %"PRId64"\n",
+		size, off);
+
+	/*
+	 * mmap from 0 to workaround a hugepage mmap bug: mmap will
+	 * fail when offset is not page size aligned.
+	 *
+	 * NOTE(review): 'off' is added to the mapping address below without
+	 * being checked against the mapped length -- confirm whether the
+	 * mapping should cover size + off bytes.
+	 */
+	addr = mmap(0, size, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
+	close(fd);
+	if (addr == MAP_FAILED) {
+		RTE_LOG(ERR, VHOST_CONFIG, "mmap log base failed!\n");
+		return -1;
+	}
+
+	/*
+	 * Free previously mapped log memory on occasionally
+	 * multiple VHOST_USER_SET_LOG_BASE.
+	 */
+	if (dev->log_addr) {
+		munmap((void *)(uintptr_t)dev->log_addr, dev->log_size);
+	}
+	dev->log_addr = (uint64_t)(uintptr_t)addr;
+	dev->log_base = dev->log_addr + off;
+	dev->log_size = size;
+
+	return 0;
+}
+
+/*
+ * Handle VHOST_USER_SEND_RARP.
+ *
+ * A RARP packet is broadcast to tell switches about the new location of a
+ * migrated VM, so that packets from outside are not lost after migration.
+ * Nothing is actually sent from here: the MAC address found in the message
+ * payload is saved in the device and the 'broadcast_rarp' flag is raised so
+ * that rte_vhost_dequeue_burst() injects the packet. Always returns 0.
+ */
+static int
+vhost_user_send_rarp(struct virtio_net *dev, struct VhostUserMsg *msg)
+{
+	const uint8_t *mac = (const uint8_t *)&msg->payload.u64;
+
+	RTE_LOG(DEBUG, VHOST_CONFIG,
+		":: mac: %02x:%02x:%02x:%02x:%02x:%02x\n",
+		mac[0], mac[1], mac[2], mac[3], mac[4], mac[5]);
+	memcpy(dev->mac.addr_bytes, mac, 6);
+
+	/*
+	 * The write barrier makes sure the MAC copy above is visible
+	 * before the flag that triggers the RARP injection is set.
+	 */
+	rte_smp_wmb();
+	rte_atomic16_set(&dev->broadcast_rarp, 1);
+
+	return 0;
+}
+
+/*
+ * Read one vhost-user message from sockfd into msg: first the fixed-size
+ * header together with any file descriptors passed along (stored in
+ * msg->fds), then the payload whose length the header announces.
+ *
+ * Returns the number of bytes of the last read on success, 0 when the
+ * peer closed the connection, and a negative value on failure (NULL msg,
+ * read error, truncated header, oversized or truncated payload).
+ */
+static int
+read_vhost_message(int sockfd, struct VhostUserMsg *msg)
+{
+	int ret;
+
+	/* msg is dereferenced right below, so reject NULL before first use. */
+	if (msg == NULL)
+		return -1;
+
+	ret = read_fd_message(sockfd, (char *)msg, VHOST_USER_HDR_SIZE,
+			      msg->fds, VHOST_MEMORY_MAX_NREGIONS);
+	if (ret <= 0)
+		return ret;
+
+	/*
+	 * A short header leaves msg->size (and the rest of the header)
+	 * unset, so it must not be trusted.
+	 */
+	if (ret != (int)VHOST_USER_HDR_SIZE) {
+		RTE_LOG(ERR, VHOST_CONFIG,
+			"read control message header failed\n");
+		return -1;
+	}
+
+	if (msg->size == 0)
+		return ret;
+
+	if (msg->size > sizeof(msg->payload)) {
+		RTE_LOG(ERR, VHOST_CONFIG,
+			"invalid msg size: %d\n", msg->size);
+		return -1;
+	}
+	ret = read(sockfd, &msg->payload, msg->size);
+	if (ret <= 0)
+		return ret;
+	if (ret != (int)msg->size) {
+		RTE_LOG(ERR, VHOST_CONFIG,
+			"read control message failed\n");
+		return -1;
+	}
+
+	return ret;
+}
+
+/*
+ * Send msg back to the peer as a reply: the version field of the flags is
+ * replaced by VHOST_USER_VERSION and the reply bit is set before the
+ * header plus msg->size payload bytes are written, without any file
+ * descriptor. A NULL msg is a no-op that returns 0; otherwise the result
+ * of send_fd_message() is returned.
+ */
+static int
+send_vhost_message(int sockfd, struct VhostUserMsg *msg)
+{
+	if (msg == NULL)
+		return 0;
+
+	msg->flags = (msg->flags & ~VHOST_USER_VERSION_MASK) |
+		     VHOST_USER_VERSION | VHOST_USER_REPLY_MASK;
+
+	return send_fd_message(sockfd, (char *)msg,
+			       VHOST_USER_HDR_SIZE + msg->size, NULL, 0);
+}
+
+/*
+ * Read one vhost-user message from fd and dispatch it to the matching
+ * handler for device vid. Requests that expect an answer are replied to on
+ * the same fd, reusing the received message as the reply buffer.
+ *
+ * Returns -1 when the device is unknown, the read fails, the peer closed
+ * the connection or the request code is out of range; returns 0 once a
+ * message has been dispatched.
+ * NOTE(review): the return values of the individual handlers are ignored,
+ * so a handler failure is not reported to the caller -- confirm intent.
+ */
+int
+vhost_user_msg_handler(int vid, int fd)
+{
+ struct virtio_net *dev;
+ struct VhostUserMsg msg;
+ int ret;
+
+ dev = get_device(vid);
+ if (dev == NULL)
+ return -1;
+
+ ret = read_vhost_message(fd, &msg);
+ if (ret <= 0 || msg.request >= VHOST_USER_MAX) {
+ if (ret < 0)
+ RTE_LOG(ERR, VHOST_CONFIG,
+ "vhost read message failed\n");
+ else if (ret == 0)
+ RTE_LOG(INFO, VHOST_CONFIG,
+ "vhost peer closed\n");
+ else
+ /* the read succeeded but the request code is out of range */
+ RTE_LOG(ERR, VHOST_CONFIG,
+ "vhost read incorrect message\n");
+
+ return -1;
+ }
+
+ /* msg.request was range-checked above, so the table lookup is in bounds */
+ RTE_LOG(INFO, VHOST_CONFIG, "read message %s\n",
+ vhost_message_str[msg.request]);
+ switch (msg.request) {
+ case VHOST_USER_GET_FEATURES:
+ msg.payload.u64 = vhost_user_get_features();
+ msg.size = sizeof(msg.payload.u64);
+ send_vhost_message(fd, &msg);
+ break;
+ case VHOST_USER_SET_FEATURES:
+ vhost_user_set_features(dev, msg.payload.u64);
+ break;
+
+ case VHOST_USER_GET_PROTOCOL_FEATURES:
+ msg.payload.u64 = VHOST_USER_PROTOCOL_FEATURES;
+ msg.size = sizeof(msg.payload.u64);
+ send_vhost_message(fd, &msg);
+ break;
+ case VHOST_USER_SET_PROTOCOL_FEATURES:
+ vhost_user_set_protocol_features(dev, msg.payload.u64);
+ break;
+
+ case VHOST_USER_SET_OWNER:
+ vhost_user_set_owner();
+ break;
+ case VHOST_USER_RESET_OWNER:
+ vhost_user_reset_owner(dev);
+ break;
+
+ case VHOST_USER_SET_MEM_TABLE:
+ vhost_user_set_mem_table(dev, &msg);
+ break;
+
+ case VHOST_USER_SET_LOG_BASE:
+ vhost_user_set_log_base(dev, &msg);
+
+ /* it needs a reply */
+ msg.size = sizeof(msg.payload.u64);
+ send_vhost_message(fd, &msg);
+ break;
+ case VHOST_USER_SET_LOG_FD:
+ /* not supported: just release the descriptor that came with it */
+ close(msg.fds[0]);
+ RTE_LOG(INFO, VHOST_CONFIG, "not implemented.\n");
+ break;
+
+ case VHOST_USER_SET_VRING_NUM:
+ vhost_user_set_vring_num(dev, &msg.payload.state);
+ break;
+ case VHOST_USER_SET_VRING_ADDR:
+ vhost_user_set_vring_addr(dev, &msg.payload.addr);
+ break;
+ case VHOST_USER_SET_VRING_BASE:
+ vhost_user_set_vring_base(dev, &msg.payload.state);
+ break;
+
+ case VHOST_USER_GET_VRING_BASE:
+ /* the handler stores the ring base in msg.payload.state for the reply */
+ ret = vhost_user_get_vring_base(dev, &msg.payload.state);
+ msg.size = sizeof(msg.payload.state);
+ send_vhost_message(fd, &msg);
+ break;
+
+ case VHOST_USER_SET_VRING_KICK:
+ vhost_user_set_vring_kick(dev, &msg);
+ break;
+ case VHOST_USER_SET_VRING_CALL:
+ vhost_user_set_vring_call(dev, &msg);
+ break;
+
+ case VHOST_USER_SET_VRING_ERR:
+ /* not supported: close the descriptor unless the NOFD flag says none was sent */
+ if (!(msg.payload.u64 & VHOST_USER_VRING_NOFD_MASK))
+ close(msg.fds[0]);
+ RTE_LOG(INFO, VHOST_CONFIG, "not implemented\n");
+ break;
+
+ case VHOST_USER_GET_QUEUE_NUM:
+ msg.payload.u64 = VHOST_MAX_QUEUE_PAIRS;
+ msg.size = sizeof(msg.payload.u64);
+ send_vhost_message(fd, &msg);
+ break;
+
+ case VHOST_USER_SET_VRING_ENABLE:
+ vhost_user_set_vring_enable(dev, &msg.payload.state);
+ break;
+ case VHOST_USER_SEND_RARP:
+ vhost_user_send_rarp(dev, &msg);
+ break;
+
+ default:
+ break;
+
+ }
+
+ return 0;
+}
diff --git a/lib/librte_vhost/vhost_user/vhost-net-user.h b/lib/librte_vhost/vhost_user.h
index f5332396..ba78d326 100644
--- a/lib/librte_vhost/vhost_user/vhost-net-user.h
+++ b/lib/librte_vhost/vhost_user.h
@@ -43,6 +43,14 @@
#define VHOST_MEMORY_MAX_NREGIONS 8
+#define VHOST_USER_PROTOCOL_F_MQ 0
+#define VHOST_USER_PROTOCOL_F_LOG_SHMFD 1
+#define VHOST_USER_PROTOCOL_F_RARP 2
+
+#define VHOST_USER_PROTOCOL_FEATURES ((1ULL << VHOST_USER_PROTOCOL_F_MQ) | \
+ (1ULL << VHOST_USER_PROTOCOL_F_LOG_SHMFD) |\
+ (1ULL << VHOST_USER_PROTOCOL_F_RARP))
+
typedef enum VhostUserRequest {
VHOST_USER_NONE = 0,
VHOST_USER_GET_FEATURES = 1,
@@ -109,5 +117,12 @@ typedef struct VhostUserMsg {
/* The version of the protocol we support */
#define VHOST_USER_VERSION 0x1
-/*****************************************************************************/
+
+/* vhost_user.c */
+int vhost_user_msg_handler(int vid, int fd);
+
+/* socket.c */
+int read_fd_message(int sockfd, char *buf, int buflen, int *fds, int fd_num);
+int send_fd_message(int sockfd, char *buf, int buflen, int *fds, int fd_num);
+
#endif
diff --git a/lib/librte_vhost/vhost_user/virtio-net-user.c b/lib/librte_vhost/vhost_user/virtio-net-user.c
deleted file mode 100644
index e7c43479..00000000
--- a/lib/librte_vhost/vhost_user/virtio-net-user.c
+++ /dev/null
@@ -1,470 +0,0 @@
-/*-
- * BSD LICENSE
- *
- * Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- *
- * * Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in
- * the documentation and/or other materials provided with the
- * distribution.
- * * Neither the name of Intel Corporation nor the names of its
- * contributors may be used to endorse or promote products derived
- * from this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- */
-
-#include <stdint.h>
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-#include <unistd.h>
-#include <sys/mman.h>
-#include <sys/types.h>
-#include <sys/stat.h>
-
-#include <rte_common.h>
-#include <rte_log.h>
-
-#include "virtio-net-user.h"
-#include "vhost-net-user.h"
-#include "vhost-net.h"
-
-struct orig_region_map {
- int fd;
- uint64_t mapped_address;
- uint64_t mapped_size;
- uint64_t blksz;
-};
-
-#define orig_region(ptr, nregions) \
- ((struct orig_region_map *)RTE_PTR_ADD((ptr), \
- sizeof(struct virtio_memory) + \
- sizeof(struct virtio_memory_regions) * (nregions)))
-
-static uint64_t
-get_blk_size(int fd)
-{
- struct stat stat;
- int ret;
-
- ret = fstat(fd, &stat);
- return ret == -1 ? (uint64_t)-1 : (uint64_t)stat.st_blksize;
-}
-
-static void
-free_mem_region(struct virtio_net *dev)
-{
- struct orig_region_map *region;
- unsigned int idx;
-
- if (!dev || !dev->mem)
- return;
-
- region = orig_region(dev->mem, dev->mem->nregions);
- for (idx = 0; idx < dev->mem->nregions; idx++) {
- if (region[idx].mapped_address) {
- munmap((void *)(uintptr_t)region[idx].mapped_address,
- region[idx].mapped_size);
- close(region[idx].fd);
- }
- }
-}
-
-void
-vhost_backend_cleanup(struct virtio_net *dev)
-{
- if (dev->mem) {
- free_mem_region(dev);
- free(dev->mem);
- dev->mem = NULL;
- }
- if (dev->log_addr) {
- munmap((void *)(uintptr_t)dev->log_addr, dev->log_size);
- dev->log_addr = 0;
- }
-}
-
-int
-user_set_mem_table(int vid, struct VhostUserMsg *pmsg)
-{
- struct VhostUserMemory memory = pmsg->payload.memory;
- struct virtio_memory_regions *pregion;
- uint64_t mapped_address, mapped_size;
- struct virtio_net *dev;
- unsigned int idx = 0;
- struct orig_region_map *pregion_orig;
- uint64_t alignment;
-
- /* unmap old memory regions one by one*/
- dev = get_device(vid);
- if (dev == NULL)
- return -1;
-
- /* Remove from the data plane. */
- if (dev->flags & VIRTIO_DEV_RUNNING) {
- dev->flags &= ~VIRTIO_DEV_RUNNING;
- notify_ops->destroy_device(vid);
- }
-
- if (dev->mem) {
- free_mem_region(dev);
- free(dev->mem);
- dev->mem = NULL;
- }
-
- dev->mem = calloc(1,
- sizeof(struct virtio_memory) +
- sizeof(struct virtio_memory_regions) * memory.nregions +
- sizeof(struct orig_region_map) * memory.nregions);
- if (dev->mem == NULL) {
- RTE_LOG(ERR, VHOST_CONFIG,
- "(%d) failed to allocate memory for dev->mem\n",
- dev->vid);
- return -1;
- }
- dev->mem->nregions = memory.nregions;
-
- pregion_orig = orig_region(dev->mem, memory.nregions);
- for (idx = 0; idx < memory.nregions; idx++) {
- pregion = &dev->mem->regions[idx];
- pregion->guest_phys_address =
- memory.regions[idx].guest_phys_addr;
- pregion->guest_phys_address_end =
- memory.regions[idx].guest_phys_addr +
- memory.regions[idx].memory_size;
- pregion->memory_size =
- memory.regions[idx].memory_size;
- pregion->userspace_address =
- memory.regions[idx].userspace_addr;
-
- /* This is ugly */
- mapped_size = memory.regions[idx].memory_size +
- memory.regions[idx].mmap_offset;
-
- /* mmap() without flag of MAP_ANONYMOUS, should be called
- * with length argument aligned with hugepagesz at older
- * longterm version Linux, like 2.6.32 and 3.2.72, or
- * mmap() will fail with EINVAL.
- *
- * to avoid failure, make sure in caller to keep length
- * aligned.
- */
- alignment = get_blk_size(pmsg->fds[idx]);
- if (alignment == (uint64_t)-1) {
- RTE_LOG(ERR, VHOST_CONFIG,
- "couldn't get hugepage size through fstat\n");
- goto err_mmap;
- }
- mapped_size = RTE_ALIGN_CEIL(mapped_size, alignment);
-
- mapped_address = (uint64_t)(uintptr_t)mmap(NULL,
- mapped_size,
- PROT_READ | PROT_WRITE, MAP_SHARED,
- pmsg->fds[idx],
- 0);
-
- RTE_LOG(INFO, VHOST_CONFIG,
- "mapped region %d fd:%d to:%p sz:0x%"PRIx64" "
- "off:0x%"PRIx64" align:0x%"PRIx64"\n",
- idx, pmsg->fds[idx], (void *)(uintptr_t)mapped_address,
- mapped_size, memory.regions[idx].mmap_offset,
- alignment);
-
- if (mapped_address == (uint64_t)(uintptr_t)MAP_FAILED) {
- RTE_LOG(ERR, VHOST_CONFIG,
- "mmap qemu guest failed.\n");
- goto err_mmap;
- }
-
- pregion_orig[idx].mapped_address = mapped_address;
- pregion_orig[idx].mapped_size = mapped_size;
- pregion_orig[idx].blksz = alignment;
- pregion_orig[idx].fd = pmsg->fds[idx];
-
- mapped_address += memory.regions[idx].mmap_offset;
-
- pregion->address_offset = mapped_address -
- pregion->guest_phys_address;
-
- if (memory.regions[idx].guest_phys_addr == 0) {
- dev->mem->base_address =
- memory.regions[idx].userspace_addr;
- dev->mem->mapped_address =
- pregion->address_offset;
- }
-
- LOG_DEBUG(VHOST_CONFIG,
- "REGION: %u GPA: %p QEMU VA: %p SIZE (%"PRIu64")\n",
- idx,
- (void *)(uintptr_t)pregion->guest_phys_address,
- (void *)(uintptr_t)pregion->userspace_address,
- pregion->memory_size);
- }
-
- return 0;
-
-err_mmap:
- while (idx--) {
- munmap((void *)(uintptr_t)pregion_orig[idx].mapped_address,
- pregion_orig[idx].mapped_size);
- close(pregion_orig[idx].fd);
- }
- free(dev->mem);
- dev->mem = NULL;
- return -1;
-}
-
-static int
-vq_is_ready(struct vhost_virtqueue *vq)
-{
- return vq && vq->desc &&
- vq->kickfd != VIRTIO_UNINITIALIZED_EVENTFD &&
- vq->callfd != VIRTIO_UNINITIALIZED_EVENTFD;
-}
-
-static int
-virtio_is_ready(struct virtio_net *dev)
-{
- struct vhost_virtqueue *rvq, *tvq;
- uint32_t i;
-
- for (i = 0; i < dev->virt_qp_nb; i++) {
- rvq = dev->virtqueue[i * VIRTIO_QNUM + VIRTIO_RXQ];
- tvq = dev->virtqueue[i * VIRTIO_QNUM + VIRTIO_TXQ];
-
- if (!vq_is_ready(rvq) || !vq_is_ready(tvq)) {
- RTE_LOG(INFO, VHOST_CONFIG,
- "virtio is not ready for processing.\n");
- return 0;
- }
- }
-
- RTE_LOG(INFO, VHOST_CONFIG,
- "virtio is now ready for processing.\n");
- return 1;
-}
-
-void
-user_set_vring_call(int vid, struct VhostUserMsg *pmsg)
-{
- struct vhost_vring_file file;
-
- file.index = pmsg->payload.u64 & VHOST_USER_VRING_IDX_MASK;
- if (pmsg->payload.u64 & VHOST_USER_VRING_NOFD_MASK)
- file.fd = VIRTIO_INVALID_EVENTFD;
- else
- file.fd = pmsg->fds[0];
- RTE_LOG(INFO, VHOST_CONFIG,
- "vring call idx:%d file:%d\n", file.index, file.fd);
- vhost_set_vring_call(vid, &file);
-}
-
-
-/*
- * In vhost-user, when we receive kick message, will test whether virtio
- * device is ready for packet processing.
- */
-void
-user_set_vring_kick(int vid, struct VhostUserMsg *pmsg)
-{
- struct vhost_vring_file file;
- struct virtio_net *dev = get_device(vid);
-
- if (!dev)
- return;
-
- file.index = pmsg->payload.u64 & VHOST_USER_VRING_IDX_MASK;
- if (pmsg->payload.u64 & VHOST_USER_VRING_NOFD_MASK)
- file.fd = VIRTIO_INVALID_EVENTFD;
- else
- file.fd = pmsg->fds[0];
- RTE_LOG(INFO, VHOST_CONFIG,
- "vring kick idx:%d file:%d\n", file.index, file.fd);
- vhost_set_vring_kick(vid, &file);
-
- if (virtio_is_ready(dev) && !(dev->flags & VIRTIO_DEV_RUNNING)) {
- if (notify_ops->new_device(vid) == 0)
- dev->flags |= VIRTIO_DEV_RUNNING;
- }
-}
-
-/*
- * when virtio is stopped, qemu will send us the GET_VRING_BASE message.
- */
-int
-user_get_vring_base(int vid, struct vhost_vring_state *state)
-{
- struct virtio_net *dev = get_device(vid);
-
- if (dev == NULL)
- return -1;
- /* We have to stop the queue (virtio) if it is running. */
- if (dev->flags & VIRTIO_DEV_RUNNING) {
- dev->flags &= ~VIRTIO_DEV_RUNNING;
- notify_ops->destroy_device(vid);
- }
-
- /* Here we are safe to get the last used index */
- vhost_get_vring_base(vid, state->index, state);
-
- RTE_LOG(INFO, VHOST_CONFIG,
- "vring base idx:%d file:%d\n", state->index, state->num);
- /*
- * Based on current qemu vhost-user implementation, this message is
- * sent and only sent in vhost_vring_stop.
- * TODO: cleanup the vring, it isn't usable since here.
- */
- if (dev->virtqueue[state->index]->kickfd >= 0)
- close(dev->virtqueue[state->index]->kickfd);
-
- dev->virtqueue[state->index]->kickfd = VIRTIO_UNINITIALIZED_EVENTFD;
-
- return 0;
-}
-
-/*
- * when virtio queues are ready to work, qemu will send us to
- * enable the virtio queue pair.
- */
-int
-user_set_vring_enable(int vid, struct vhost_vring_state *state)
-{
- struct virtio_net *dev;
- int enable = (int)state->num;
-
- dev = get_device(vid);
- if (dev == NULL)
- return -1;
-
- RTE_LOG(INFO, VHOST_CONFIG,
- "set queue enable: %d to qp idx: %d\n",
- enable, state->index);
-
- if (notify_ops->vring_state_changed)
- notify_ops->vring_state_changed(vid, state->index, enable);
-
- dev->virtqueue[state->index]->enabled = enable;
-
- return 0;
-}
-
-void
-user_set_protocol_features(int vid, uint64_t protocol_features)
-{
- struct virtio_net *dev;
-
- dev = get_device(vid);
- if (dev == NULL || protocol_features & ~VHOST_USER_PROTOCOL_FEATURES)
- return;
-
- dev->protocol_features = protocol_features;
-}
-
-int
-user_set_log_base(int vid, struct VhostUserMsg *msg)
-{
- struct virtio_net *dev;
- int fd = msg->fds[0];
- uint64_t size, off;
- void *addr;
-
- dev = get_device(vid);
- if (!dev)
- return -1;
-
- if (fd < 0) {
- RTE_LOG(ERR, VHOST_CONFIG, "invalid log fd: %d\n", fd);
- return -1;
- }
-
- if (msg->size != sizeof(VhostUserLog)) {
- RTE_LOG(ERR, VHOST_CONFIG,
- "invalid log base msg size: %"PRId32" != %d\n",
- msg->size, (int)sizeof(VhostUserLog));
- return -1;
- }
-
- size = msg->payload.log.mmap_size;
- off = msg->payload.log.mmap_offset;
- RTE_LOG(INFO, VHOST_CONFIG,
- "log mmap size: %"PRId64", offset: %"PRId64"\n",
- size, off);
-
- /*
- * mmap from 0 to workaround a hugepage mmap bug: mmap will
- * fail when offset is not page size aligned.
- */
- addr = mmap(0, size, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
- close(fd);
- if (addr == MAP_FAILED) {
- RTE_LOG(ERR, VHOST_CONFIG, "mmap log base failed!\n");
- return -1;
- }
-
- /*
- * Free previously mapped log memory on occasionally
- * multiple VHOST_USER_SET_LOG_BASE.
- */
- if (dev->log_addr) {
- munmap((void *)(uintptr_t)dev->log_addr, dev->log_size);
- }
- dev->log_addr = (uint64_t)(uintptr_t)addr;
- dev->log_base = dev->log_addr + off;
- dev->log_size = size;
-
- return 0;
-}
-
-/*
- * An rarp packet is constructed and broadcasted to notify switches about
- * the new location of the migrated VM, so that packets from outside will
- * not be lost after migration.
- *
- * However, we don't actually "send" a rarp packet here, instead, we set
- * a flag 'broadcast_rarp' to let rte_vhost_dequeue_burst() inject it.
- */
-int
-user_send_rarp(int vid, struct VhostUserMsg *msg)
-{
- struct virtio_net *dev;
- uint8_t *mac = (uint8_t *)&msg->payload.u64;
-
- dev = get_device(vid);
- if (!dev)
- return -1;
-
- RTE_LOG(DEBUG, VHOST_CONFIG,
- ":: mac: %02x:%02x:%02x:%02x:%02x:%02x\n",
- mac[0], mac[1], mac[2], mac[3], mac[4], mac[5]);
- memcpy(dev->mac.addr_bytes, mac, 6);
-
- /*
- * Set the flag to inject a RARP broadcast packet at
- * rte_vhost_dequeue_burst().
- *
- * rte_smp_wmb() is for making sure the mac is copied
- * before the flag is set.
- */
- rte_smp_wmb();
- rte_atomic16_set(&dev->broadcast_rarp, 1);
-
- return 0;
-}
diff --git a/lib/librte_vhost/virtio-net.c b/lib/librte_vhost/virtio-net.c
deleted file mode 100644
index 1785695b..00000000
--- a/lib/librte_vhost/virtio-net.c
+++ /dev/null
@@ -1,847 +0,0 @@
-/*-
- * BSD LICENSE
- *
- * Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- *
- * * Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in
- * the documentation and/or other materials provided with the
- * distribution.
- * * Neither the name of Intel Corporation nor the names of its
- * contributors may be used to endorse or promote products derived
- * from this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- */
-
-#include <linux/vhost.h>
-#include <linux/virtio_net.h>
-#include <stddef.h>
-#include <stdint.h>
-#include <stdlib.h>
-#include <assert.h>
-#include <sys/mman.h>
-#include <unistd.h>
-#ifdef RTE_LIBRTE_VHOST_NUMA
-#include <numaif.h>
-#endif
-
-#include <sys/socket.h>
-
-#include <rte_ethdev.h>
-#include <rte_log.h>
-#include <rte_string_fns.h>
-#include <rte_memory.h>
-#include <rte_malloc.h>
-#include <rte_virtio_net.h>
-
-#include "vhost-net.h"
-
-#define MAX_VHOST_DEVICE 1024
-static struct virtio_net *vhost_devices[MAX_VHOST_DEVICE];
-
-/* device ops to add/remove device to/from data core. */
-struct virtio_net_device_ops const *notify_ops;
-
-#define VHOST_USER_F_PROTOCOL_FEATURES 30
-
-/* Features supported by this lib. */
-#define VHOST_SUPPORTED_FEATURES ((1ULL << VIRTIO_NET_F_MRG_RXBUF) | \
- (1ULL << VIRTIO_NET_F_CTRL_VQ) | \
- (1ULL << VIRTIO_NET_F_CTRL_RX) | \
- (1ULL << VIRTIO_NET_F_GUEST_ANNOUNCE) | \
- (VHOST_SUPPORTS_MQ) | \
- (1ULL << VIRTIO_F_VERSION_1) | \
- (1ULL << VHOST_F_LOG_ALL) | \
- (1ULL << VHOST_USER_F_PROTOCOL_FEATURES) | \
- (1ULL << VIRTIO_NET_F_HOST_TSO4) | \
- (1ULL << VIRTIO_NET_F_HOST_TSO6) | \
- (1ULL << VIRTIO_NET_F_CSUM) | \
- (1ULL << VIRTIO_NET_F_GUEST_CSUM) | \
- (1ULL << VIRTIO_NET_F_GUEST_TSO4) | \
- (1ULL << VIRTIO_NET_F_GUEST_TSO6))
-
-static uint64_t VHOST_FEATURES = VHOST_SUPPORTED_FEATURES;
-
-
-/*
- * Converts QEMU virtual address to Vhost virtual address. This function is
- * used to convert the ring addresses to our address space.
- */
-static uint64_t
-qva_to_vva(struct virtio_net *dev, uint64_t qemu_va)
-{
- struct virtio_memory_regions *region;
- uint64_t vhost_va = 0;
- uint32_t regionidx = 0;
-
- /* Find the region where the address lives. */
- for (regionidx = 0; regionidx < dev->mem->nregions; regionidx++) {
- region = &dev->mem->regions[regionidx];
- if ((qemu_va >= region->userspace_address) &&
- (qemu_va <= region->userspace_address +
- region->memory_size)) {
- vhost_va = qemu_va + region->guest_phys_address +
- region->address_offset -
- region->userspace_address;
- break;
- }
- }
- return vhost_va;
-}
-
-struct virtio_net *
-get_device(int vid)
-{
- struct virtio_net *dev = vhost_devices[vid];
-
- if (unlikely(!dev)) {
- RTE_LOG(ERR, VHOST_CONFIG,
- "(%d) device not found.\n", vid);
- }
-
- return dev;
-}
-
-static void
-cleanup_vq(struct vhost_virtqueue *vq, int destroy)
-{
- if ((vq->callfd >= 0) && (destroy != 0))
- close(vq->callfd);
- if (vq->kickfd >= 0)
- close(vq->kickfd);
-}
-
-/*
- * Unmap any memory, close any file descriptors and
- * free any memory owned by a device.
- */
-static void
-cleanup_device(struct virtio_net *dev, int destroy)
-{
- uint32_t i;
-
- vhost_backend_cleanup(dev);
-
- for (i = 0; i < dev->virt_qp_nb; i++) {
- cleanup_vq(dev->virtqueue[i * VIRTIO_QNUM + VIRTIO_RXQ], destroy);
- cleanup_vq(dev->virtqueue[i * VIRTIO_QNUM + VIRTIO_TXQ], destroy);
- }
-}
-
-/*
- * Release virtqueues and device memory.
- */
-static void
-free_device(struct virtio_net *dev)
-{
- uint32_t i;
-
- for (i = 0; i < dev->virt_qp_nb; i++)
- rte_free(dev->virtqueue[i * VIRTIO_QNUM]);
-
- rte_free(dev);
-}
-
-static void
-init_vring_queue(struct vhost_virtqueue *vq, int qp_idx)
-{
- memset(vq, 0, sizeof(struct vhost_virtqueue));
-
- vq->kickfd = VIRTIO_UNINITIALIZED_EVENTFD;
- vq->callfd = VIRTIO_UNINITIALIZED_EVENTFD;
-
- /* Backends are set to -1 indicating an inactive device. */
- vq->backend = -1;
-
- /* always set the default vq pair to enabled */
- if (qp_idx == 0)
- vq->enabled = 1;
-}
-
-static void
-init_vring_queue_pair(struct virtio_net *dev, uint32_t qp_idx)
-{
- uint32_t base_idx = qp_idx * VIRTIO_QNUM;
-
- init_vring_queue(dev->virtqueue[base_idx + VIRTIO_RXQ], qp_idx);
- init_vring_queue(dev->virtqueue[base_idx + VIRTIO_TXQ], qp_idx);
-}
-
-static void
-reset_vring_queue(struct vhost_virtqueue *vq, int qp_idx)
-{
- int callfd;
-
- callfd = vq->callfd;
- init_vring_queue(vq, qp_idx);
- vq->callfd = callfd;
-}
-
-static void
-reset_vring_queue_pair(struct virtio_net *dev, uint32_t qp_idx)
-{
- uint32_t base_idx = qp_idx * VIRTIO_QNUM;
-
- reset_vring_queue(dev->virtqueue[base_idx + VIRTIO_RXQ], qp_idx);
- reset_vring_queue(dev->virtqueue[base_idx + VIRTIO_TXQ], qp_idx);
-}
-
-static int
-alloc_vring_queue_pair(struct virtio_net *dev, uint32_t qp_idx)
-{
- struct vhost_virtqueue *virtqueue = NULL;
- uint32_t virt_rx_q_idx = qp_idx * VIRTIO_QNUM + VIRTIO_RXQ;
- uint32_t virt_tx_q_idx = qp_idx * VIRTIO_QNUM + VIRTIO_TXQ;
-
- virtqueue = rte_malloc(NULL,
- sizeof(struct vhost_virtqueue) * VIRTIO_QNUM, 0);
- if (virtqueue == NULL) {
- RTE_LOG(ERR, VHOST_CONFIG,
- "Failed to allocate memory for virt qp:%d.\n", qp_idx);
- return -1;
- }
-
- dev->virtqueue[virt_rx_q_idx] = virtqueue;
- dev->virtqueue[virt_tx_q_idx] = virtqueue + VIRTIO_TXQ;
-
- init_vring_queue_pair(dev, qp_idx);
-
- dev->virt_qp_nb += 1;
-
- return 0;
-}
-
-/*
- * Reset some variables in device structure, while keeping few
- * others untouched, such as vid, ifname, virt_qp_nb: they
- * should be same unless the device is removed.
- */
-static void
-reset_device(struct virtio_net *dev)
-{
- uint32_t i;
-
- dev->features = 0;
- dev->protocol_features = 0;
- dev->flags = 0;
-
- for (i = 0; i < dev->virt_qp_nb; i++)
- reset_vring_queue_pair(dev, i);
-}
-
-/*
- * Function is called from the CUSE open function. The device structure is
- * initialised and a new entry is added to the device configuration linked
- * list.
- */
-int
-vhost_new_device(void)
-{
- struct virtio_net *dev;
- int i;
-
- dev = rte_zmalloc(NULL, sizeof(struct virtio_net), 0);
- if (dev == NULL) {
- RTE_LOG(ERR, VHOST_CONFIG,
- "Failed to allocate memory for new dev.\n");
- return -1;
- }
-
- for (i = 0; i < MAX_VHOST_DEVICE; i++) {
- if (vhost_devices[i] == NULL)
- break;
- }
- if (i == MAX_VHOST_DEVICE) {
- RTE_LOG(ERR, VHOST_CONFIG,
- "Failed to find a free slot for new device.\n");
- return -1;
- }
-
- vhost_devices[i] = dev;
- dev->vid = i;
-
- return i;
-}
-
-/*
- * Function is called from the CUSE release function. This function will
- * cleanup the device and remove it from device configuration linked list.
- */
-void
-vhost_destroy_device(int vid)
-{
- struct virtio_net *dev = get_device(vid);
-
- if (dev == NULL)
- return;
-
- if (dev->flags & VIRTIO_DEV_RUNNING) {
- dev->flags &= ~VIRTIO_DEV_RUNNING;
- notify_ops->destroy_device(vid);
- }
-
- cleanup_device(dev, 1);
- free_device(dev);
-
- vhost_devices[vid] = NULL;
-}
-
-void
-vhost_set_ifname(int vid, const char *if_name, unsigned int if_len)
-{
- struct virtio_net *dev;
- unsigned int len;
-
- dev = get_device(vid);
- if (dev == NULL)
- return;
-
- len = if_len > sizeof(dev->ifname) ?
- sizeof(dev->ifname) : if_len;
-
- strncpy(dev->ifname, if_name, len);
- dev->ifname[sizeof(dev->ifname) - 1] = '\0';
-}
-
-
-/*
- * Called from CUSE IOCTL: VHOST_SET_OWNER
- * This function just returns success at the moment unless
- * the device hasn't been initialised.
- */
-int
-vhost_set_owner(int vid)
-{
- struct virtio_net *dev;
-
- dev = get_device(vid);
- if (dev == NULL)
- return -1;
-
- return 0;
-}
-
-/*
- * Called from CUSE IOCTL: VHOST_RESET_OWNER
- */
-int
-vhost_reset_owner(int vid)
-{
- struct virtio_net *dev;
-
- dev = get_device(vid);
- if (dev == NULL)
- return -1;
-
- if (dev->flags & VIRTIO_DEV_RUNNING) {
- dev->flags &= ~VIRTIO_DEV_RUNNING;
- notify_ops->destroy_device(vid);
- }
-
- cleanup_device(dev, 0);
- reset_device(dev);
- return 0;
-}
-
-/*
- * Called from CUSE IOCTL: VHOST_GET_FEATURES
- * The features that we support are requested.
- */
-int
-vhost_get_features(int vid, uint64_t *pu)
-{
- struct virtio_net *dev;
-
- dev = get_device(vid);
- if (dev == NULL)
- return -1;
-
- /* Send our supported features. */
- *pu = VHOST_FEATURES;
- return 0;
-}
-
-/*
- * Called from CUSE IOCTL: VHOST_SET_FEATURES
- * We receive the negotiated features supported by us and the virtio device.
- */
-int
-vhost_set_features(int vid, uint64_t *pu)
-{
- struct virtio_net *dev;
-
- dev = get_device(vid);
- if (dev == NULL)
- return -1;
- if (*pu & ~VHOST_FEATURES)
- return -1;
-
- dev->features = *pu;
- if (dev->features &
- ((1 << VIRTIO_NET_F_MRG_RXBUF) | (1ULL << VIRTIO_F_VERSION_1))) {
- dev->vhost_hlen = sizeof(struct virtio_net_hdr_mrg_rxbuf);
- } else {
- dev->vhost_hlen = sizeof(struct virtio_net_hdr);
- }
- LOG_DEBUG(VHOST_CONFIG,
- "(%d) mergeable RX buffers %s, virtio 1 %s\n",
- dev->vid,
- (dev->features & (1 << VIRTIO_NET_F_MRG_RXBUF)) ? "on" : "off",
- (dev->features & (1ULL << VIRTIO_F_VERSION_1)) ? "on" : "off");
-
- return 0;
-}
-
-/*
- * Called from CUSE IOCTL: VHOST_SET_VRING_NUM
- * The virtio device sends us the size of the descriptor ring.
- */
-int
-vhost_set_vring_num(int vid, struct vhost_vring_state *state)
-{
- struct virtio_net *dev;
-
- dev = get_device(vid);
- if (dev == NULL)
- return -1;
-
- /* State->index refers to the queue index. The txq is 1, rxq is 0. */
- dev->virtqueue[state->index]->size = state->num;
-
- return 0;
-}
-
-/*
- * Reallocate virtio_dev and vhost_virtqueue data structure to make them on the
- * same numa node as the memory of vring descriptor.
- */
-#ifdef RTE_LIBRTE_VHOST_NUMA
-static struct virtio_net*
-numa_realloc(struct virtio_net *dev, int index)
-{
- int oldnode, newnode;
- struct virtio_net *old_dev;
- struct vhost_virtqueue *old_vq, *vq;
- int ret;
-
- /*
- * vq is allocated on pairs, we should try to do realloc
- * on first queue of one queue pair only.
- */
- if (index % VIRTIO_QNUM != 0)
- return dev;
-
- old_dev = dev;
- vq = old_vq = dev->virtqueue[index];
-
- ret = get_mempolicy(&newnode, NULL, 0, old_vq->desc,
- MPOL_F_NODE | MPOL_F_ADDR);
-
- /* check if we need to reallocate vq */
- ret |= get_mempolicy(&oldnode, NULL, 0, old_vq,
- MPOL_F_NODE | MPOL_F_ADDR);
- if (ret) {
- RTE_LOG(ERR, VHOST_CONFIG,
- "Unable to get vq numa information.\n");
- return dev;
- }
- if (oldnode != newnode) {
- RTE_LOG(INFO, VHOST_CONFIG,
- "reallocate vq from %d to %d node\n", oldnode, newnode);
- vq = rte_malloc_socket(NULL, sizeof(*vq) * VIRTIO_QNUM, 0,
- newnode);
- if (!vq)
- return dev;
-
- memcpy(vq, old_vq, sizeof(*vq) * VIRTIO_QNUM);
- rte_free(old_vq);
- }
-
- /* check if we need to reallocate dev */
- ret = get_mempolicy(&oldnode, NULL, 0, old_dev,
- MPOL_F_NODE | MPOL_F_ADDR);
- if (ret) {
- RTE_LOG(ERR, VHOST_CONFIG,
- "Unable to get dev numa information.\n");
- goto out;
- }
- if (oldnode != newnode) {
- RTE_LOG(INFO, VHOST_CONFIG,
- "reallocate dev from %d to %d node\n",
- oldnode, newnode);
- dev = rte_malloc_socket(NULL, sizeof(*dev), 0, newnode);
- if (!dev) {
- dev = old_dev;
- goto out;
- }
-
- memcpy(dev, old_dev, sizeof(*dev));
- rte_free(old_dev);
- }
-
-out:
- dev->virtqueue[index] = vq;
- dev->virtqueue[index + 1] = vq + 1;
- vhost_devices[dev->vid] = dev;
-
- return dev;
-}
-#else
-static struct virtio_net*
-numa_realloc(struct virtio_net *dev, int index __rte_unused)
-{
- return dev;
-}
-#endif
-
-/*
- * Called from CUSE IOCTL: VHOST_SET_VRING_ADDR
- * The virtio device sends us the desc, used and avail ring addresses.
- * This function then converts these to our address space.
- */
-int
-vhost_set_vring_addr(int vid, struct vhost_vring_addr *addr)
-{
- struct virtio_net *dev;
- struct vhost_virtqueue *vq;
-
- dev = get_device(vid);
- if ((dev == NULL) || (dev->mem == NULL))
- return -1;
-
- /* addr->index refers to the queue index. The txq 1, rxq is 0. */
- vq = dev->virtqueue[addr->index];
-
- /* The addresses are converted from QEMU virtual to Vhost virtual. */
- vq->desc = (struct vring_desc *)(uintptr_t)qva_to_vva(dev,
- addr->desc_user_addr);
- if (vq->desc == 0) {
- RTE_LOG(ERR, VHOST_CONFIG,
- "(%d) failed to find desc ring address.\n",
- dev->vid);
- return -1;
- }
-
- dev = numa_realloc(dev, addr->index);
- vq = dev->virtqueue[addr->index];
-
- vq->avail = (struct vring_avail *)(uintptr_t)qva_to_vva(dev,
- addr->avail_user_addr);
- if (vq->avail == 0) {
- RTE_LOG(ERR, VHOST_CONFIG,
- "(%d) failed to find avail ring address.\n",
- dev->vid);
- return -1;
- }
-
- vq->used = (struct vring_used *)(uintptr_t)qva_to_vva(dev,
- addr->used_user_addr);
- if (vq->used == 0) {
- RTE_LOG(ERR, VHOST_CONFIG,
- "(%d) failed to find used ring address.\n",
- dev->vid);
- return -1;
- }
-
- if (vq->last_used_idx != vq->used->idx) {
- RTE_LOG(WARNING, VHOST_CONFIG,
- "last_used_idx (%u) and vq->used->idx (%u) mismatches; "
- "some packets maybe resent for Tx and dropped for Rx\n",
- vq->last_used_idx, vq->used->idx);
- vq->last_used_idx = vq->used->idx;
- }
-
- vq->log_guest_addr = addr->log_guest_addr;
-
- LOG_DEBUG(VHOST_CONFIG, "(%d) mapped address desc: %p\n",
- dev->vid, vq->desc);
- LOG_DEBUG(VHOST_CONFIG, "(%d) mapped address avail: %p\n",
- dev->vid, vq->avail);
- LOG_DEBUG(VHOST_CONFIG, "(%d) mapped address used: %p\n",
- dev->vid, vq->used);
- LOG_DEBUG(VHOST_CONFIG, "(%d) log_guest_addr: %" PRIx64 "\n",
- dev->vid, vq->log_guest_addr);
-
- return 0;
-}
-
-/*
- * Called from CUSE IOCTL: VHOST_SET_VRING_BASE
- * The virtio device sends us the available ring last used index.
- */
-int
-vhost_set_vring_base(int vid, struct vhost_vring_state *state)
-{
- struct virtio_net *dev;
-
- dev = get_device(vid);
- if (dev == NULL)
- return -1;
-
- /* State->index refers to the queue index. The txq is 1, rxq is 0. */
- dev->virtqueue[state->index]->last_used_idx = state->num;
-
- return 0;
-}
-
-/*
- * Called from CUSE IOCTL: VHOST_GET_VRING_BASE
- * We send the virtio device our available ring last used index.
- */
-int
-vhost_get_vring_base(int vid, uint32_t index,
- struct vhost_vring_state *state)
-{
- struct virtio_net *dev;
-
- dev = get_device(vid);
- if (dev == NULL)
- return -1;
-
- state->index = index;
- /* State->index refers to the queue index. The txq is 1, rxq is 0. */
- state->num = dev->virtqueue[state->index]->last_used_idx;
-
- return 0;
-}
-
-
-/*
- * Called from CUSE IOCTL: VHOST_SET_VRING_CALL
- * The virtio device sends an eventfd to interrupt the guest. This fd gets
- * copied into our process space.
- */
-int
-vhost_set_vring_call(int vid, struct vhost_vring_file *file)
-{
- struct virtio_net *dev;
- struct vhost_virtqueue *vq;
- uint32_t cur_qp_idx = file->index / VIRTIO_QNUM;
-
- dev = get_device(vid);
- if (dev == NULL)
- return -1;
-
- /*
- * FIXME: VHOST_SET_VRING_CALL is the first per-vring message
- * we get, so we do vring queue pair allocation here.
- */
- if (cur_qp_idx + 1 > dev->virt_qp_nb) {
- if (alloc_vring_queue_pair(dev, cur_qp_idx) < 0)
- return -1;
- }
-
- /* file->index refers to the queue index. The txq is 1, rxq is 0. */
- vq = dev->virtqueue[file->index];
- assert(vq != NULL);
-
- if (vq->callfd >= 0)
- close(vq->callfd);
-
- vq->callfd = file->fd;
-
- return 0;
-}
-
-/*
- * Called from CUSE IOCTL: VHOST_SET_VRING_KICK
- * The virtio device sends an eventfd that it can use to notify us.
- * This fd gets copied into our process space.
- */
-int
-vhost_set_vring_kick(int vid, struct vhost_vring_file *file)
-{
- struct virtio_net *dev;
- struct vhost_virtqueue *vq;
-
- dev = get_device(vid);
- if (dev == NULL)
- return -1;
-
- /* file->index refers to the queue index. The txq is 1, rxq is 0. */
- vq = dev->virtqueue[file->index];
-
- if (vq->kickfd >= 0)
- close(vq->kickfd);
-
- vq->kickfd = file->fd;
-
- return 0;
-}
-
-/*
- * Called from CUSE IOCTL: VHOST_NET_SET_BACKEND
- * To complete device initialisation when the virtio driver is loaded,
- * we are provided with a valid fd for a tap device (not used by us).
- * If this happens then we can add the device to a data core.
- * When the virtio driver is removed we get fd=-1.
- * At that point we remove the device from the data core.
- * The device will still exist in the device configuration linked list.
- */
-int
-vhost_set_backend(int vid, struct vhost_vring_file *file)
-{
- struct virtio_net *dev;
-
- dev = get_device(vid);
- if (dev == NULL)
- return -1;
-
- /* file->index refers to the queue index. The txq is 1, rxq is 0. */
- dev->virtqueue[file->index]->backend = file->fd;
-
- /*
- * If the device isn't already running and both backend fds are set,
- * we add the device.
- */
- if (!(dev->flags & VIRTIO_DEV_RUNNING)) {
- if (dev->virtqueue[VIRTIO_TXQ]->backend != VIRTIO_DEV_STOPPED &&
- dev->virtqueue[VIRTIO_RXQ]->backend != VIRTIO_DEV_STOPPED) {
- if (notify_ops->new_device(vid) < 0)
- return -1;
- dev->flags |= VIRTIO_DEV_RUNNING;
- }
- } else if (file->fd == VIRTIO_DEV_STOPPED) {
- dev->flags &= ~VIRTIO_DEV_RUNNING;
- notify_ops->destroy_device(vid);
- }
-
- return 0;
-}
-
-int
-rte_vhost_get_numa_node(int vid)
-{
-#ifdef RTE_LIBRTE_VHOST_NUMA
- struct virtio_net *dev = get_device(vid);
- int numa_node;
- int ret;
-
- if (dev == NULL)
- return -1;
-
- ret = get_mempolicy(&numa_node, NULL, 0, dev,
- MPOL_F_NODE | MPOL_F_ADDR);
- if (ret < 0) {
- RTE_LOG(ERR, VHOST_CONFIG,
- "(%d) failed to query numa node: %d\n", vid, ret);
- return -1;
- }
-
- return numa_node;
-#else
- RTE_SET_USED(vid);
- return -1;
-#endif
-}
-
-uint32_t
-rte_vhost_get_queue_num(int vid)
-{
- struct virtio_net *dev = get_device(vid);
-
- if (dev == NULL)
- return 0;
-
- return dev->virt_qp_nb;
-}
-
-int
-rte_vhost_get_ifname(int vid, char *buf, size_t len)
-{
- struct virtio_net *dev = get_device(vid);
-
- if (dev == NULL)
- return -1;
-
- len = RTE_MIN(len, sizeof(dev->ifname));
-
- strncpy(buf, dev->ifname, len);
- buf[len - 1] = '\0';
-
- return 0;
-}
-
-uint16_t
-rte_vhost_avail_entries(int vid, uint16_t queue_id)
-{
- struct virtio_net *dev;
- struct vhost_virtqueue *vq;
-
- dev = get_device(vid);
- if (!dev)
- return 0;
-
- vq = dev->virtqueue[queue_id];
- if (!vq->enabled)
- return 0;
-
- return *(volatile uint16_t *)&vq->avail->idx - vq->last_used_idx;
-}
-
-int
-rte_vhost_enable_guest_notification(int vid, uint16_t queue_id, int enable)
-{
- struct virtio_net *dev = get_device(vid);
-
- if (dev == NULL)
- return -1;
-
- if (enable) {
- RTE_LOG(ERR, VHOST_CONFIG,
- "guest notification isn't supported.\n");
- return -1;
- }
-
- dev->virtqueue[queue_id]->used->flags = VRING_USED_F_NO_NOTIFY;
- return 0;
-}
-
-uint64_t rte_vhost_feature_get(void)
-{
- return VHOST_FEATURES;
-}
-
-int rte_vhost_feature_disable(uint64_t feature_mask)
-{
- VHOST_FEATURES = VHOST_FEATURES & ~feature_mask;
- return 0;
-}
-
-int rte_vhost_feature_enable(uint64_t feature_mask)
-{
- if ((feature_mask & VHOST_SUPPORTED_FEATURES) == feature_mask) {
- VHOST_FEATURES = VHOST_FEATURES | feature_mask;
- return 0;
- }
- return -1;
-}
-
-/*
- * Register ops so that we can add/remove device to data core.
- */
-int
-rte_vhost_driver_callback_register(struct virtio_net_device_ops const * const ops)
-{
- notify_ops = ops;
-
- return 0;
-}
diff --git a/lib/librte_vhost/vhost_rxtx.c b/lib/librte_vhost/virtio_net.c
index 5806f99a..595f67c4 100644
--- a/lib/librte_vhost/vhost_rxtx.c
+++ b/lib/librte_vhost/virtio_net.c
@@ -1,7 +1,7 @@
/*-
* BSD LICENSE
*
- * Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+ * Copyright(c) 2010-2016 Intel Corporation. All rights reserved.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -45,7 +45,7 @@
#include <rte_sctp.h>
#include <rte_arp.h>
-#include "vhost-net.h"
+#include "vhost.h"
#define MAX_PKT_BURST 32
#define VHOST_LOG_PAGE 4096
@@ -91,6 +91,56 @@ is_valid_virt_queue_idx(uint32_t idx, int is_tx, uint32_t qp_nb)
return (is_tx ^ (idx & 1)) == 0 && idx < qp_nb * VIRTIO_QNUM;
}
+static inline void __attribute__((always_inline))
+do_flush_shadow_used_ring(struct virtio_net *dev, struct vhost_virtqueue *vq,
+ uint16_t to, uint16_t from, uint16_t size)
+{
+ rte_memcpy(&vq->used->ring[to],
+ &vq->shadow_used_ring[from],
+ size * sizeof(struct vring_used_elem));
+ vhost_log_used_vring(dev, vq,
+ offsetof(struct vring_used, ring[to]),
+ size * sizeof(struct vring_used_elem));
+}
+
+static inline void __attribute__((always_inline))
+flush_shadow_used_ring(struct virtio_net *dev, struct vhost_virtqueue *vq)
+{
+ uint16_t used_idx = vq->last_used_idx & (vq->size - 1);
+
+ if (used_idx + vq->shadow_used_idx <= vq->size) {
+ do_flush_shadow_used_ring(dev, vq, used_idx, 0,
+ vq->shadow_used_idx);
+ } else {
+ uint16_t size;
+
+ /* update used ring interval [used_idx, vq->size) */
+ size = vq->size - used_idx;
+ do_flush_shadow_used_ring(dev, vq, used_idx, 0, size);
+
+ /* update the wrapped-around rest: interval [0, shadow_used_idx - size) */
+ do_flush_shadow_used_ring(dev, vq, 0, size,
+ vq->shadow_used_idx - size);
+ }
+ vq->last_used_idx += vq->shadow_used_idx;
+
+ rte_smp_wmb();
+
+ *(volatile uint16_t *)&vq->used->idx += vq->shadow_used_idx;
+ vhost_log_used_vring(dev, vq, offsetof(struct vring_used, idx),
+ sizeof(vq->used->idx));
+}
+
+static inline void __attribute__((always_inline))
+update_shadow_used_ring(struct vhost_virtqueue *vq,
+ uint16_t desc_idx, uint16_t len)
+{
+ uint16_t i = vq->shadow_used_idx++;
+
+ vq->shadow_used_ring[i].id = desc_idx;
+ vq->shadow_used_ring[i].len = len;
+}
+
static void
virtio_enqueue_offload(struct rte_mbuf *m_buf, struct virtio_net_hdr *net_hdr)
{
@@ -136,8 +186,8 @@ copy_virtio_net_hdr(struct virtio_net *dev, uint64_t desc_addr,
}
static inline int __attribute__((always_inline))
-copy_mbuf_to_desc(struct virtio_net *dev, struct vhost_virtqueue *vq,
- struct rte_mbuf *m, uint16_t desc_idx)
+copy_mbuf_to_desc(struct virtio_net *dev, struct vring_desc *descs,
+ struct rte_mbuf *m, uint16_t desc_idx, uint32_t size)
{
uint32_t desc_avail, desc_offset;
uint32_t mbuf_avail, mbuf_offset;
@@ -146,7 +196,7 @@ copy_mbuf_to_desc(struct virtio_net *dev, struct vhost_virtqueue *vq,
uint64_t desc_addr;
struct virtio_net_hdr_mrg_rxbuf virtio_hdr = {{0, 0, 0, 0, 0, 0}, 0};
- desc = &vq->desc[desc_idx];
+ desc = &descs[desc_idx];
desc_addr = gpa_to_vva(dev, desc->addr);
/*
* Checking of 'desc_addr' placed outside of 'unlikely' macro to avoid
@@ -183,10 +233,10 @@ copy_mbuf_to_desc(struct virtio_net *dev, struct vhost_virtqueue *vq,
/* Room in vring buffer is not enough */
return -1;
}
- if (unlikely(desc->next >= vq->size))
+ if (unlikely(desc->next >= size))
return -1;
- desc = &vq->desc[desc->next];
+ desc = &descs[desc->next];
desc_addr = gpa_to_vva(dev, desc->addr);
if (unlikely(!desc_addr))
return -1;
@@ -226,8 +276,9 @@ virtio_dev_rx(struct virtio_net *dev, uint16_t queue_id,
struct vhost_virtqueue *vq;
uint16_t avail_idx, free_entries, start_idx;
uint16_t desc_indexes[MAX_PKT_BURST];
+ struct vring_desc *descs;
uint16_t used_idx;
- uint32_t i;
+ uint32_t i, sz;
LOG_DEBUG(VHOST_DATA, "(%d) %s\n", dev->vid, __func__);
if (unlikely(!is_valid_virt_queue_idx(queue_id, 0, dev->virt_qp_nb))) {
@@ -269,7 +320,22 @@ virtio_dev_rx(struct virtio_net *dev, uint16_t queue_id,
uint16_t desc_idx = desc_indexes[i];
int err;
- err = copy_mbuf_to_desc(dev, vq, pkts[i], desc_idx);
+ if (vq->desc[desc_idx].flags & VRING_DESC_F_INDIRECT) {
+ descs = (struct vring_desc *)(uintptr_t)gpa_to_vva(dev,
+ vq->desc[desc_idx].addr);
+ if (unlikely(!descs)) {
+ count = i;
+ break;
+ }
+
+ desc_idx = 0;
+ sz = vq->desc[desc_idx].len / sizeof(*descs);
+ } else {
+ descs = vq->desc;
+ sz = vq->size;
+ }
+
+ err = copy_mbuf_to_desc(dev, descs, pkts[i], desc_idx, sz);
if (unlikely(err)) {
used_idx = (start_idx + i) & (vq->size - 1);
vq->used->ring[used_idx].len = dev->vhost_hlen;
@@ -300,33 +366,46 @@ virtio_dev_rx(struct virtio_net *dev, uint16_t queue_id,
return count;
}
-static inline int
-fill_vec_buf(struct vhost_virtqueue *vq, uint32_t avail_idx,
- uint32_t *allocated, uint32_t *vec_idx,
- struct buf_vector *buf_vec)
+static inline int __attribute__((always_inline))
+fill_vec_buf(struct virtio_net *dev, struct vhost_virtqueue *vq,
+ uint32_t avail_idx, uint32_t *vec_idx,
+ struct buf_vector *buf_vec, uint16_t *desc_chain_head,
+ uint16_t *desc_chain_len)
{
uint16_t idx = vq->avail->ring[avail_idx & (vq->size - 1)];
uint32_t vec_id = *vec_idx;
- uint32_t len = *allocated;
+ uint32_t len = 0;
+ struct vring_desc *descs = vq->desc;
+
+ *desc_chain_head = idx;
+
+ if (vq->desc[idx].flags & VRING_DESC_F_INDIRECT) {
+ descs = (struct vring_desc *)(uintptr_t)
+ gpa_to_vva(dev, vq->desc[idx].addr);
+ if (unlikely(!descs))
+ return -1;
+
+ idx = 0;
+ }
while (1) {
if (unlikely(vec_id >= BUF_VECTOR_MAX || idx >= vq->size))
return -1;
- len += vq->desc[idx].len;
- buf_vec[vec_id].buf_addr = vq->desc[idx].addr;
- buf_vec[vec_id].buf_len = vq->desc[idx].len;
+ len += descs[idx].len;
+ buf_vec[vec_id].buf_addr = descs[idx].addr;
+ buf_vec[vec_id].buf_len = descs[idx].len;
buf_vec[vec_id].desc_idx = idx;
vec_id++;
- if ((vq->desc[idx].flags & VRING_DESC_F_NEXT) == 0)
+ if ((descs[idx].flags & VRING_DESC_F_NEXT) == 0)
break;
- idx = vq->desc[idx].next;
+ idx = descs[idx].next;
}
- *allocated = len;
- *vec_idx = vec_id;
+ *desc_chain_len = len;
+ *vec_idx = vec_id;
return 0;
}
@@ -335,31 +414,34 @@ fill_vec_buf(struct vhost_virtqueue *vq, uint32_t avail_idx,
* Returns -1 on fail, 0 on success
*/
static inline int
-reserve_avail_buf_mergeable(struct vhost_virtqueue *vq, uint32_t size,
- uint16_t *end, struct buf_vector *buf_vec)
+reserve_avail_buf_mergeable(struct virtio_net *dev, struct vhost_virtqueue *vq,
+ uint32_t size, struct buf_vector *buf_vec,
+ uint16_t *num_buffers, uint16_t avail_head)
{
uint16_t cur_idx;
- uint16_t avail_idx;
- uint32_t allocated = 0;
uint32_t vec_idx = 0;
uint16_t tries = 0;
- cur_idx = vq->last_used_idx;
+ uint16_t head_idx = 0;
+ uint16_t len = 0;
- while (1) {
- avail_idx = *((volatile uint16_t *)&vq->avail->idx);
- if (unlikely(cur_idx == avail_idx))
+ *num_buffers = 0;
+ cur_idx = vq->last_avail_idx;
+
+ while (size > 0) {
+ if (unlikely(cur_idx == avail_head))
return -1;
- if (unlikely(fill_vec_buf(vq, cur_idx, &allocated,
- &vec_idx, buf_vec) < 0))
+ if (unlikely(fill_vec_buf(dev, vq, cur_idx, &vec_idx, buf_vec,
+ &head_idx, &len) < 0))
return -1;
+ len = RTE_MIN(len, size);
+ update_shadow_used_ring(vq, head_idx, len);
+ size -= len;
cur_idx++;
tries++;
-
- if (allocated >= size)
- break;
+ *num_buffers += 1;
/*
* if we tried all available ring items, and still
@@ -370,77 +452,50 @@ reserve_avail_buf_mergeable(struct vhost_virtqueue *vq, uint32_t size,
return -1;
}
- *end = cur_idx;
return 0;
}
-static inline uint32_t __attribute__((always_inline))
-copy_mbuf_to_desc_mergeable(struct virtio_net *dev, struct vhost_virtqueue *vq,
- uint16_t end_idx, struct rte_mbuf *m,
- struct buf_vector *buf_vec)
+static inline int __attribute__((always_inline))
+copy_mbuf_to_desc_mergeable(struct virtio_net *dev, struct rte_mbuf *m,
+ struct buf_vector *buf_vec, uint16_t num_buffers)
{
struct virtio_net_hdr_mrg_rxbuf virtio_hdr = {{0, 0, 0, 0, 0, 0}, 0};
uint32_t vec_idx = 0;
- uint16_t start_idx = vq->last_used_idx;
- uint16_t cur_idx = start_idx;
uint64_t desc_addr;
- uint32_t desc_chain_head;
- uint32_t desc_chain_len;
uint32_t mbuf_offset, mbuf_avail;
uint32_t desc_offset, desc_avail;
uint32_t cpy_len;
- uint16_t desc_idx, used_idx;
+ uint64_t hdr_addr, hdr_phys_addr;
+ struct rte_mbuf *hdr_mbuf;
if (unlikely(m == NULL))
- return 0;
-
- LOG_DEBUG(VHOST_DATA, "(%d) current index %d | end index %d\n",
- dev->vid, cur_idx, end_idx);
+ return -1;
desc_addr = gpa_to_vva(dev, buf_vec[vec_idx].buf_addr);
if (buf_vec[vec_idx].buf_len < dev->vhost_hlen || !desc_addr)
- return 0;
+ return -1;
- rte_prefetch0((void *)(uintptr_t)desc_addr);
+ hdr_mbuf = m;
+ hdr_addr = desc_addr;
+ hdr_phys_addr = buf_vec[vec_idx].buf_addr;
+ rte_prefetch0((void *)(uintptr_t)hdr_addr);
- virtio_hdr.num_buffers = end_idx - start_idx;
+ virtio_hdr.num_buffers = num_buffers;
LOG_DEBUG(VHOST_DATA, "(%d) RX: num merge buffers %d\n",
- dev->vid, virtio_hdr.num_buffers);
-
- virtio_enqueue_offload(m, &virtio_hdr.hdr);
- copy_virtio_net_hdr(dev, desc_addr, virtio_hdr);
- vhost_log_write(dev, buf_vec[vec_idx].buf_addr, dev->vhost_hlen);
- PRINT_PACKET(dev, (uintptr_t)desc_addr, dev->vhost_hlen, 0);
+ dev->vid, num_buffers);
desc_avail = buf_vec[vec_idx].buf_len - dev->vhost_hlen;
desc_offset = dev->vhost_hlen;
- desc_chain_head = buf_vec[vec_idx].desc_idx;
- desc_chain_len = desc_offset;
mbuf_avail = rte_pktmbuf_data_len(m);
mbuf_offset = 0;
while (mbuf_avail != 0 || m->next != NULL) {
/* done with current desc buf, get the next one */
if (desc_avail == 0) {
- desc_idx = buf_vec[vec_idx].desc_idx;
vec_idx++;
-
- if (!(vq->desc[desc_idx].flags & VRING_DESC_F_NEXT)) {
- /* Update used ring with desc information */
- used_idx = cur_idx++ & (vq->size - 1);
- vq->used->ring[used_idx].id = desc_chain_head;
- vq->used->ring[used_idx].len = desc_chain_len;
- vhost_log_used_vring(dev, vq,
- offsetof(struct vring_used,
- ring[used_idx]),
- sizeof(vq->used->ring[used_idx]));
- desc_chain_head = buf_vec[vec_idx].desc_idx;
- desc_chain_len = 0;
- }
-
desc_addr = gpa_to_vva(dev, buf_vec[vec_idx].buf_addr);
if (unlikely(!desc_addr))
- return 0;
+ return -1;
/* Prefetch buffer address. */
rte_prefetch0((void *)(uintptr_t)desc_addr);
@@ -456,6 +511,16 @@ copy_mbuf_to_desc_mergeable(struct virtio_net *dev, struct vhost_virtqueue *vq,
mbuf_avail = rte_pktmbuf_data_len(m);
}
+ if (hdr_addr) {
+ virtio_enqueue_offload(hdr_mbuf, &virtio_hdr.hdr);
+ copy_virtio_net_hdr(dev, hdr_addr, virtio_hdr);
+ vhost_log_write(dev, hdr_phys_addr, dev->vhost_hlen);
+ PRINT_PACKET(dev, (uintptr_t)hdr_addr,
+ dev->vhost_hlen, 0);
+
+ hdr_addr = 0;
+ }
+
cpy_len = RTE_MIN(desc_avail, mbuf_avail);
rte_memcpy((void *)((uintptr_t)(desc_addr + desc_offset)),
rte_pktmbuf_mtod_offset(m, void *, mbuf_offset),
@@ -469,17 +534,9 @@ copy_mbuf_to_desc_mergeable(struct virtio_net *dev, struct vhost_virtqueue *vq,
mbuf_offset += cpy_len;
desc_avail -= cpy_len;
desc_offset += cpy_len;
- desc_chain_len += cpy_len;
}
- used_idx = cur_idx & (vq->size - 1);
- vq->used->ring[used_idx].id = desc_chain_head;
- vq->used->ring[used_idx].len = desc_chain_len;
- vhost_log_used_vring(dev, vq,
- offsetof(struct vring_used, ring[used_idx]),
- sizeof(vq->used->ring[used_idx]));
-
- return end_idx - start_idx;
+ return 0;
}
static inline uint32_t __attribute__((always_inline))
@@ -487,9 +544,10 @@ virtio_dev_merge_rx(struct virtio_net *dev, uint16_t queue_id,
struct rte_mbuf **pkts, uint32_t count)
{
struct vhost_virtqueue *vq;
- uint32_t pkt_idx = 0, nr_used = 0;
- uint16_t end;
+ uint32_t pkt_idx = 0;
+ uint16_t num_buffers;
struct buf_vector buf_vec[BUF_VECTOR_MAX];
+ uint16_t avail_head;
LOG_DEBUG(VHOST_DATA, "(%d) %s\n", dev->vid, __func__);
if (unlikely(!is_valid_virt_queue_idx(queue_id, 0, dev->virt_qp_nb))) {
@@ -506,28 +564,39 @@ virtio_dev_merge_rx(struct virtio_net *dev, uint16_t queue_id,
if (count == 0)
return 0;
+ rte_prefetch0(&vq->avail->ring[vq->last_avail_idx & (vq->size - 1)]);
+
+ vq->shadow_used_idx = 0;
+ avail_head = *((volatile uint16_t *)&vq->avail->idx);
for (pkt_idx = 0; pkt_idx < count; pkt_idx++) {
uint32_t pkt_len = pkts[pkt_idx]->pkt_len + dev->vhost_hlen;
- if (unlikely(reserve_avail_buf_mergeable(vq, pkt_len,
- &end, buf_vec) < 0)) {
+ if (unlikely(reserve_avail_buf_mergeable(dev, vq,
+ pkt_len, buf_vec, &num_buffers,
+ avail_head) < 0)) {
LOG_DEBUG(VHOST_DATA,
"(%d) failed to get enough desc from vring\n",
dev->vid);
+ vq->shadow_used_idx -= num_buffers;
break;
}
- nr_used = copy_mbuf_to_desc_mergeable(dev, vq, end,
- pkts[pkt_idx], buf_vec);
- rte_smp_wmb();
+ LOG_DEBUG(VHOST_DATA, "(%d) current index %d | end index %d\n",
+ dev->vid, vq->last_avail_idx,
+ vq->last_avail_idx + num_buffers);
- *(volatile uint16_t *)&vq->used->idx += nr_used;
- vhost_log_used_vring(dev, vq, offsetof(struct vring_used, idx),
- sizeof(vq->used->idx));
- vq->last_used_idx += nr_used;
+ if (copy_mbuf_to_desc_mergeable(dev, pkts[pkt_idx],
+ buf_vec, num_buffers) < 0) {
+ vq->shadow_used_idx -= num_buffers;
+ break;
+ }
+
+ vq->last_avail_idx += num_buffers;
}
- if (likely(pkt_idx)) {
+ if (likely(vq->shadow_used_idx)) {
+ flush_shadow_used_ring(dev, vq);
+
/* flush used->idx update before we read avail->flags. */
rte_mb();
@@ -555,6 +624,18 @@ rte_vhost_enqueue_burst(int vid, uint16_t queue_id,
return virtio_dev_rx(dev, queue_id, pkts, count);
}
+static inline bool
+virtio_net_with_host_offload(struct virtio_net *dev)
+{
+ if (dev->features &
+ (VIRTIO_NET_F_CSUM | VIRTIO_NET_F_HOST_ECN |
+ VIRTIO_NET_F_HOST_TSO4 | VIRTIO_NET_F_HOST_TSO6 |
+ VIRTIO_NET_F_HOST_UFO))
+ return true;
+
+ return false;
+}
+
static void
parse_ethernet(struct rte_mbuf *m, uint16_t *l4_proto, void **l4_hdr)
{
@@ -607,6 +688,9 @@ vhost_dequeue_offload(struct virtio_net_hdr *hdr, struct rte_mbuf *m)
void *l4_hdr = NULL;
struct tcp_hdr *tcp_hdr = NULL;
+ if (hdr->flags == 0 && hdr->gso_type == VIRTIO_NET_HDR_GSO_NONE)
+ return;
+
parse_ethernet(m, &l4_proto, &l4_hdr);
if (hdr->flags == VIRTIO_NET_HDR_F_NEEDS_CSUM) {
if (hdr->csum_start == (m->l2_len + m->l3_len)) {
@@ -685,9 +769,15 @@ make_rarp_packet(struct rte_mbuf *rarp_mbuf, const struct ether_addr *mac)
return 0;
}
+static inline void __attribute__((always_inline))
+put_zmbuf(struct zcopy_mbuf *zmbuf)
+{
+ zmbuf->in_use = 0;
+}
+
static inline int __attribute__((always_inline))
-copy_desc_to_mbuf(struct virtio_net *dev, struct vhost_virtqueue *vq,
- struct rte_mbuf *m, uint16_t desc_idx,
+copy_desc_to_mbuf(struct virtio_net *dev, struct vring_desc *descs,
+ uint16_t max_desc, struct rte_mbuf *m, uint16_t desc_idx,
struct rte_mempool *mbuf_pool)
{
struct vring_desc *desc;
@@ -696,20 +786,23 @@ copy_desc_to_mbuf(struct virtio_net *dev, struct vhost_virtqueue *vq,
uint32_t mbuf_avail, mbuf_offset;
uint32_t cpy_len;
struct rte_mbuf *cur = m, *prev = m;
- struct virtio_net_hdr *hdr;
+ struct virtio_net_hdr *hdr = NULL;
/* A counter to avoid desc dead loop chain */
uint32_t nr_desc = 1;
- desc = &vq->desc[desc_idx];
- if (unlikely(desc->len < dev->vhost_hlen))
+ desc = &descs[desc_idx];
+ if (unlikely((desc->len < dev->vhost_hlen)) ||
+ (desc->flags & VRING_DESC_F_INDIRECT))
return -1;
desc_addr = gpa_to_vva(dev, desc->addr);
if (unlikely(!desc_addr))
return -1;
- hdr = (struct virtio_net_hdr *)((uintptr_t)desc_addr);
- rte_prefetch0(hdr);
+ if (virtio_net_with_host_offload(dev)) {
+ hdr = (struct virtio_net_hdr *)((uintptr_t)desc_addr);
+ rte_prefetch0(hdr);
+ }
/*
* A virtio driver normally uses at least 2 desc buffers
@@ -718,31 +811,56 @@ copy_desc_to_mbuf(struct virtio_net *dev, struct vhost_virtqueue *vq,
*/
if (likely((desc->len == dev->vhost_hlen) &&
(desc->flags & VRING_DESC_F_NEXT) != 0)) {
- desc = &vq->desc[desc->next];
+ desc = &descs[desc->next];
+ if (unlikely(desc->flags & VRING_DESC_F_INDIRECT))
+ return -1;
desc_addr = gpa_to_vva(dev, desc->addr);
if (unlikely(!desc_addr))
return -1;
- rte_prefetch0((void *)(uintptr_t)desc_addr);
-
desc_offset = 0;
desc_avail = desc->len;
nr_desc += 1;
-
- PRINT_PACKET(dev, (uintptr_t)desc_addr, desc->len, 0);
} else {
desc_avail = desc->len - dev->vhost_hlen;
desc_offset = dev->vhost_hlen;
}
+ rte_prefetch0((void *)(uintptr_t)(desc_addr + desc_offset));
+
+ PRINT_PACKET(dev, (uintptr_t)(desc_addr + desc_offset), desc_avail, 0);
+
mbuf_offset = 0;
mbuf_avail = m->buf_len - RTE_PKTMBUF_HEADROOM;
while (1) {
+ uint64_t hpa;
+
cpy_len = RTE_MIN(desc_avail, mbuf_avail);
- rte_memcpy(rte_pktmbuf_mtod_offset(cur, void *, mbuf_offset),
- (void *)((uintptr_t)(desc_addr + desc_offset)),
- cpy_len);
+
+ /*
+ * A desc buf might span two host physical pages that are
+ * not contiguous. In such a case (gpa_to_hpa returns 0), data
+ * will be copied even though zero copy is enabled.
+ */
+ if (unlikely(dev->dequeue_zero_copy && (hpa = gpa_to_hpa(dev,
+ desc->addr + desc_offset, cpy_len)))) {
+ cur->data_len = cpy_len;
+ cur->data_off = 0;
+ cur->buf_addr = (void *)(uintptr_t)desc_addr;
+ cur->buf_physaddr = hpa;
+
+ /*
+ * In zero copy mode, one mbuf can only reference data
+ * from one desc buf, or from part of one.
+ */
+ mbuf_avail = cpy_len;
+ } else {
+ rte_memcpy(rte_pktmbuf_mtod_offset(cur, void *,
+ mbuf_offset),
+ (void *)((uintptr_t)(desc_addr + desc_offset)),
+ cpy_len);
+ }
mbuf_avail -= cpy_len;
mbuf_offset += cpy_len;
@@ -754,10 +872,12 @@ copy_desc_to_mbuf(struct virtio_net *dev, struct vhost_virtqueue *vq,
if ((desc->flags & VRING_DESC_F_NEXT) == 0)
break;
- if (unlikely(desc->next >= vq->size ||
- ++nr_desc > vq->size))
+ if (unlikely(desc->next >= max_desc ||
+ ++nr_desc > max_desc))
+ return -1;
+ desc = &descs[desc->next];
+ if (unlikely(desc->flags & VRING_DESC_F_INDIRECT))
return -1;
- desc = &vq->desc[desc->next];
desc_addr = gpa_to_vva(dev, desc->addr);
if (unlikely(!desc_addr))
@@ -797,12 +917,86 @@ copy_desc_to_mbuf(struct virtio_net *dev, struct vhost_virtqueue *vq,
prev->data_len = mbuf_offset;
m->pkt_len += mbuf_offset;
- if (hdr->flags != 0 || hdr->gso_type != VIRTIO_NET_HDR_GSO_NONE)
+ if (hdr)
vhost_dequeue_offload(hdr, m);
return 0;
}
+static inline void __attribute__((always_inline))
+update_used_ring(struct virtio_net *dev, struct vhost_virtqueue *vq,
+ uint32_t used_idx, uint32_t desc_idx)
+{
+ vq->used->ring[used_idx].id = desc_idx;
+ vq->used->ring[used_idx].len = 0;
+ vhost_log_used_vring(dev, vq,
+ offsetof(struct vring_used, ring[used_idx]),
+ sizeof(vq->used->ring[used_idx]));
+}
+
+static inline void __attribute__((always_inline))
+update_used_idx(struct virtio_net *dev, struct vhost_virtqueue *vq,
+ uint32_t count)
+{
+ if (unlikely(count == 0))
+ return;
+
+ rte_smp_wmb();
+ rte_smp_rmb();
+
+ vq->used->idx += count;
+ vhost_log_used_vring(dev, vq, offsetof(struct vring_used, idx),
+ sizeof(vq->used->idx));
+
+ /* Kick guest if required. */
+ if (!(vq->avail->flags & VRING_AVAIL_F_NO_INTERRUPT)
+ && (vq->callfd >= 0))
+ eventfd_write(vq->callfd, (eventfd_t)1);
+}
+
+static inline struct zcopy_mbuf *__attribute__((always_inline))
+get_zmbuf(struct vhost_virtqueue *vq)
+{
+ uint16_t i;
+ uint16_t last;
+ int tries = 0;
+
+ /* search [last_zmbuf_idx, zmbuf_size) */
+ i = vq->last_zmbuf_idx;
+ last = vq->zmbuf_size;
+
+again:
+ for (; i < last; i++) {
+ if (vq->zmbufs[i].in_use == 0) {
+ vq->last_zmbuf_idx = i + 1;
+ vq->zmbufs[i].in_use = 1;
+ return &vq->zmbufs[i];
+ }
+ }
+
+ tries++;
+ if (tries == 1) {
+ /* search [0, last_zmbuf_idx) */
+ i = 0;
+ last = vq->last_zmbuf_idx;
+ goto again;
+ }
+
+ return NULL;
+}
+
+static inline bool __attribute__((always_inline))
+mbuf_is_consumed(struct rte_mbuf *m)
+{
+ while (m) {
+ if (rte_mbuf_refcnt_read(m) > 1)
+ return false;
+ m = m->next;
+ }
+
+ return true;
+}
+
uint16_t
rte_vhost_dequeue_burst(int vid, uint16_t queue_id,
struct rte_mempool *mbuf_pool, struct rte_mbuf **pkts, uint16_t count)
@@ -830,6 +1024,30 @@ rte_vhost_dequeue_burst(int vid, uint16_t queue_id,
if (unlikely(vq->enabled == 0))
return 0;
+ if (unlikely(dev->dequeue_zero_copy)) {
+ struct zcopy_mbuf *zmbuf, *next;
+ int nr_updated = 0;
+
+ for (zmbuf = TAILQ_FIRST(&vq->zmbuf_list);
+ zmbuf != NULL; zmbuf = next) {
+ next = TAILQ_NEXT(zmbuf, next);
+
+ if (mbuf_is_consumed(zmbuf->mbuf)) {
+ used_idx = vq->last_used_idx++ & (vq->size - 1);
+ update_used_ring(dev, vq, used_idx,
+ zmbuf->desc_idx);
+ nr_updated += 1;
+
+ TAILQ_REMOVE(&vq->zmbuf_list, zmbuf, next);
+ rte_pktmbuf_free(zmbuf->mbuf);
+ put_zmbuf(zmbuf);
+ vq->nr_zmbuf -= 1;
+ }
+ }
+
+ update_used_idx(dev, vq, nr_updated);
+ }
+
/*
* Construct a RARP broadcast packet, and inject it to the "pkts"
* array, to looks like that guest actually send such packet.
@@ -853,16 +1071,17 @@ rte_vhost_dequeue_burst(int vid, uint16_t queue_id,
}
}
- avail_idx = *((volatile uint16_t *)&vq->avail->idx);
- free_entries = avail_idx - vq->last_used_idx;
+ free_entries = *((volatile uint16_t *)&vq->avail->idx) -
+ vq->last_avail_idx;
if (free_entries == 0)
goto out;
LOG_DEBUG(VHOST_DATA, "(%d) %s\n", dev->vid, __func__);
- /* Prefetch available ring to retrieve head indexes. */
- used_idx = vq->last_used_idx & (vq->size - 1);
- rte_prefetch0(&vq->avail->ring[used_idx]);
+ /* Prefetch available and used ring */
+ avail_idx = vq->last_avail_idx & (vq->size - 1);
+ used_idx = vq->last_used_idx & (vq->size - 1);
+ rte_prefetch0(&vq->avail->ring[avail_idx]);
rte_prefetch0(&vq->used->ring[used_idx]);
count = RTE_MIN(count, MAX_PKT_BURST);
@@ -872,49 +1091,81 @@ rte_vhost_dequeue_burst(int vid, uint16_t queue_id,
/* Retrieve all of the head indexes first to avoid caching issues. */
for (i = 0; i < count; i++) {
- used_idx = (vq->last_used_idx + i) & (vq->size - 1);
- desc_indexes[i] = vq->avail->ring[used_idx];
+ avail_idx = (vq->last_avail_idx + i) & (vq->size - 1);
+ used_idx = (vq->last_used_idx + i) & (vq->size - 1);
+ desc_indexes[i] = vq->avail->ring[avail_idx];
- vq->used->ring[used_idx].id = desc_indexes[i];
- vq->used->ring[used_idx].len = 0;
- vhost_log_used_vring(dev, vq,
- offsetof(struct vring_used, ring[used_idx]),
- sizeof(vq->used->ring[used_idx]));
+ if (likely(dev->dequeue_zero_copy == 0))
+ update_used_ring(dev, vq, used_idx, desc_indexes[i]);
}
/* Prefetch descriptor index. */
rte_prefetch0(&vq->desc[desc_indexes[0]]);
for (i = 0; i < count; i++) {
+ struct vring_desc *desc;
+ uint16_t sz, idx;
int err;
if (likely(i + 1 < count))
rte_prefetch0(&vq->desc[desc_indexes[i + 1]]);
+ if (vq->desc[desc_indexes[i]].flags & VRING_DESC_F_INDIRECT) {
+ desc = (struct vring_desc *)(uintptr_t)gpa_to_vva(dev,
+ vq->desc[desc_indexes[i]].addr);
+ if (unlikely(!desc))
+ break;
+
+ rte_prefetch0(desc);
+ sz = vq->desc[desc_indexes[i]].len / sizeof(*desc);
+ idx = 0;
+ } else {
+ desc = vq->desc;
+ sz = vq->size;
+ idx = desc_indexes[i];
+ }
+
pkts[i] = rte_pktmbuf_alloc(mbuf_pool);
if (unlikely(pkts[i] == NULL)) {
RTE_LOG(ERR, VHOST_DATA,
"Failed to allocate memory for mbuf.\n");
break;
}
- err = copy_desc_to_mbuf(dev, vq, pkts[i], desc_indexes[i],
- mbuf_pool);
+
+ err = copy_desc_to_mbuf(dev, desc, sz, pkts[i], idx, mbuf_pool);
if (unlikely(err)) {
rte_pktmbuf_free(pkts[i]);
break;
}
- }
- rte_smp_wmb();
- rte_smp_rmb();
- vq->used->idx += i;
- vq->last_used_idx += i;
- vhost_log_used_vring(dev, vq, offsetof(struct vring_used, idx),
- sizeof(vq->used->idx));
+ if (unlikely(dev->dequeue_zero_copy)) {
+ struct zcopy_mbuf *zmbuf;
- /* Kick guest if required. */
- if (!(vq->avail->flags & VRING_AVAIL_F_NO_INTERRUPT)
- && (vq->callfd >= 0))
- eventfd_write(vq->callfd, (eventfd_t)1);
+ zmbuf = get_zmbuf(vq);
+ if (!zmbuf) {
+ rte_pktmbuf_free(pkts[i]);
+ break;
+ }
+ zmbuf->mbuf = pkts[i];
+ zmbuf->desc_idx = desc_indexes[i];
+
+ /*
+ * Pin the mbuf with an extra reference; we will check
+ * later to see whether the mbuf has been freed by its
+ * other users (i.e. we are the last user) or not. If that's
+ * the case, we can then update the used ring safely.
+ */
+ rte_mbuf_refcnt_update(pkts[i], 1);
+
+ vq->nr_zmbuf += 1;
+ TAILQ_INSERT_TAIL(&vq->zmbuf_list, zmbuf, next);
+ }
+ }
+ vq->last_avail_idx += i;
+
+ if (likely(dev->dequeue_zero_copy == 0)) {
+ vq->last_used_idx += i;
+ update_used_idx(dev, vq, i);
+ }
out:
if (unlikely(rarp_mbuf != NULL)) {