From 3d9b72106bd664b1267533e7278ff817f942e3c6 Mon Sep 17 00:00:00 2001 From: Christian Ehrhardt Date: Thu, 8 Dec 2016 14:07:29 +0100 Subject: Imported Upstream version 16.11 Change-Id: I1944c65ddc88a9ad70f8c0eb6731552b84fbcb77 Signed-off-by: Christian Ehrhardt --- lib/Makefile | 1 - lib/librte_acl/Makefile | 2 + lib/librte_acl/acl.h | 4 + lib/librte_acl/acl_run.h | 2 + lib/librte_acl/acl_run_altivec.c | 47 + lib/librte_acl/acl_run_altivec.h | 329 +++++ lib/librte_acl/rte_acl.c | 13 + lib/librte_acl/rte_acl.h | 3 +- lib/librte_cfgfile/rte_cfgfile.h | 33 +- lib/librte_cmdline/cmdline.h | 1 + lib/librte_cmdline/cmdline_parse_portlist.h | 1 + lib/librte_cmdline/cmdline_socket.h | 3 + lib/librte_cryptodev/Makefile | 2 +- lib/librte_cryptodev/rte_crypto.h | 2 + lib/librte_cryptodev/rte_crypto_sym.h | 89 +- lib/librte_cryptodev/rte_cryptodev.c | 72 +- lib/librte_cryptodev/rte_cryptodev.h | 60 +- lib/librte_cryptodev/rte_cryptodev_pmd.h | 51 +- lib/librte_cryptodev/rte_cryptodev_version.map | 9 +- lib/librte_eal/bsdapp/eal/Makefile | 6 +- lib/librte_eal/bsdapp/eal/eal.c | 9 +- lib/librte_eal/bsdapp/eal/eal_log.c | 57 - lib/librte_eal/bsdapp/eal/eal_pci.c | 54 +- lib/librte_eal/bsdapp/eal/rte_eal_version.map | 12 + lib/librte_eal/common/Makefile | 4 +- lib/librte_eal/common/eal_common_cpuflags.c | 6 - lib/librte_eal/common/eal_common_dev.c | 95 +- lib/librte_eal/common/eal_common_log.c | 49 +- lib/librte_eal/common/eal_common_memzone.c | 12 - lib/librte_eal/common/eal_common_options.c | 2 +- lib/librte_eal/common/eal_common_pci.c | 59 +- lib/librte_eal/common/eal_common_timer.c | 17 +- lib/librte_eal/common/eal_common_vdev.c | 116 ++ lib/librte_eal/common/eal_filesystem.h | 11 - lib/librte_eal/common/eal_hugepages.h | 3 - lib/librte_eal/common/eal_private.h | 50 +- .../common/include/arch/arm/rte_byteorder.h | 2 + .../common/include/arch/arm/rte_cycles_64.h | 33 + .../common/include/arch/arm/rte_memcpy_32.h | 3 +- .../common/include/arch/arm/rte_prefetch_32.h | 1 + .../common/include/arch/arm/rte_prefetch_64.h | 1 + lib/librte_eal/common/include/arch/arm/rte_vect.h | 1 + .../common/include/arch/ppc_64/rte_atomic.h | 1 + .../common/include/arch/ppc_64/rte_byteorder.h | 1 + .../common/include/arch/ppc_64/rte_cycles.h | 2 + .../common/include/arch/ppc_64/rte_memcpy.h | 3 +- .../common/include/arch/ppc_64/rte_prefetch.h | 1 + .../common/include/arch/ppc_64/rte_vect.h | 60 + .../common/include/arch/x86/rte_atomic.h | 2 + .../common/include/arch/x86/rte_atomic_32.h | 9 + .../common/include/arch/x86/rte_atomic_64.h | 8 + .../common/include/arch/x86/rte_byteorder.h | 2 + .../common/include/arch/x86/rte_byteorder_32.h | 7 + .../common/include/arch/x86/rte_byteorder_64.h | 7 + .../common/include/arch/x86/rte_cycles.h | 2 + .../common/include/arch/x86/rte_memcpy.h | 4 +- .../common/include/arch/x86/rte_prefetch.h | 1 + lib/librte_eal/common/include/arch/x86/rte_rtm.h | 1 + lib/librte_eal/common/include/arch/x86/rte_vect.h | 8 +- lib/librte_eal/common/include/generic/rte_atomic.h | 1 + .../common/include/generic/rte_byteorder.h | 2 + .../common/include/generic/rte_cpuflags.h | 3 + lib/librte_eal/common/include/generic/rte_cycles.h | 24 +- lib/librte_eal/common/include/generic/rte_memcpy.h | 4 + lib/librte_eal/common/include/rte_common.h | 22 +- lib/librte_eal/common/include/rte_dev.h | 88 +- lib/librte_eal/common/include/rte_devargs.h | 9 +- lib/librte_eal/common/include/rte_eal.h | 4 + lib/librte_eal/common/include/rte_interrupts.h | 2 + lib/librte_eal/common/include/rte_log.h | 46 +- 
lib/librte_eal/common/include/rte_malloc.h | 2 +- lib/librte_eal/common/include/rte_memory.h | 9 +- lib/librte_eal/common/include/rte_memzone.h | 11 +- lib/librte_eal/common/include/rte_pci.h | 62 +- lib/librte_eal/common/include/rte_pci_dev_ids.h | 326 ----- lib/librte_eal/common/include/rte_tailq.h | 6 +- lib/librte_eal/common/include/rte_time.h | 8 + lib/librte_eal/common/include/rte_vdev.h | 102 ++ lib/librte_eal/common/include/rte_version.h | 5 +- lib/librte_eal/common/malloc_heap.c | 8 - lib/librte_eal/linuxapp/eal/Makefile | 12 +- lib/librte_eal/linuxapp/eal/eal.c | 38 +- lib/librte_eal/linuxapp/eal/eal_ivshmem.c | 954 ------------- lib/librte_eal/linuxapp/eal/eal_log.c | 40 +- lib/librte_eal/linuxapp/eal/eal_memory.c | 327 +---- lib/librte_eal/linuxapp/eal/eal_pci.c | 23 +- .../linuxapp/eal/include/exec-env/rte_interrupts.h | 1 + .../linuxapp/eal/include/exec-env/rte_kni_common.h | 10 +- lib/librte_eal/linuxapp/eal/rte_eal_version.map | 12 + lib/librte_eal/linuxapp/kni/Makefile | 5 - lib/librte_eal/linuxapp/kni/compat.h | 31 +- lib/librte_eal/linuxapp/kni/ethtool/igb/COPYING | 339 ----- .../linuxapp/kni/ethtool/igb/e1000_82575.c | 2 +- .../linuxapp/kni/ethtool/igb/e1000_82575.h | 2 +- .../linuxapp/kni/ethtool/igb/e1000_api.c | 2 +- .../linuxapp/kni/ethtool/igb/e1000_api.h | 2 +- .../linuxapp/kni/ethtool/igb/e1000_defines.h | 2 +- lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_hw.h | 2 +- .../linuxapp/kni/ethtool/igb/e1000_i210.c | 2 +- .../linuxapp/kni/ethtool/igb/e1000_i210.h | 2 +- .../linuxapp/kni/ethtool/igb/e1000_mac.c | 2 +- .../linuxapp/kni/ethtool/igb/e1000_mac.h | 2 +- .../linuxapp/kni/ethtool/igb/e1000_manage.c | 2 +- .../linuxapp/kni/ethtool/igb/e1000_manage.h | 2 +- .../linuxapp/kni/ethtool/igb/e1000_mbx.c | 2 +- .../linuxapp/kni/ethtool/igb/e1000_mbx.h | 2 +- .../linuxapp/kni/ethtool/igb/e1000_nvm.c | 2 +- .../linuxapp/kni/ethtool/igb/e1000_nvm.h | 2 +- .../linuxapp/kni/ethtool/igb/e1000_osdep.h | 2 +- .../linuxapp/kni/ethtool/igb/e1000_phy.c | 2 +- .../linuxapp/kni/ethtool/igb/e1000_phy.h | 2 +- .../linuxapp/kni/ethtool/igb/e1000_regs.h | 2 +- lib/librte_eal/linuxapp/kni/ethtool/igb/igb.h | 2 +- .../linuxapp/kni/ethtool/igb/igb_debugfs.c | 28 - .../linuxapp/kni/ethtool/igb/igb_ethtool.c | 2 +- .../linuxapp/kni/ethtool/igb/igb_hwmon.c | 260 ---- lib/librte_eal/linuxapp/kni/ethtool/igb/igb_main.c | 3 +- .../linuxapp/kni/ethtool/igb/igb_param.c | 2 +- .../linuxapp/kni/ethtool/igb/igb_procfs.c | 363 ----- lib/librte_eal/linuxapp/kni/ethtool/igb/igb_ptp.c | 944 ------------- .../linuxapp/kni/ethtool/igb/igb_regtest.h | 2 +- lib/librte_eal/linuxapp/kni/ethtool/igb/igb_vmdq.c | 2 +- lib/librte_eal/linuxapp/kni/ethtool/igb/igb_vmdq.h | 2 +- lib/librte_eal/linuxapp/kni/ethtool/igb/kcompat.c | 1482 -------------------- lib/librte_eal/linuxapp/kni/ethtool/igb/kcompat.h | 21 +- .../linuxapp/kni/ethtool/igb/kcompat_ethtool.c | 1171 ---------------- lib/librte_eal/linuxapp/kni/ethtool/ixgbe/COPYING | 339 ----- lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe.h | 2 +- .../linuxapp/kni/ethtool/ixgbe/ixgbe_82598.c | 2 +- .../linuxapp/kni/ethtool/ixgbe/ixgbe_82598.h | 2 +- .../linuxapp/kni/ethtool/ixgbe/ixgbe_82599.c | 2 +- .../linuxapp/kni/ethtool/ixgbe/ixgbe_82599.h | 2 +- .../linuxapp/kni/ethtool/ixgbe/ixgbe_api.c | 2 +- .../linuxapp/kni/ethtool/ixgbe/ixgbe_api.h | 2 +- .../linuxapp/kni/ethtool/ixgbe/ixgbe_common.c | 2 +- .../linuxapp/kni/ethtool/ixgbe/ixgbe_common.h | 2 +- .../linuxapp/kni/ethtool/ixgbe/ixgbe_dcb.h | 2 +- .../linuxapp/kni/ethtool/ixgbe/ixgbe_ethtool.c | 2 +- 
.../linuxapp/kni/ethtool/ixgbe/ixgbe_fcoe.h | 2 +- .../linuxapp/kni/ethtool/ixgbe/ixgbe_main.c | 2 +- .../linuxapp/kni/ethtool/ixgbe/ixgbe_mbx.h | 2 +- .../linuxapp/kni/ethtool/ixgbe/ixgbe_osdep.h | 2 +- .../linuxapp/kni/ethtool/ixgbe/ixgbe_phy.c | 2 +- .../linuxapp/kni/ethtool/ixgbe/ixgbe_phy.h | 2 +- .../linuxapp/kni/ethtool/ixgbe/ixgbe_sriov.h | 73 - .../linuxapp/kni/ethtool/ixgbe/ixgbe_type.h | 2 +- .../linuxapp/kni/ethtool/ixgbe/ixgbe_x540.c | 2 +- .../linuxapp/kni/ethtool/ixgbe/ixgbe_x540.h | 2 +- .../linuxapp/kni/ethtool/ixgbe/kcompat.c | 2 +- .../linuxapp/kni/ethtool/ixgbe/kcompat.h | 14 +- lib/librte_eal/linuxapp/kni/kni_dev.h | 59 +- lib/librte_eal/linuxapp/kni/kni_ethtool.c | 39 +- lib/librte_eal/linuxapp/kni/kni_fifo.h | 30 +- lib/librte_eal/linuxapp/kni/kni_misc.c | 515 ++++--- lib/librte_eal/linuxapp/kni/kni_net.c | 471 ++++--- lib/librte_eal/linuxapp/kni/kni_vhost.c | 199 ++- lib/librte_ether/Makefile | 7 +- lib/librte_ether/rte_dev_info.h | 2 + lib/librte_ether/rte_eth_ctrl.h | 4 + lib/librte_ether/rte_ethdev.c | 285 +--- lib/librte_ether/rte_ethdev.h | 71 +- lib/librte_ether/rte_ether.h | 416 ------ lib/librte_ether/rte_ether_version.map | 10 +- lib/librte_hash/rte_cuckoo_hash.c | 472 +++---- lib/librte_hash/rte_cuckoo_hash.h | 70 +- lib/librte_hash/rte_cuckoo_hash_x86.h | 25 +- lib/librte_hash/rte_fbk_hash.h | 2 +- lib/librte_hash/rte_thash.h | 3 + lib/librte_ip_frag/Makefile | 1 + lib/librte_ip_frag/rte_ip_frag.h | 2 +- lib/librte_ivshmem/Makefile | 54 - lib/librte_ivshmem/rte_ivshmem.c | 919 ------------ lib/librte_ivshmem/rte_ivshmem.h | 165 --- lib/librte_ivshmem/rte_ivshmem_version.map | 12 - lib/librte_kni/rte_kni.c | 74 +- lib/librte_kni/rte_kni.h | 3 +- lib/librte_lpm/Makefile | 2 + lib/librte_lpm/rte_lpm.c | 22 +- lib/librte_lpm/rte_lpm.h | 9 +- lib/librte_lpm/rte_lpm_altivec.h | 154 ++ lib/librte_lpm/rte_lpm_neon.h | 1 + lib/librte_lpm/rte_lpm_sse.h | 1 + lib/librte_mbuf/Makefile | 4 +- lib/librte_mbuf/rte_mbuf.c | 155 +- lib/librte_mbuf/rte_mbuf.h | 671 +++------ lib/librte_mbuf/rte_mbuf_ptype.c | 227 +++ lib/librte_mbuf/rte_mbuf_ptype.h | 668 +++++++++ lib/librte_mbuf/rte_mbuf_version.map | 17 + lib/librte_mempool/rte_mempool.c | 6 +- lib/librte_mempool/rte_mempool.h | 14 +- lib/librte_meter/rte_meter.h | 24 +- lib/librte_net/Makefile | 14 +- lib/librte_net/rte_ether.h | 417 ++++++ lib/librte_net/rte_gre.h | 71 + lib/librte_net/rte_ip.h | 71 + lib/librte_net/rte_net.c | 517 +++++++ lib/librte_net/rte_net.h | 94 ++ lib/librte_net/rte_net_version.map | 6 + lib/librte_pdump/rte_pdump.c | 41 +- lib/librte_pdump/rte_pdump.h | 4 + lib/librte_pipeline/rte_pipeline.h | 4 +- lib/librte_port/Makefile | 2 + lib/librte_port/rte_port_fd.c | 552 ++++++++ lib/librte_port/rte_port_fd.h | 105 ++ lib/librte_port/rte_port_source_sink.h | 4 +- lib/librte_port/rte_port_version.map | 9 + lib/librte_reorder/rte_reorder.h | 2 + lib/librte_ring/rte_ring.h | 4 +- lib/librte_sched/rte_bitmap.h | 3 +- lib/librte_sched/rte_reciprocal.h | 2 + lib/librte_sched/rte_sched_common.h | 1 + lib/librte_table/Makefile | 4 +- lib/librte_table/rte_table_hash.h | 31 +- lib/librte_table/rte_table_hash_cuckoo.c | 382 +++++ lib/librte_table/rte_table_hash_key16.c | 4 +- lib/librte_table/rte_table_hash_key32.c | 4 +- lib/librte_table/rte_table_hash_key8.c | 4 +- lib/librte_table/rte_table_version.map | 7 + lib/librte_timer/rte_timer.h | 2 + lib/librte_vhost/Makefile | 13 +- lib/librte_vhost/eventfd_link/Makefile | 41 - lib/librte_vhost/eventfd_link/eventfd_link.c | 277 ---- 
lib/librte_vhost/eventfd_link/eventfd_link.h | 94 -- lib/librte_vhost/fd_man.c | 299 ++++ lib/librte_vhost/fd_man.h | 67 + lib/librte_vhost/libvirt/qemu-wrap.py | 387 ----- lib/librte_vhost/rte_virtio_net.h | 10 +- lib/librte_vhost/socket.c | 615 ++++++++ lib/librte_vhost/vhost-net.h | 250 ---- lib/librte_vhost/vhost.c | 430 ++++++ lib/librte_vhost/vhost.h | 293 ++++ lib/librte_vhost/vhost_cuse/eventfd_copy.c | 104 -- lib/librte_vhost/vhost_cuse/eventfd_copy.h | 45 - lib/librte_vhost/vhost_cuse/vhost-net-cdev.c | 431 ------ lib/librte_vhost/vhost_cuse/virtio-net-cdev.c | 433 ------ lib/librte_vhost/vhost_cuse/virtio-net-cdev.h | 56 - lib/librte_vhost/vhost_rxtx.c | 931 ------------ lib/librte_vhost/vhost_user.c | 1033 ++++++++++++++ lib/librte_vhost/vhost_user.h | 128 ++ lib/librte_vhost/vhost_user/fd_man.c | 299 ---- lib/librte_vhost/vhost_user/fd_man.h | 67 - lib/librte_vhost/vhost_user/vhost-net-user.c | 795 ----------- lib/librte_vhost/vhost_user/vhost-net-user.h | 113 -- lib/librte_vhost/vhost_user/virtio-net-user.c | 470 ------- lib/librte_vhost/vhost_user/virtio-net-user.h | 62 - lib/librte_vhost/virtio-net.c | 847 ----------- lib/librte_vhost/virtio_net.c | 1182 ++++++++++++++++ 247 files changed, 10385 insertions(+), 16407 deletions(-) create mode 100644 lib/librte_acl/acl_run_altivec.c create mode 100644 lib/librte_acl/acl_run_altivec.h delete mode 100644 lib/librte_eal/bsdapp/eal/eal_log.c create mode 100644 lib/librte_eal/common/eal_common_vdev.c create mode 100644 lib/librte_eal/common/include/arch/ppc_64/rte_vect.h delete mode 100644 lib/librte_eal/common/include/rte_pci_dev_ids.h create mode 100644 lib/librte_eal/common/include/rte_vdev.h delete mode 100644 lib/librte_eal/linuxapp/eal/eal_ivshmem.c delete mode 100644 lib/librte_eal/linuxapp/kni/ethtool/igb/COPYING delete mode 100644 lib/librte_eal/linuxapp/kni/ethtool/igb/igb_debugfs.c delete mode 100644 lib/librte_eal/linuxapp/kni/ethtool/igb/igb_hwmon.c delete mode 100644 lib/librte_eal/linuxapp/kni/ethtool/igb/igb_procfs.c delete mode 100644 lib/librte_eal/linuxapp/kni/ethtool/igb/igb_ptp.c delete mode 100644 lib/librte_eal/linuxapp/kni/ethtool/igb/kcompat.c delete mode 100644 lib/librte_eal/linuxapp/kni/ethtool/igb/kcompat_ethtool.c delete mode 100644 lib/librte_eal/linuxapp/kni/ethtool/ixgbe/COPYING delete mode 100644 lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_sriov.h delete mode 100644 lib/librte_ether/rte_ether.h delete mode 100644 lib/librte_ivshmem/Makefile delete mode 100644 lib/librte_ivshmem/rte_ivshmem.c delete mode 100644 lib/librte_ivshmem/rte_ivshmem.h delete mode 100644 lib/librte_ivshmem/rte_ivshmem_version.map create mode 100644 lib/librte_lpm/rte_lpm_altivec.h create mode 100644 lib/librte_mbuf/rte_mbuf_ptype.c create mode 100644 lib/librte_mbuf/rte_mbuf_ptype.h create mode 100644 lib/librte_net/rte_ether.h create mode 100644 lib/librte_net/rte_gre.h create mode 100644 lib/librte_net/rte_net.c create mode 100644 lib/librte_net/rte_net.h create mode 100644 lib/librte_net/rte_net_version.map create mode 100644 lib/librte_port/rte_port_fd.c create mode 100644 lib/librte_port/rte_port_fd.h create mode 100644 lib/librte_table/rte_table_hash_cuckoo.c delete mode 100644 lib/librte_vhost/eventfd_link/Makefile delete mode 100644 lib/librte_vhost/eventfd_link/eventfd_link.c delete mode 100644 lib/librte_vhost/eventfd_link/eventfd_link.h create mode 100644 lib/librte_vhost/fd_man.c create mode 100644 lib/librte_vhost/fd_man.h delete mode 100755 lib/librte_vhost/libvirt/qemu-wrap.py create mode 100644 
lib/librte_vhost/socket.c delete mode 100644 lib/librte_vhost/vhost-net.h create mode 100644 lib/librte_vhost/vhost.c create mode 100644 lib/librte_vhost/vhost.h delete mode 100644 lib/librte_vhost/vhost_cuse/eventfd_copy.c delete mode 100644 lib/librte_vhost/vhost_cuse/eventfd_copy.h delete mode 100644 lib/librte_vhost/vhost_cuse/vhost-net-cdev.c delete mode 100644 lib/librte_vhost/vhost_cuse/virtio-net-cdev.c delete mode 100644 lib/librte_vhost/vhost_cuse/virtio-net-cdev.h delete mode 100644 lib/librte_vhost/vhost_rxtx.c create mode 100644 lib/librte_vhost/vhost_user.c create mode 100644 lib/librte_vhost/vhost_user.h delete mode 100644 lib/librte_vhost/vhost_user/fd_man.c delete mode 100644 lib/librte_vhost/vhost_user/fd_man.h delete mode 100644 lib/librte_vhost/vhost_user/vhost-net-user.c delete mode 100644 lib/librte_vhost/vhost_user/vhost-net-user.h delete mode 100644 lib/librte_vhost/vhost_user/virtio-net-user.c delete mode 100644 lib/librte_vhost/vhost_user/virtio-net-user.h delete mode 100644 lib/librte_vhost/virtio-net.c create mode 100644 lib/librte_vhost/virtio_net.c diff --git a/lib/Makefile b/lib/Makefile index ca7c02fd..990f23a4 100644 --- a/lib/Makefile +++ b/lib/Makefile @@ -61,7 +61,6 @@ DIRS-$(CONFIG_RTE_LIBRTE_PDUMP) += librte_pdump ifeq ($(CONFIG_RTE_EXEC_ENV_LINUXAPP),y) DIRS-$(CONFIG_RTE_LIBRTE_KNI) += librte_kni -DIRS-$(CONFIG_RTE_LIBRTE_IVSHMEM) += librte_ivshmem endif include $(RTE_SDK)/mk/rte.subdir.mk diff --git a/lib/librte_acl/Makefile b/lib/librte_acl/Makefile index 9803e9dd..d05be665 100644 --- a/lib/librte_acl/Makefile +++ b/lib/librte_acl/Makefile @@ -52,6 +52,8 @@ SRCS-$(CONFIG_RTE_LIBRTE_ACL) += acl_run_scalar.c ifneq ($(filter y,$(CONFIG_RTE_ARCH_ARM) $(CONFIG_RTE_ARCH_ARM64)),) SRCS-$(CONFIG_RTE_LIBRTE_ACL) += acl_run_neon.c CFLAGS_acl_run_neon.o += -flax-vector-conversions -Wno-maybe-uninitialized +else ifeq ($(CONFIG_RTE_ARCH_PPC_64),y) +SRCS-$(CONFIG_RTE_LIBRTE_ACL) += acl_run_altivec.c else SRCS-$(CONFIG_RTE_LIBRTE_ACL) += acl_run_sse.c #check if flag for SSE4.1 is already on, if not set it up manually diff --git a/lib/librte_acl/acl.h b/lib/librte_acl/acl.h index 09d67841..6664a55e 100644 --- a/lib/librte_acl/acl.h +++ b/lib/librte_acl/acl.h @@ -234,6 +234,10 @@ int rte_acl_classify_neon(const struct rte_acl_ctx *ctx, const uint8_t **data, uint32_t *results, uint32_t num, uint32_t categories); +int +rte_acl_classify_altivec(const struct rte_acl_ctx *ctx, const uint8_t **data, + uint32_t *results, uint32_t num, uint32_t categories); + #ifdef __cplusplus } #endif /* __cplusplus */ diff --git a/lib/librte_acl/acl_run.h b/lib/librte_acl/acl_run.h index b2fc42c6..024f3931 100644 --- a/lib/librte_acl/acl_run.h +++ b/lib/librte_acl/acl_run.h @@ -39,7 +39,9 @@ #define MAX_SEARCHES_AVX16 16 #define MAX_SEARCHES_SSE8 8 +#define MAX_SEARCHES_ALTIVEC8 8 #define MAX_SEARCHES_SSE4 4 +#define MAX_SEARCHES_ALTIVEC4 4 #define MAX_SEARCHES_SCALAR 2 #define GET_NEXT_4BYTES(prm, idx) \ diff --git a/lib/librte_acl/acl_run_altivec.c b/lib/librte_acl/acl_run_altivec.c new file mode 100644 index 00000000..35235260 --- /dev/null +++ b/lib/librte_acl/acl_run_altivec.c @@ -0,0 +1,47 @@ +/*- + * BSD LICENSE + * + * Copyright (C) IBM Corporation 2016. + * All rights reserved.
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include "acl_run_altivec.h" + +int +rte_acl_classify_altivec(const struct rte_acl_ctx *ctx, const uint8_t **data, + uint32_t *results, uint32_t num, uint32_t categories) +{ + if (likely(num >= MAX_SEARCHES_ALTIVEC8)) + return search_altivec_8(ctx, data, results, num, categories); + else if (num >= MAX_SEARCHES_ALTIVEC4) + return search_altivec_4(ctx, data, results, num, categories); + else + return rte_acl_classify_scalar(ctx, data, results, num, + categories); +} diff --git a/lib/librte_acl/acl_run_altivec.h b/lib/librte_acl/acl_run_altivec.h new file mode 100644 index 00000000..7d329bcf --- /dev/null +++ b/lib/librte_acl/acl_run_altivec.h @@ -0,0 +1,329 @@ +/* + * BSD LICENSE + * + * Copyright (C) IBM Corporation 2016. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of IBM Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*/ + +#include "acl_run.h" +#include "acl_vect.h" + +struct _altivec_acl_const { + rte_xmm_t xmm_shuffle_input; + rte_xmm_t xmm_index_mask; + rte_xmm_t xmm_ones_16; + rte_xmm_t range_base; +} altivec_acl_const __attribute__((aligned(RTE_CACHE_LINE_SIZE))) = { + { + .u32 = {0x00000000, 0x04040404, 0x08080808, 0x0c0c0c0c} + }, + { + .u32 = {RTE_ACL_NODE_INDEX, RTE_ACL_NODE_INDEX, + RTE_ACL_NODE_INDEX, RTE_ACL_NODE_INDEX} + }, + { + .u16 = {1, 1, 1, 1, 1, 1, 1, 1} + }, + { + .u32 = {0xffffff00, 0xffffff04, 0xffffff08, 0xffffff0c} + }, +}; + +/* + * Resolve priority for multiple results (altivec version). + * This consists of comparing the priority of the current traversal with the + * running set of results for the packet. + * For each result, keep a running array of the result (rule number) and + * its priority for each category. + */ +static inline void +resolve_priority_altivec(uint64_t transition, int n, + const struct rte_acl_ctx *ctx, struct parms *parms, + const struct rte_acl_match_results *p, uint32_t categories) +{ + uint32_t x; + xmm_t results, priority, results1, priority1; + vector bool int selector; + xmm_t *saved_results, *saved_priority; + + for (x = 0; x < categories; x += RTE_ACL_RESULTS_MULTIPLIER) { + + saved_results = (xmm_t *)(&parms[n].cmplt->results[x]); + saved_priority = + (xmm_t *)(&parms[n].cmplt->priority[x]); + + /* get results and priorities for completed trie */ + results = *(const xmm_t *)&p[transition].results[x]; + priority = *(const xmm_t *)&p[transition].priority[x]; + + /* if this is not the first completed trie */ + if (parms[n].cmplt->count != ctx->num_tries) { + + /* get running best results and their priorities */ + results1 = *saved_results; + priority1 = *saved_priority; + + /* select results that are highest priority */ + selector = vec_cmpgt(priority1, priority); + results = vec_sel(results, results1, selector); + priority = vec_sel(priority, priority1, + selector); + } + + /* save running best results and their priorities */ + *saved_results = results; + *saved_priority = priority; + } +} + +/* + * Check for any match in 4 transitions + */ +static inline __attribute__((always_inline)) uint32_t +check_any_match_x4(uint64_t val[]) +{ + return (val[0] | val[1] | val[2] | val[3]) & RTE_ACL_NODE_MATCH; +} + +static inline __attribute__((always_inline)) void +acl_match_check_x4(int slot, const struct rte_acl_ctx *ctx, struct parms *parms, + struct acl_flow_data *flows, uint64_t transitions[]) +{ + while (check_any_match_x4(transitions)) { + transitions[0] = acl_match_check(transitions[0], slot, ctx, + parms, flows, resolve_priority_altivec); + transitions[1] = acl_match_check(transitions[1], slot + 1, ctx, + parms, flows, resolve_priority_altivec); + transitions[2] = acl_match_check(transitions[2], slot + 2, ctx, + parms, flows, resolve_priority_altivec); + transitions[3] = acl_match_check(transitions[3], slot + 3, ctx, + parms, flows, resolve_priority_altivec); + } +} + +/* + * Process 4 transitions (in 2 XMM registers)
in parallel + */ +static inline __attribute__((optimize("O2"))) xmm_t +transition4(xmm_t next_input, const uint64_t *trans, + xmm_t *indices1, xmm_t *indices2) +{ + xmm_t addr, tr_lo, tr_hi; + xmm_t in, node_type, r, t; + xmm_t dfa_ofs, quad_ofs; + xmm_t *index_mask, *tp; + vector bool int dfa_msk; + vector signed char zeroes = {}; + union { + uint64_t d64[2]; + uint32_t d32[4]; + } v; + + /* Move low 32 into tr_lo and high 32 into tr_hi */ + tr_lo = (xmm_t){(*indices1)[0], (*indices1)[2], + (*indices2)[0], (*indices2)[2]}; + tr_hi = (xmm_t){(*indices1)[1], (*indices1)[3], + (*indices2)[1], (*indices2)[3]}; + + /* Calculate the address (array index) for all 4 transitions. */ + index_mask = (xmm_t *)&altivec_acl_const.xmm_index_mask.u32; + t = vec_xor(*index_mask, *index_mask); + in = vec_perm(next_input, (xmm_t){}, + *(vector unsigned char *)&altivec_acl_const.xmm_shuffle_input); + + /* Calc node type and node addr */ + node_type = vec_and(vec_nor(*index_mask, *index_mask), tr_lo); + addr = vec_and(tr_lo, *index_mask); + + /* mask for DFA type(0) nodes */ + dfa_msk = vec_cmpeq(node_type, t); + + /* DFA calculations. */ + r = vec_sr(in, (vector unsigned int){30, 30, 30, 30}); + tp = (xmm_t *)&altivec_acl_const.range_base.u32; + r = vec_add(r, *tp); + t = vec_sr(in, (vector unsigned int){24, 24, 24, 24}); + r = vec_perm(tr_hi, (xmm_t){(uint16_t)0 << 16}, + (vector unsigned char)r); + + dfa_ofs = vec_sub(t, r); + + /* QUAD/SINGLE caluclations. */ + t = (xmm_t)vec_cmpgt((vector signed char)in, (vector signed char)tr_hi); + t = (xmm_t)vec_sel( + vec_sel( + (vector signed char)vec_sub( + zeroes, (vector signed char)t), + (vector signed char)t, + vec_cmpgt((vector signed char)t, zeroes)), + zeroes, + vec_cmpeq((vector signed char)t, zeroes)); + + t = (xmm_t)vec_msum((vector signed char)t, + (vector unsigned char)t, (xmm_t){}); + quad_ofs = (xmm_t)vec_msum((vector signed short)t, + *(vector signed short *)&altivec_acl_const.xmm_ones_16.u16, + (xmm_t){}); + + /* blend DFA and QUAD/SINGLE. */ + t = vec_sel(quad_ofs, dfa_ofs, dfa_msk); + + /* calculate address for next transitions. */ + addr = vec_add(addr, t); + + v.d64[0] = (uint64_t)trans[addr[0]]; + v.d64[1] = (uint64_t)trans[addr[1]]; + *indices1 = (xmm_t){v.d32[0], v.d32[1], v.d32[2], v.d32[3]}; + v.d64[0] = (uint64_t)trans[addr[2]]; + v.d64[1] = (uint64_t)trans[addr[3]]; + *indices2 = (xmm_t){v.d32[0], v.d32[1], v.d32[2], v.d32[3]}; + + return vec_sr(next_input, + (vector unsigned int){CHAR_BIT, CHAR_BIT, CHAR_BIT, CHAR_BIT}); +} + +/* + * Execute trie traversal with 8 traversals in parallel + */ +static inline int +search_altivec_8(const struct rte_acl_ctx *ctx, const uint8_t **data, + uint32_t *results, uint32_t total_packets, uint32_t categories) +{ + int n; + struct acl_flow_data flows; + uint64_t index_array[MAX_SEARCHES_ALTIVEC8]; + struct completion cmplt[MAX_SEARCHES_ALTIVEC8]; + struct parms parms[MAX_SEARCHES_ALTIVEC8]; + xmm_t input0, input1; + + acl_set_flow(&flows, cmplt, RTE_DIM(cmplt), data, results, + total_packets, categories, ctx->trans_table); + + for (n = 0; n < MAX_SEARCHES_ALTIVEC8; n++) { + cmplt[n].count = 0; + index_array[n] = acl_start_next_trie(&flows, parms, n, ctx); + } + + /* Check for any matches. */ + acl_match_check_x4(0, ctx, parms, &flows, (uint64_t *)&index_array[0]); + acl_match_check_x4(4, ctx, parms, &flows, (uint64_t *)&index_array[4]); + + while (flows.started > 0) { + + /* Gather 4 bytes of input data for each stream. 
*/ + input0 = (xmm_t){GET_NEXT_4BYTES(parms, 0), + GET_NEXT_4BYTES(parms, 1), + GET_NEXT_4BYTES(parms, 2), + GET_NEXT_4BYTES(parms, 3)}; + + input1 = (xmm_t){GET_NEXT_4BYTES(parms, 4), + GET_NEXT_4BYTES(parms, 5), + GET_NEXT_4BYTES(parms, 6), + GET_NEXT_4BYTES(parms, 7)}; + + /* Process the 4 bytes of input on each stream. */ + + input0 = transition4(input0, flows.trans, + (xmm_t *)&index_array[0], (xmm_t *)&index_array[2]); + input1 = transition4(input1, flows.trans, + (xmm_t *)&index_array[4], (xmm_t *)&index_array[6]); + + input0 = transition4(input0, flows.trans, + (xmm_t *)&index_array[0], (xmm_t *)&index_array[2]); + input1 = transition4(input1, flows.trans, + (xmm_t *)&index_array[4], (xmm_t *)&index_array[6]); + + input0 = transition4(input0, flows.trans, + (xmm_t *)&index_array[0], (xmm_t *)&index_array[2]); + input1 = transition4(input1, flows.trans, + (xmm_t *)&index_array[4], (xmm_t *)&index_array[6]); + + input0 = transition4(input0, flows.trans, + (xmm_t *)&index_array[0], (xmm_t *)&index_array[2]); + input1 = transition4(input1, flows.trans, + (xmm_t *)&index_array[4], (xmm_t *)&index_array[6]); + + /* Check for any matches. */ + acl_match_check_x4(0, ctx, parms, &flows, + (uint64_t *)&index_array[0]); + acl_match_check_x4(4, ctx, parms, &flows, + (uint64_t *)&index_array[4]); + } + + return 0; +} + +/* + * Execute trie traversal with 4 traversals in parallel + */ +static inline int +search_altivec_4(const struct rte_acl_ctx *ctx, const uint8_t **data, + uint32_t *results, int total_packets, uint32_t categories) +{ + int n; + struct acl_flow_data flows; + uint64_t index_array[MAX_SEARCHES_ALTIVEC4]; + struct completion cmplt[MAX_SEARCHES_ALTIVEC4]; + struct parms parms[MAX_SEARCHES_ALTIVEC4]; + xmm_t input; + + acl_set_flow(&flows, cmplt, RTE_DIM(cmplt), data, results, + total_packets, categories, ctx->trans_table); + + for (n = 0; n < MAX_SEARCHES_ALTIVEC4; n++) { + cmplt[n].count = 0; + index_array[n] = acl_start_next_trie(&flows, parms, n, ctx); + } + + /* Check for any matches. */ + acl_match_check_x4(0, ctx, parms, &flows, index_array); + + while (flows.started > 0) { + + /* Gather 4 bytes of input data for each stream. */ + input = (xmm_t){GET_NEXT_4BYTES(parms, 0), + GET_NEXT_4BYTES(parms, 1), + GET_NEXT_4BYTES(parms, 2), + GET_NEXT_4BYTES(parms, 3)}; + + /* Process the 4 bytes of input on each stream. */ + input = transition4(input, flows.trans, + (xmm_t *)&index_array[0], (xmm_t *)&index_array[2]); + input = transition4(input, flows.trans, + (xmm_t *)&index_array[0], (xmm_t *)&index_array[2]); + input = transition4(input, flows.trans, + (xmm_t *)&index_array[0], (xmm_t *)&index_array[2]); + input = transition4(input, flows.trans, + (xmm_t *)&index_array[0], (xmm_t *)&index_array[2]); + + /* Check for any matches. 
*/ + acl_match_check_x4(0, ctx, parms, &flows, index_array); + } + + return 0; +} diff --git a/lib/librte_acl/rte_acl.c b/lib/librte_acl/rte_acl.c index 4ba9786b..8b7e92ce 100644 --- a/lib/librte_acl/rte_acl.c +++ b/lib/librte_acl/rte_acl.c @@ -75,12 +75,23 @@ rte_acl_classify_neon(__rte_unused const struct rte_acl_ctx *ctx, return -ENOTSUP; } +int __attribute__ ((weak)) +rte_acl_classify_altivec(__rte_unused const struct rte_acl_ctx *ctx, + __rte_unused const uint8_t **data, + __rte_unused uint32_t *results, + __rte_unused uint32_t num, + __rte_unused uint32_t categories) +{ + return -ENOTSUP; +} + static const rte_acl_classify_t classify_fns[] = { [RTE_ACL_CLASSIFY_DEFAULT] = rte_acl_classify_scalar, [RTE_ACL_CLASSIFY_SCALAR] = rte_acl_classify_scalar, [RTE_ACL_CLASSIFY_SSE] = rte_acl_classify_sse, [RTE_ACL_CLASSIFY_AVX2] = rte_acl_classify_avx2, [RTE_ACL_CLASSIFY_NEON] = rte_acl_classify_neon, + [RTE_ACL_CLASSIFY_ALTIVEC] = rte_acl_classify_altivec, }; /* by default, use always available scalar code path. */ @@ -119,6 +130,8 @@ rte_acl_init(void) #elif defined(RTE_ARCH_ARM) if (rte_cpu_get_flag_enabled(RTE_CPUFLAG_NEON)) alg = RTE_ACL_CLASSIFY_NEON; +#elif defined(RTE_ARCH_PPC_64) + alg = RTE_ACL_CLASSIFY_ALTIVEC; #else #ifdef CC_AVX2_SUPPORT if (rte_cpu_get_flag_enabled(RTE_CPUFLAG_AVX2)) diff --git a/lib/librte_acl/rte_acl.h b/lib/librte_acl/rte_acl.h index 0979a098..caa91f7e 100644 --- a/lib/librte_acl/rte_acl.h +++ b/lib/librte_acl/rte_acl.h @@ -144,7 +144,7 @@ struct rte_acl_rule_data { struct rte_acl_field field[fld_num]; \ } -RTE_ACL_RULE_DEF(rte_acl_rule, 0); +RTE_ACL_RULE_DEF(rte_acl_rule,); #define RTE_ACL_RULE_SZ(fld_num) \ (sizeof(struct rte_acl_rule) + sizeof(struct rte_acl_field) * (fld_num)) @@ -271,6 +271,7 @@ enum rte_acl_classify_alg { RTE_ACL_CLASSIFY_SSE = 2, /**< requires SSE4.1 support. */ RTE_ACL_CLASSIFY_AVX2 = 3, /**< requires AVX2 support. */ RTE_ACL_CLASSIFY_NEON = 4, /**< requires NEON support. */ + RTE_ACL_CLASSIFY_ALTIVEC = 5, /**< requires ALTIVEC support. */ RTE_ACL_CLASSIFY_NUM /* should always be the last one. */ }; diff --git a/lib/librte_cfgfile/rte_cfgfile.h b/lib/librte_cfgfile/rte_cfgfile.h index f649836c..b40e6a13 100644 --- a/lib/librte_cfgfile/rte_cfgfile.h +++ b/lib/librte_cfgfile/rte_cfgfile.h @@ -34,6 +34,8 @@ #ifndef __INCLUDE_RTE_CFGFILE_H__ #define __INCLUDE_RTE_CFGFILE_H__ +#include + #ifdef __cplusplus extern "C" { #endif @@ -86,7 +88,7 @@ struct rte_cfgfile *rte_cfgfile_load(const char *filename, int flags); * @param length * Maximum section name length * @return -* 0 on success, error code otherwise +* Number of sections */ int rte_cfgfile_num_sections(struct rte_cfgfile *cfg, const char *sec_name, size_t length); @@ -100,13 +102,13 @@ int rte_cfgfile_num_sections(struct rte_cfgfile *cfg, const char *sec_name, * @param cfg * Config file * @param sections -* Array containing section names after successful invocation. Each elemen +* Array containing section names after successful invocation. Each element * of this array should be preallocated by the user with at least * CFG_NAME_LEN characters. 
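[Editorial aside on the rte_acl hunks above: the AltiVec classifier is registered in classify_fns[] and selected automatically by rte_acl_init() on ppc64, but an application can also request it explicitly. A minimal sketch, assuming "ctx" is an already built ACL context; the helper name select_acl_alg is hypothetical:]

#include <rte_acl.h>

/* Prefer the AltiVec classifier added by this release, keeping the
 * always-available scalar path as a fallback. On builds where only the
 * weak stub is linked, classification would return -ENOTSUP at run time. */
static void
select_acl_alg(struct rte_acl_ctx *ctx)
{
	if (rte_acl_set_ctx_classify(ctx, RTE_ACL_CLASSIFY_ALTIVEC) != 0)
		rte_acl_set_ctx_classify(ctx, RTE_ACL_CLASSIFY_SCALAR);
}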
* @param max_sections * Maximum number of section names to be stored in sections array * @return -* 0 on success, error code otherwise +* Number of populated section names */ int rte_cfgfile_sections(struct rte_cfgfile *cfg, char *sections[], int max_sections); @@ -134,12 +136,13 @@ int rte_cfgfile_has_section(struct rte_cfgfile *cfg, const char *sectionname); * @param sectionname * Section name * @return -* Number of entries in section +* Number of entries in section on success, -1 otherwise */ int rte_cfgfile_section_num_entries(struct rte_cfgfile *cfg, const char *sectionname); -/** Get section entries as key-value pairs +/** +* Get section entries as key-value pairs * * If multiple sections have the given name this function operates on the * first one. @@ -154,14 +157,15 @@ int rte_cfgfile_section_num_entries(struct rte_cfgfile *cfg, * @param max_entries * Maximum number of section entries to be stored in entries array * @return -* 0 on success, error code otherwise +* Number of entries populated on success, -1 otherwise */ int rte_cfgfile_section_entries(struct rte_cfgfile *cfg, const char *sectionname, struct rte_cfgfile_entry *entries, int max_entries); -/** Get section entries as key-value pairs +/** +* Get section entries as key-value pairs * * The index of a section is the same as the index of its name in the * result of rte_cfgfile_sections. This API can be used when there are @@ -180,7 +184,7 @@ int rte_cfgfile_section_entries(struct rte_cfgfile *cfg, * @param max_entries * Maximum number of section entries to be stored in entries array * @return -* Number of entries populated on success, negative error code otherwise +* Number of entries populated on success, -1 otherwise */ int rte_cfgfile_section_entries_by_index(struct rte_cfgfile *cfg, int index, @@ -188,7 +192,8 @@ int rte_cfgfile_section_entries_by_index(struct rte_cfgfile *cfg, struct rte_cfgfile_entry *entries, int max_entries); -/** Get value of the named entry in named config file section +/** +* Get value of the named entry in named config file section * * If multiple sections have the given name this function operates on the * first one. @@ -200,13 +205,14 @@ int rte_cfgfile_section_entries_by_index(struct rte_cfgfile *cfg, * @param entryname * Entry name * @return -* Entry value +* Entry value on success, NULL otherwise */ const char *rte_cfgfile_get_entry(struct rte_cfgfile *cfg, const char *sectionname, const char *entryname); -/** Check if given entry exists in named config file section +/** +* Check if given entry exists in named config file section * * If multiple sections have the given name this function operates on the * first one.
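[Editorial aside on the rte_cfgfile return-value corrections above (counts on success, -1 or NULL on failure): a minimal usage sketch; the file name "app.ini" and the section/key names are hypothetical:]

#include <stdio.h>
#include <rte_cfgfile.h>

static int
dump_cfg(void)
{
	struct rte_cfgfile *cfg = rte_cfgfile_load("app.ini", 0);
	const char *val;

	if (cfg == NULL)
		return -1;

	/* Returns the section count directly, not 0/-errno. */
	printf("%d sections\n", rte_cfgfile_num_sections(cfg, "", 0));

	val = rte_cfgfile_get_entry(cfg, "port0", "mac");
	if (val != NULL)	/* NULL on miss, not an error code */
		printf("port0/mac = %s\n", val);

	return rte_cfgfile_close(cfg);	/* 0 on success, -1 otherwise */
}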
@@ -223,12 +229,13 @@ const char *rte_cfgfile_get_entry(struct rte_cfgfile *cfg, int rte_cfgfile_has_entry(struct rte_cfgfile *cfg, const char *sectionname, const char *entryname); -/** Close config file +/** +* Close config file * * @param cfg * Config file * @return -* 0 on success, error code otherwise +* 0 on success, -1 otherwise */ int rte_cfgfile_close(struct rte_cfgfile *cfg); diff --git a/lib/librte_cmdline/cmdline.h b/lib/librte_cmdline/cmdline.h index 2578ca81..65d73b01 100644 --- a/lib/librte_cmdline/cmdline.h +++ b/lib/librte_cmdline/cmdline.h @@ -63,6 +63,7 @@ #include #include +#include /** * @file diff --git a/lib/librte_cmdline/cmdline_parse_portlist.h b/lib/librte_cmdline/cmdline_parse_portlist.h index 73d70e05..058df3ee 100644 --- a/lib/librte_cmdline/cmdline_parse_portlist.h +++ b/lib/librte_cmdline/cmdline_parse_portlist.h @@ -61,6 +61,7 @@ #ifndef _PARSE_PORTLIST_H_ #define _PARSE_PORTLIST_H_ +#include #include #ifdef __cplusplus diff --git a/lib/librte_cmdline/cmdline_socket.h b/lib/librte_cmdline/cmdline_socket.h index 8cc2dfbc..aa6068e7 100644 --- a/lib/librte_cmdline/cmdline_socket.h +++ b/lib/librte_cmdline/cmdline_socket.h @@ -61,6 +61,9 @@ #ifndef _CMDLINE_SOCKET_H_ #define _CMDLINE_SOCKET_H_ +#include +#include + #ifdef __cplusplus extern "C" { #endif diff --git a/lib/librte_cryptodev/Makefile b/lib/librte_cryptodev/Makefile index 314a0466..aebf5d9f 100644 --- a/lib/librte_cryptodev/Makefile +++ b/lib/librte_cryptodev/Makefile @@ -34,7 +34,7 @@ include $(RTE_SDK)/mk/rte.vars.mk LIB = librte_cryptodev.a # library version -LIBABIVER := 1 +LIBABIVER := 2 # build flags CFLAGS += -O3 diff --git a/lib/librte_cryptodev/rte_crypto.h b/lib/librte_cryptodev/rte_crypto.h index 5bc3eaa7..90195188 100644 --- a/lib/librte_cryptodev/rte_crypto.h +++ b/lib/librte_cryptodev/rte_crypto.h @@ -48,6 +48,7 @@ extern "C" { #include #include #include +#include #include "rte_crypto_sym.h" @@ -111,6 +112,7 @@ struct rte_crypto_op { void *opaque_data; /**< Opaque pointer for user data */ + RTE_STD_C11 union { struct rte_crypto_sym_op *sym; /**< Symmetric operation parameters */ diff --git a/lib/librte_cryptodev/rte_crypto_sym.h b/lib/librte_cryptodev/rte_crypto_sym.h index d9bd8210..d3d38e4f 100644 --- a/lib/librte_cryptodev/rte_crypto_sym.h +++ b/lib/librte_cryptodev/rte_crypto_sym.h @@ -51,6 +51,7 @@ extern "C" { #include #include #include +#include /** Symmetric Cipher Algorithms */ @@ -83,11 +84,11 @@ enum rte_crypto_cipher_algorithm { /**< AES algorithm in F8 mode */ RTE_CRYPTO_CIPHER_AES_GCM, /**< AES algorithm in GCM mode. When this cipher algorithm is used the - * *RTE_CRYPTO_AUTH_AES_GCM* element of the - * *rte_crypto_auth_algorithm* enum MUST be used to set up the related - * *rte_crypto_auth_setup_data* structure in the session context or in - * the op_params of the crypto operation structure in the case of a - * session-less crypto operation. + * *RTE_CRYPTO_AUTH_AES_GCM* or *RTE_CRYPTO_AUTH_AES_GMAC* element + * of the *rte_crypto_auth_algorithm* enum MUST be used to set up + * the related *rte_crypto_auth_setup_data* structure in the session + * context or in the op_params of the crypto operation structure + * in the case of a session-less crypto operation. 
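[Editorial aside on the AES-GCM note above: the cipher transform MUST be chained to an AES-GCM (or AES-GMAC) auth transform. A sketch of such a chain, with illustrative 128-bit key and 16-byte digest sizes; the caller-owned "aes_key" buffer is an assumption:]

#include <rte_crypto_sym.h>

static void
build_gcm_chain(struct rte_crypto_sym_xform *cipher,
	struct rte_crypto_sym_xform *auth, uint8_t *aes_key)
{
	cipher->type = RTE_CRYPTO_SYM_XFORM_CIPHER;
	cipher->cipher.algo = RTE_CRYPTO_CIPHER_AES_GCM;
	cipher->cipher.op = RTE_CRYPTO_CIPHER_OP_ENCRYPT;
	cipher->cipher.key.data = aes_key;
	cipher->cipher.key.length = 16;
	cipher->next = auth;	/* GCM cipher must be paired with... */

	auth->type = RTE_CRYPTO_SYM_XFORM_AUTH;
	auth->auth.algo = RTE_CRYPTO_AUTH_AES_GCM;	/* ...the GCM auth algo */
	auth->auth.op = RTE_CRYPTO_AUTH_OP_GENERATE;
	auth->auth.digest_length = 16;
	auth->next = NULL;
}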
*/ RTE_CRYPTO_CIPHER_AES_XTS, /**< AES algorithm in XTS mode */ @@ -96,10 +97,10 @@ enum rte_crypto_cipher_algorithm { /**< (A)RC4 cipher algorithm */ RTE_CRYPTO_CIPHER_KASUMI_F8, - /**< Kasumi algorithm in F8 mode */ + /**< KASUMI algorithm in F8 mode */ RTE_CRYPTO_CIPHER_SNOW3G_UEA2, - /**< SNOW3G algorithm in UEA2 mode */ + /**< SNOW 3G algorithm in UEA2 mode */ RTE_CRYPTO_CIPHER_ZUC_EEA3, /**< ZUC algorithm in EEA3 mode */ @@ -203,7 +204,7 @@ enum rte_crypto_auth_algorithm { /**< AES XCBC algorithm. */ RTE_CRYPTO_AUTH_KASUMI_F9, - /**< Kasumi algorithm in F9 mode. */ + /**< KASUMI algorithm in F9 mode. */ RTE_CRYPTO_AUTH_MD5, /**< MD5 algorithm */ @@ -232,7 +233,7 @@ enum rte_crypto_auth_algorithm { /**< HMAC using 512 bit SHA algorithm. */ RTE_CRYPTO_AUTH_SNOW3G_UIA2, - /**< SNOW3G algorithm in UIA2 mode. */ + /**< SNOW 3G algorithm in UIA2 mode. */ RTE_CRYPTO_AUTH_ZUC_EIA3, /**< ZUC algorithm in EIA3 mode */ @@ -290,7 +291,7 @@ struct rte_crypto_auth_xform { * This field must be specified when the hash algorithm is one of the * following: * - * - For SNOW3G (@ref RTE_CRYPTO_AUTH_SNOW3G_UIA2), this is the + * - For SNOW 3G (@ref RTE_CRYPTO_AUTH_SNOW3G_UIA2), this is the * length of the IV (which should be 16). * * - For GCM (@ref RTE_CRYPTO_AUTH_AES_GCM). In this case, this is @@ -307,8 +308,8 @@ struct rte_crypto_auth_xform { * @note * For AES-GMAC (@ref RTE_CRYPTO_AUTH_AES_GMAC) mode of operation * this field is not used and should be set to 0. Instead the length - * of the AAD data is specified in the message length to hash field of - * the rte_crypto_sym_op_data structure. + * of the AAD data is specified in additional authentication data + * length field of the rte_crypto_sym_op_data structure */ }; @@ -333,6 +334,7 @@ struct rte_crypto_sym_xform { /**< next xform in chain */ enum rte_crypto_sym_xform_type type ; /**< xform type */ + RTE_STD_C11 union { struct rte_crypto_auth_xform auth; /**< Authentication / hash xform */ @@ -364,6 +366,25 @@ struct rte_cryptodev_sym_session; * it must have a valid *rte_mbuf* structure attached, via m_src parameter, * which contains the source data which the crypto operation is to be performed * on. + * While the mbuf is in use by a crypto operation no part of the mbuf should be + * changed by the application as the device may read or write to any part of the + * mbuf. In the case of hardware crypto devices some or all of the mbuf + * may be DMAed in and out of the device, so writing over the original data, + * though only the part specified by the rte_crypto_sym_op for transformation + * will be changed. + * Out-of-place (OOP) operation, where the source mbuf is different to the + * destination mbuf, is a special case. Data will be copied from m_src to m_dst. + * The part copied includes all the parts of the source mbuf that will be + * operated on, based on the cipher.data.offset+cipher.data.length and + * auth.data.offset+auth.data.length values in the rte_crypto_sym_op. The part + * indicated by the cipher parameters will be transformed, any extra data around + * this indicated by the auth parameters will be copied unchanged from source to + * destination mbuf. + * Also in OOP operation the cipher.data.offset and auth.data.offset apply to + * both source and destination mbufs. As these offsets are relative to the + * data_off parameter in each mbuf this can result in the data written to the + * destination buffer being at a different alignment, relative to buffer start, + * to the data in the source buffer. 
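[Editorial aside on the out-of-place (OOP) description above: a sketch of building an OOP operation with distinct source and destination mbufs; the op pool, session, mbufs and "plen" length are assumptions supplied by the caller:]

#include <rte_crypto.h>
#include <rte_mbuf.h>

static struct rte_crypto_op *
build_oop_op(struct rte_mempool *op_pool,
	struct rte_cryptodev_sym_session *sess,
	struct rte_mbuf *m_src, struct rte_mbuf *m_dst, uint32_t plen)
{
	struct rte_crypto_op *op =
		rte_crypto_op_alloc(op_pool, RTE_CRYPTO_OP_TYPE_SYMMETRIC);

	if (op == NULL)
		return NULL;

	rte_crypto_op_attach_sym_session(op, sess);
	op->sym->m_src = m_src;
	op->sym->m_dst = m_dst;	/* different mbuf: out-of-place */

	/* Offsets are relative to data_off in *both* mbufs. */
	op->sym->cipher.data.offset = 0;
	op->sym->cipher.data.length = plen;
	op->sym->auth.data.offset = 0;
	op->sym->auth.data.length = plen;

	return op;
}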
*/ struct rte_crypto_sym_op { struct rte_mbuf *m_src; /**< source mbuf */ @@ -371,6 +392,7 @@ struct rte_crypto_sym_op { enum rte_crypto_sym_op_sess_type sess_type; + RTE_STD_C11 union { struct rte_cryptodev_sym_session *session; /**< Handle for the initialised session context */ @@ -388,8 +410,9 @@ struct rte_crypto_sym_op { * this location. * * @note - * For Snow3G @ RTE_CRYPTO_CIPHER_SNOW3G_UEA2 - * and KASUMI @ RTE_CRYPTO_CIPHER_KASUMI_F8, + * For SNOW 3G @ RTE_CRYPTO_CIPHER_SNOW3G_UEA2, + * KASUMI @ RTE_CRYPTO_CIPHER_KASUMI_F8 + * and ZUC @ RTE_CRYPTO_CIPHER_ZUC_EEA3, * this field should be in bits. */ @@ -413,8 +436,9 @@ struct rte_crypto_sym_op { * field should be set to 0. * * @note - * For Snow3G @ RTE_CRYPTO_AUTH_SNOW3G_UEA2 - * and KASUMI @ RTE_CRYPTO_CIPHER_KASUMI_F8, + * For SNOW 3G @ RTE_CRYPTO_AUTH_SNOW3G_UEA2, + * KASUMI @ RTE_CRYPTO_CIPHER_KASUMI_F8 + * and ZUC @ RTE_CRYPTO_CIPHER_ZUC_EEA3, * this field should be in bits. */ } data; /**< Data offsets and length for ciphering */ @@ -423,8 +447,8 @@ struct rte_crypto_sym_op { uint8_t *data; /**< Initialisation Vector or Counter. * - * - For block ciphers in CBC or F8 mode, or for Kasumi - * in F8 mode, or for SNOW3G in UEA2 mode, this is the + * - For block ciphers in CBC or F8 mode, or for KASUMI + * in F8 mode, or for SNOW 3G in UEA2 mode, this is the * Initialisation Vector (IV) value. * * - For block ciphers in CTR mode, this is the counter. @@ -451,8 +475,8 @@ struct rte_crypto_sym_op { uint16_t length; /**< Length of valid IV data. * - * - For block ciphers in CBC or F8 mode, or for Kasumi - * in F8 mode, or for SNOW3G in UEA2 mode, this is the + * - For block ciphers in CBC or F8 mode, or for KASUMI + * in F8 mode, or for SNOW 3G in UEA2 mode, this is the * length of the IV (which must be the same as the * block length of the cipher). * @@ -482,12 +506,14 @@ struct rte_crypto_sym_op { * should be set instead. * * @note For AES-GMAC (@ref RTE_CRYPTO_AUTH_AES_GMAC) - * mode of operation, this field specifies the start - * of the AAD data in the source buffer. + * mode of operation, this field is set to 0. aad data + * pointer of rte_crypto_sym_op_data structure is + * used instead * * @note - * For Snow3G @ RTE_CRYPTO_AUTH_SNOW3G_UIA2 - * and KASUMI @ RTE_CRYPTO_AUTH_KASUMI_F9, + * For SNOW 3G @ RTE_CRYPTO_AUTH_SNOW3G_UIA2, + * KASUMI @ RTE_CRYPTO_AUTH_KASUMI_F9 + * and ZUC @ RTE_CRYPTO_AUTH_ZUC_EIA3, * this field should be in bits. */ @@ -502,12 +528,13 @@ struct rte_crypto_sym_op { * * @note * For AES-GMAC @ref RTE_CRYPTO_AUTH_AES_GMAC mode - * of operation, this field specifies the length of - * the AAD data in the source buffer. + * of operation, this field is set to 0. + * Auth.aad.length is used instead. * * @note - * For Snow3G @ RTE_CRYPTO_AUTH_SNOW3G_UIA2 - * and KASUMI @ RTE_CRYPTO_AUTH_KASUMI_F9, + * For SNOW 3G @ RTE_CRYPTO_AUTH_SNOW3G_UIA2, + * KASUMI @ RTE_CRYPTO_AUTH_KASUMI_F9 + * and ZUC @ RTE_CRYPTO_AUTH_ZUC_EIA3, * this field should be in bits. */ } data; /**< Data offsets and length for authentication */ @@ -551,7 +578,7 @@ struct rte_crypto_sym_op { uint8_t *data; /**< Pointer to Additional Authenticated Data (AAD) * needed for authenticated cipher mechanisms (CCM and - * GCM), and to the IV for SNOW3G authentication + * GCM), and to the IV for SNOW 3G authentication * (@ref RTE_CRYPTO_AUTH_SNOW3G_UIA2). For other * authentication mechanisms this pointer is ignored. 
* @@ -589,9 +616,7 @@ struct rte_crypto_sym_op { * * @note * For AES-GMAC (@ref RTE_CRYPTO_AUTH_AES_GMAC) mode of - * operation, this field is not used and should be set - * to 0. Instead the AAD data should be placed in the - * source buffer. + * operation, this field is used to pass plaintext. */ phys_addr_t phys_addr; /**< physical address */ uint16_t length; /**< Length of digest */ diff --git a/lib/librte_cryptodev/rte_cryptodev.c b/lib/librte_cryptodev/rte_cryptodev.c index fc4123b6..127e8d0d 100644 --- a/lib/librte_cryptodev/rte_cryptodev.c +++ b/lib/librte_cryptodev/rte_cryptodev.c @@ -59,7 +59,6 @@ #include #include #include -#include #include #include #include @@ -319,7 +318,7 @@ rte_cryptodev_find_free_device_index(void) } struct rte_cryptodev * -rte_cryptodev_pmd_allocate(const char *name, enum pmd_type type, int socket_id) +rte_cryptodev_pmd_allocate(const char *name, int socket_id) { struct rte_cryptodev *cryptodev; uint8_t dev_id; @@ -358,7 +357,6 @@ rte_cryptodev_pmd_allocate(const char *name, enum pmd_type type, int socket_id) cryptodev->data->dev_started = 0; cryptodev->attached = RTE_CRYPTODEV_ATTACHED; - cryptodev->pmd_type = type; cryptodev_globals.nb_devs++; } @@ -366,23 +364,6 @@ rte_cryptodev_pmd_allocate(const char *name, enum pmd_type type, int socket_id) return cryptodev; } -static inline int -rte_cryptodev_create_unique_device_name(char *name, size_t size, - struct rte_pci_device *pci_dev) -{ - int ret; - - if ((name == NULL) || (pci_dev == NULL)) - return -EINVAL; - - ret = snprintf(name, size, "%d:%d.%d", - pci_dev->addr.bus, pci_dev->addr.devid, - pci_dev->addr.function); - if (ret < 0) - return ret; - return 0; -} - int rte_cryptodev_pmd_release_device(struct rte_cryptodev *cryptodev) { @@ -407,7 +388,7 @@ rte_cryptodev_pmd_virtual_dev_init(const char *name, size_t dev_private_size, struct rte_cryptodev *cryptodev; /* allocate device structure */ - cryptodev = rte_cryptodev_pmd_allocate(name, PMD_VDEV, socket_id); + cryptodev = rte_cryptodev_pmd_allocate(name, socket_id); if (cryptodev == NULL) return NULL; @@ -430,9 +411,9 @@ rte_cryptodev_pmd_virtual_dev_init(const char *name, size_t dev_private_size, return cryptodev; } -static int -rte_cryptodev_init(struct rte_pci_driver *pci_drv, - struct rte_pci_device *pci_dev) +int +rte_cryptodev_pci_probe(struct rte_pci_driver *pci_drv, + struct rte_pci_device *pci_dev) { struct rte_cryptodev_driver *cryptodrv; struct rte_cryptodev *cryptodev; @@ -445,12 +426,10 @@ rte_cryptodev_init(struct rte_pci_driver *pci_drv, if (cryptodrv == NULL) return -ENODEV; - /* Create unique Crypto device name using PCI address */ - rte_cryptodev_create_unique_device_name(cryptodev_name, - sizeof(cryptodev_name), pci_dev); + rte_eal_pci_device_name(&pci_dev->addr, cryptodev_name, + sizeof(cryptodev_name)); - cryptodev = rte_cryptodev_pmd_allocate(cryptodev_name, PMD_PDEV, - rte_socket_id()); + cryptodev = rte_cryptodev_pmd_allocate(cryptodev_name, rte_socket_id()); if (cryptodev == NULL) return -ENOMEM; @@ -479,7 +458,7 @@ rte_cryptodev_init(struct rte_pci_driver *pci_drv, return 0; CDEV_LOG_ERR("driver %s: crypto_dev_init(vendor_id=0x%x device_id=0x%x)" - " failed", pci_drv->name, + " failed", pci_drv->driver.name, (unsigned) pci_dev->id.vendor_id, (unsigned) pci_dev->id.device_id); @@ -492,8 +471,8 @@ rte_cryptodev_init(struct rte_pci_driver *pci_drv, return -ENXIO; } -static int -rte_cryptodev_uninit(struct rte_pci_device *pci_dev) +int +rte_cryptodev_pci_remove(struct rte_pci_device *pci_dev) { const struct rte_cryptodev_driver 
*cryptodrv; struct rte_cryptodev *cryptodev; char cryptodev_name[RTE_CRYPTODEV_NAME_MAX_LEN]; if (pci_dev == NULL) return -EINVAL; - /* Create unique device name using PCI address */ - rte_cryptodev_create_unique_device_name(cryptodev_name, - sizeof(cryptodev_name), pci_dev); + rte_eal_pci_device_name(&pci_dev->addr, cryptodev_name, + sizeof(cryptodev_name)); cryptodev = rte_cryptodev_pmd_get_named_dev(cryptodev_name); if (cryptodev == NULL) @@ -535,28 +513,6 @@ rte_cryptodev_uninit(struct rte_pci_device *pci_dev) return 0; } -int -rte_cryptodev_pmd_driver_register(struct rte_cryptodev_driver *cryptodrv, - enum pmd_type type) -{ - /* Call crypto device initialization directly if device is virtual */ - if (type == PMD_VDEV) - return rte_cryptodev_init((struct rte_pci_driver *)cryptodrv, - NULL); - - /* - * Register PCI driver for physical device intialisation during - * PCI probing - */ - cryptodrv->pci_drv.devinit = rte_cryptodev_init; - cryptodrv->pci_drv.devuninit = rte_cryptodev_uninit; - - rte_eal_pci_register(&cryptodrv->pci_drv); - - return 0; -} - - uint16_t rte_cryptodev_queue_pair_count(uint8_t dev_id) { @@ -913,7 +869,7 @@ rte_cryptodev_info_get(uint8_t dev_id, struct rte_cryptodev_info *dev_info) dev_info->pci_dev = dev->pci_dev; if (dev->driver) - dev_info->driver_name = dev->driver->pci_drv.name; + dev_info->driver_name = dev->driver->pci_drv.driver.name; } diff --git a/lib/librte_cryptodev/rte_cryptodev.h b/lib/librte_cryptodev/rte_cryptodev.h index affbdecc..8f63e8f6 100644 --- a/lib/librte_cryptodev/rte_cryptodev.h +++ b/lib/librte_cryptodev/rte_cryptodev.h @@ -48,18 +48,23 @@ extern "C" { #include "rte_kvargs.h" #include "rte_crypto.h" #include "rte_dev.h" +#include -#define CRYPTODEV_NAME_NULL_PMD cryptodev_null_pmd +#define CRYPTODEV_NAME_NULL_PMD crypto_null /**< Null crypto PMD device name */ -#define CRYPTODEV_NAME_AESNI_MB_PMD cryptodev_aesni_mb_pmd +#define CRYPTODEV_NAME_AESNI_MB_PMD crypto_aesni_mb /**< AES-NI Multi buffer PMD device name */ -#define CRYPTODEV_NAME_AESNI_GCM_PMD cryptodev_aesni_gcm_pmd +#define CRYPTODEV_NAME_AESNI_GCM_PMD crypto_aesni_gcm /**< AES-NI GCM PMD device name */ -#define CRYPTODEV_NAME_QAT_SYM_PMD cryptodev_qat_sym_pmd +#define CRYPTODEV_NAME_OPENSSL_PMD crypto_openssl +/**< OpenSSL Crypto PMD device name */ +#define CRYPTODEV_NAME_QAT_SYM_PMD crypto_qat /**< Intel QAT Symmetric Crypto PMD device name */ -#define CRYPTODEV_NAME_SNOW3G_PMD cryptodev_snow3g_pmd +#define CRYPTODEV_NAME_SNOW3G_PMD crypto_snow3g /**< SNOW 3G PMD device name */ -#define CRYPTODEV_NAME_KASUMI_PMD cryptodev_kasumi_pmd +#define CRYPTODEV_NAME_KASUMI_PMD crypto_kasumi +/**< KASUMI PMD device name */ +#define CRYPTODEV_NAME_ZUC_PMD crypto_zuc /**< ZUC PMD device name */ /** Crypto device type */ @@ -70,32 +75,38 @@ enum rte_cryptodev_type { RTE_CRYPTODEV_QAT_SYM_PMD, /**< QAT PMD Symmetric Crypto */ RTE_CRYPTODEV_SNOW3G_PMD, /**< SNOW 3G PMD */ RTE_CRYPTODEV_KASUMI_PMD, /**< KASUMI PMD */ + RTE_CRYPTODEV_ZUC_PMD, /**< ZUC PMD */ + RTE_CRYPTODEV_OPENSSL_PMD, /**< OpenSSL PMD */ }; extern const char **rte_cyptodev_names; /* Logging Macros */ -#define CDEV_LOG_ERR(fmt, args...) \ - RTE_LOG(ERR, CRYPTODEV, "%s() line %u: " fmt "\n", \ - __func__, __LINE__, ## args) +#define CDEV_LOG_ERR(...) \ + RTE_LOG(ERR, CRYPTODEV, \ + RTE_FMT("%s() line %u: " RTE_FMT_HEAD(__VA_ARGS__,) "\n", \ + __func__, __LINE__, RTE_FMT_TAIL(__VA_ARGS__,))) -#define CDEV_PMD_LOG_ERR(dev, fmt, args...)
\ - RTE_LOG(ERR, CRYPTODEV, "[%s] %s() line %u: " fmt "\n", \ - dev, __func__, __LINE__, ## args) +#define CDEV_PMD_LOG_ERR(dev, ...) \ + RTE_LOG(ERR, CRYPTODEV, \ + RTE_FMT("[%s] %s() line %u: " RTE_FMT_HEAD(__VA_ARGS__,) "\n", \ + dev, __func__, __LINE__, RTE_FMT_TAIL(__VA_ARGS__,))) #ifdef RTE_LIBRTE_CRYPTODEV_DEBUG -#define CDEV_LOG_DEBUG(fmt, args...) \ - RTE_LOG(DEBUG, CRYPTODEV, "%s() line %u: " fmt "\n", \ - __func__, __LINE__, ## args) \ +#define CDEV_LOG_DEBUG(...) \ + RTE_LOG(DEBUG, CRYPTODEV, \ + RTE_FMT("%s() line %u: " RTE_FMT_HEAD(__VA_ARGS__,) "\n", \ + __func__, __LINE__, RTE_FMT_TAIL(__VA_ARGS__,))) -#define CDEV_PMD_TRACE(fmt, args...) \ - RTE_LOG(DEBUG, CRYPTODEV, "[%s] %s: " fmt "\n", \ - dev, __func__, ## args) +#define CDEV_PMD_TRACE(...) \ + RTE_LOG(DEBUG, CRYPTODEV, \ + RTE_FMT("[%s] %s: " RTE_FMT_HEAD(__VA_ARGS__,) "\n", \ + dev, __func__, RTE_FMT_TAIL(__VA_ARGS__,))) #else -#define CDEV_LOG_DEBUG(fmt, args...) -#define CDEV_PMD_TRACE(fmt, args...) +#define CDEV_LOG_DEBUG(...) (void)0 +#define CDEV_PMD_TRACE(...) (void)0 #endif /** @@ -104,6 +115,7 @@ extern const char **rte_cyptodev_names; struct rte_cryptodev_symmetric_capability { enum rte_crypto_sym_xform_type xform_type; /**< Transform type : Authentication / Cipher */ + RTE_STD_C11 union { struct { enum rte_crypto_auth_algorithm algo; @@ -177,6 +189,7 @@ struct rte_cryptodev_capabilities { enum rte_crypto_op_type op; /**< Operation type */ + RTE_STD_C11 union { struct rte_cryptodev_symmetric_capability sym; /**< Symmetric operation capability parameters */ @@ -613,12 +626,11 @@ struct rte_cryptodev { enum rte_cryptodev_type dev_type; /**< Crypto device type */ - enum pmd_type pmd_type; - /**< PMD type - PDEV / VDEV */ struct rte_cryptodev_cb_list link_intr_cbs; /**< User application callback for interrupts if present */ + __extension__ uint8_t attached : 1; /**< Flag indicating the device is attached */ } __rte_cache_aligned; @@ -642,6 +654,7 @@ struct rte_cryptodev_data { char name[RTE_CRYPTODEV_NAME_MAX_LEN]; /**< Unique identifier name */ + __extension__ uint8_t dev_started : 1; /**< Device state: STARTED(1)/STOPPED(0) */ @@ -749,6 +762,7 @@ rte_cryptodev_enqueue_burst(uint8_t dev_id, uint16_t qp_id, /** Cryptodev symmetric crypto session */ struct rte_cryptodev_sym_session { + RTE_STD_C11 struct { uint8_t dev_id; /**< Device Id */ @@ -759,7 +773,7 @@ struct rte_cryptodev_sym_session { } __rte_aligned(8); /**< Public symmetric session details */ - char _private[0]; + __extension__ char _private[0]; /**< Private session material */ }; diff --git a/lib/librte_cryptodev/rte_cryptodev_pmd.h b/lib/librte_cryptodev/rte_cryptodev_pmd.h index 7d049ea3..abfe2dc1 100644 --- a/lib/librte_cryptodev/rte_cryptodev_pmd.h +++ b/lib/librte_cryptodev/rte_cryptodev_pmd.h @@ -52,6 +52,7 @@ extern "C" { #include #include #include +#include #include "rte_crypto.h" #include "rte_cryptodev.h" @@ -61,17 +62,18 @@ extern "C" { #define RTE_PMD_DEBUG_TRACE(...) \ rte_pmd_debug_trace(__func__, __VA_ARGS__) #else -#define RTE_PMD_DEBUG_TRACE(fmt, args...) +#define RTE_PMD_DEBUG_TRACE(...) #endif struct rte_cryptodev_session { + RTE_STD_C11 struct { uint8_t dev_id; enum rte_cryptodev_type type; struct rte_mempool *mp; } __rte_aligned(8); - char _private[0]; + __extension__ char _private[0]; }; struct rte_cryptodev_driver; @@ -454,13 +456,12 @@ struct rte_cryptodev_ops { * to that slot for the driver to use. 
* * @param name Unique identifier name for each device - * @param type Device type of this Crypto device * @param socket_id Socket to allocate resources on. * @return * - Slot in the rte_dev_devices array for a new device; */ struct rte_cryptodev * -rte_cryptodev_pmd_allocate(const char *name, enum pmd_type type, int socket_id); +rte_cryptodev_pmd_allocate(const char *name, int socket_id); /** * Creates a new virtual crypto device and returns the pointer @@ -492,36 +493,6 @@ rte_cryptodev_pmd_virtual_dev_init(const char *name, size_t dev_private_size, extern int rte_cryptodev_pmd_release_device(struct rte_cryptodev *cryptodev); - -/** - * Register a Crypto [Poll Mode] driver. - * - * Function invoked by the initialization function of a Crypto driver - * to simultaneously register itself as Crypto Poll Mode Driver and to either: - * - * a - register itself as PCI driver if the crypto device is a physical - * device, by invoking the rte_eal_pci_register() function to - * register the *pci_drv* structure embedded in the *crypto_drv* - * structure, after having stored the address of the - * rte_cryptodev_init() function in the *devinit* field of the - * *pci_drv* structure. - * - * During the PCI probing phase, the rte_cryptodev_init() - * function is invoked for each PCI [device] matching the - * embedded PCI identifiers provided by the driver. - * - * b, complete the initialization sequence if the device is a virtual - * device by calling the rte_cryptodev_init() directly passing a - * NULL parameter for the rte_pci_device structure. - * - * @param crypto_drv crypto_driver structure associated with the crypto - * driver. - * @param type pmd type - */ -extern int -rte_cryptodev_pmd_driver_register(struct rte_cryptodev_driver *crypto_drv, - enum pmd_type type); - /** * Executes all the user application registered callbacks for the specific * device. @@ -535,6 +506,18 @@ rte_cryptodev_pmd_driver_register(struct rte_cryptodev_driver *crypto_drv, void rte_cryptodev_pmd_callback_process(struct rte_cryptodev *dev, enum rte_cryptodev_event_type event); +/** + * Wrapper for use by pci drivers as a .probe function to attach to a crypto + * interface. + */ +int rte_cryptodev_pci_probe(struct rte_pci_driver *pci_drv, + struct rte_pci_device *pci_dev); + +/** + * Wrapper for use by pci drivers as a .remove function to detach a crypto + * interface. 
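[Editorial aside on the two wrappers above: a PCI crypto PMD can now point its rte_pci_driver probe/remove callbacks straight at them. A registration sketch loosely modeled on that pattern; pci_id_map, my_dev_init, struct my_private and my_crypto_pmd are hypothetical names:]

#include <rte_cryptodev_pmd.h>

static const struct rte_pci_id pci_id_map[] = {
	{ .vendor_id = 0 },	/* real device IDs elided */
};

struct my_private { uint8_t state; };	/* hypothetical per-device data */

static int
my_dev_init(struct rte_cryptodev_driver *drv __rte_unused,
	struct rte_cryptodev *dev __rte_unused)
{
	return 0;	/* hypothetical per-device setup would go here */
}

static struct rte_cryptodev_driver my_crypto_drv = {
	.pci_drv = {
		.id_table = pci_id_map,
		.drv_flags = RTE_PCI_DRV_NEED_MAPPING,
		.probe = rte_cryptodev_pci_probe,	/* generic attach */
		.remove = rte_cryptodev_pci_remove,	/* generic detach */
	},
	.cryptodev_init = my_dev_init,
	.dev_private_size = sizeof(struct my_private),
};

RTE_PMD_REGISTER_PCI(my_crypto_pmd, my_crypto_drv.pci_drv);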
+ */ +int rte_cryptodev_pci_remove(struct rte_pci_device *pci_dev); #ifdef __cplusplus } diff --git a/lib/librte_cryptodev/rte_cryptodev_version.map b/lib/librte_cryptodev/rte_cryptodev_version.map index a08fd202..9dde0e72 100644 --- a/lib/librte_cryptodev/rte_cryptodev_version.map +++ b/lib/librte_cryptodev/rte_cryptodev_version.map @@ -14,7 +14,6 @@ DPDK_16.04 { rte_cryptodev_info_get; rte_cryptodev_pmd_allocate; rte_cryptodev_pmd_callback_process; - rte_cryptodev_pmd_driver_register; rte_cryptodev_pmd_release_device; rte_cryptodev_pmd_virtual_dev_init; rte_cryptodev_sym_session_create; @@ -39,3 +38,11 @@ DPDK_16.07 { rte_cryptodev_parse_vdev_init_params; } DPDK_16.04; + +DPDK_16.11 { + global: + + rte_cryptodev_pci_probe; + rte_cryptodev_pci_remove; + +} DPDK_16.07; diff --git a/lib/librte_eal/bsdapp/eal/Makefile b/lib/librte_eal/bsdapp/eal/Makefile index 988cbbce..a15b762b 100644 --- a/lib/librte_eal/bsdapp/eal/Makefile +++ b/lib/librte_eal/bsdapp/eal/Makefile @@ -48,14 +48,13 @@ LDLIBS += -lgcc_s EXPORT_MAP := rte_eal_version.map -LIBABIVER := 2 +LIBABIVER := 3 -# specific to linuxapp exec-env +# specific to bsdapp exec-env SRCS-$(CONFIG_RTE_EXEC_ENV_BSDAPP) := eal.c SRCS-$(CONFIG_RTE_EXEC_ENV_BSDAPP) += eal_memory.c SRCS-$(CONFIG_RTE_EXEC_ENV_BSDAPP) += eal_hugepage_info.c SRCS-$(CONFIG_RTE_EXEC_ENV_BSDAPP) += eal_thread.c -SRCS-$(CONFIG_RTE_EXEC_ENV_BSDAPP) += eal_log.c SRCS-$(CONFIG_RTE_EXEC_ENV_BSDAPP) += eal_pci.c SRCS-$(CONFIG_RTE_EXEC_ENV_BSDAPP) += eal_debug.c SRCS-$(CONFIG_RTE_EXEC_ENV_BSDAPP) += eal_lcore.c @@ -69,6 +68,7 @@ SRCS-$(CONFIG_RTE_EXEC_ENV_BSDAPP) += eal_common_timer.c SRCS-$(CONFIG_RTE_EXEC_ENV_BSDAPP) += eal_common_memzone.c SRCS-$(CONFIG_RTE_EXEC_ENV_BSDAPP) += eal_common_log.c SRCS-$(CONFIG_RTE_EXEC_ENV_BSDAPP) += eal_common_launch.c +SRCS-$(CONFIG_RTE_EXEC_ENV_BSDAPP) += eal_common_vdev.c SRCS-$(CONFIG_RTE_EXEC_ENV_BSDAPP) += eal_common_pci.c SRCS-$(CONFIG_RTE_EXEC_ENV_BSDAPP) += eal_common_pci_uio.c SRCS-$(CONFIG_RTE_EXEC_ENV_BSDAPP) += eal_common_memory.c diff --git a/lib/librte_eal/bsdapp/eal/eal.c b/lib/librte_eal/bsdapp/eal/eal.c index a0c8f8c8..35e3117a 100644 --- a/lib/librte_eal/bsdapp/eal/eal.c +++ b/lib/librte_eal/bsdapp/eal/eal.c @@ -496,14 +496,14 @@ rte_eal_init(int argc, char **argv) char cpuset[RTE_CPU_AFFINITY_STR_LEN]; char thread_name[RTE_MAX_THREAD_NAME_LEN]; + /* checks if the machine is adequate */ + rte_cpu_check_supported(); + if (!rte_atomic32_test_and_set(&run_once)) return -1; thread_id = pthread_self(); - if (rte_eal_log_early_init() < 0) - rte_panic("Cannot init early logs\n"); - eal_log_level_parse(argc, argv); /* set log level as early as possible */ @@ -552,9 +552,6 @@ rte_eal_init(int argc, char **argv) if (rte_eal_tailqs_init() < 0) rte_panic("Cannot init tail queues for objects\n"); -/* if (rte_eal_log_init(argv[0], internal_config.syslog_facility) < 0) - rte_panic("Cannot init logs\n");*/ - if (rte_eal_alarm_init() < 0) rte_panic("Cannot init interrupt-handling thread\n"); diff --git a/lib/librte_eal/bsdapp/eal/eal_log.c b/lib/librte_eal/bsdapp/eal/eal_log.c deleted file mode 100644 index a425f7a8..00000000 --- a/lib/librte_eal/bsdapp/eal/eal_log.c +++ /dev/null @@ -1,57 +0,0 @@ -/*- - * BSD LICENSE - * - * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. - * All rights reserved. 
- * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * - * * Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in - * the documentation and/or other materials provided with the - * distribution. - * * Neither the name of Intel Corporation nor the names of its - * contributors may be used to endorse or promote products derived - * from this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - */ - -#include -#include -#include - -#include - -/* - * set the log to default function, called during eal init process, - * once memzones are available. - */ -int -rte_eal_log_init(const char *id __rte_unused, int facility __rte_unused) -{ - if (rte_eal_common_log_init(stderr) < 0) - return -1; - return 0; -} - -int -rte_eal_log_early_init(void) -{ - rte_openlog_stream(stderr); - return 0; -} diff --git a/lib/librte_eal/bsdapp/eal/eal_pci.c b/lib/librte_eal/bsdapp/eal/eal_pci.c index 374b68f2..8b3ed881 100644 --- a/lib/librte_eal/bsdapp/eal/eal_pci.c +++ b/lib/librte_eal/bsdapp/eal/eal_pci.c @@ -287,7 +287,7 @@ pci_scan_one(int dev_pci_fd, struct pci_conf *conf) dev->max_vfs = 0; /* FreeBSD has no NUMA support (yet) */ - dev->numa_node = 0; + dev->device.numa_node = 0; /* FreeBSD has only one pass through driver */ dev->kdrv = RTE_KDRV_NIC_UIO; @@ -406,6 +406,55 @@ error: return -1; } +int +pci_update_device(const struct rte_pci_addr *addr) +{ + int fd; + struct pci_conf matches[2]; + struct pci_match_conf match = { + .pc_sel = { + .pc_domain = addr->domain, + .pc_bus = addr->bus, + .pc_dev = addr->devid, + .pc_func = addr->function, + }, + }; + struct pci_conf_io conf_io = { + .pat_buf_len = 0, + .num_patterns = 1, + .patterns = &match, + .match_buf_len = sizeof(matches), + .matches = &matches[0], + }; + + fd = open("/dev/pci", O_RDONLY); + if (fd < 0) { + RTE_LOG(ERR, EAL, "%s(): error opening /dev/pci\n", __func__); + goto error; + } + + if (ioctl(fd, PCIOCGETCONF, &conf_io) < 0) { + RTE_LOG(ERR, EAL, "%s(): error with ioctl on /dev/pci: %s\n", + __func__, strerror(errno)); + goto error; + } + + if (conf_io.num_matches != 1) + goto error; + + if (pci_scan_one(fd, &matches[0]) < 0) + goto error; + + close(fd); + + return 0; + +error: + if (fd >= 0) + close(fd); + return -1; +} + /* Read PCI config space. 
*/ int rte_eal_pci_read_config(const struct rte_pci_device *dev, void *buf, size_t len, off_t offset) @@ -623,9 +672,6 @@ rte_eal_pci_ioport_unmap(struct rte_pci_ioport *p) int rte_eal_pci_init(void) { - TAILQ_INIT(&pci_driver_list); - TAILQ_INIT(&pci_device_list); - /* for debug purposes, PCI can be disabled */ if (internal_config.no_pci) return 0; diff --git a/lib/librte_eal/bsdapp/eal/rte_eal_version.map b/lib/librte_eal/bsdapp/eal/rte_eal_version.map index a335e04b..2f81f7c0 100644 --- a/lib/librte_eal/bsdapp/eal/rte_eal_version.map +++ b/lib/librte_eal/bsdapp/eal/rte_eal_version.map @@ -162,3 +162,15 @@ DPDK_16.07 { rte_thread_setname; } DPDK_16.04; + +DPDK_16.11 { + global: + + rte_delay_us_block; + rte_delay_us_callback_register; + rte_eal_dev_attach; + rte_eal_dev_detach; + rte_eal_vdrv_register; + rte_eal_vdrv_unregister; + +} DPDK_16.07; diff --git a/lib/librte_eal/common/Makefile b/lib/librte_eal/common/Makefile index f5ea0ee8..dfd64aa5 100644 --- a/lib/librte_eal/common/Makefile +++ b/lib/librte_eal/common/Makefile @@ -34,11 +34,11 @@ include $(RTE_SDK)/mk/rte.vars.mk INC := rte_branch_prediction.h rte_common.h INC += rte_debug.h rte_eal.h rte_errno.h rte_launch.h rte_lcore.h INC += rte_log.h rte_memory.h rte_memzone.h rte_pci.h -INC += rte_pci_dev_ids.h rte_per_lcore.h rte_random.h +INC += rte_per_lcore.h rte_random.h INC += rte_tailq.h rte_interrupts.h rte_alarm.h INC += rte_string_fns.h rte_version.h INC += rte_eal_memconfig.h rte_malloc_heap.h -INC += rte_hexdump.h rte_devargs.h rte_dev.h +INC += rte_hexdump.h rte_devargs.h rte_dev.h rte_vdev.h INC += rte_pci_dev_feature_defs.h rte_pci_dev_features.h INC += rte_malloc.h rte_keepalive.h rte_time.h diff --git a/lib/librte_eal/common/eal_common_cpuflags.c b/lib/librte_eal/common/eal_common_cpuflags.c index ecb12409..b5f76f7f 100644 --- a/lib/librte_eal/common/eal_common_cpuflags.c +++ b/lib/librte_eal/common/eal_common_cpuflags.c @@ -39,14 +39,8 @@ /** * Checks if the machine is adequate for running the binary. If it is not, the * program exits with status 1. - * The function attribute forces this function to be called before main(). But - * with ICC, the check is generated by the compiler. */ -#ifndef __INTEL_COMPILER -void __attribute__ ((__constructor__)) -#else void -#endif rte_cpu_check_supported(void) { /* This is generated at compile-time by the build system */ diff --git a/lib/librte_eal/common/eal_common_dev.c b/lib/librte_eal/common/eal_common_dev.c index a8a4146c..4f3b4934 100644 --- a/lib/librte_eal/common/eal_common_dev.c +++ b/lib/librte_eal/common/eal_common_dev.c @@ -48,6 +48,9 @@ /** Global list of device drivers. */ static struct rte_driver_list dev_driver_list = TAILQ_HEAD_INITIALIZER(dev_driver_list); +/** Global list of device drivers. */ +static struct rte_device_list dev_device_list = + TAILQ_HEAD_INITIALIZER(dev_device_list); /* register a driver */ void @@ -63,42 +66,25 @@ rte_eal_driver_unregister(struct rte_driver *driver) TAILQ_REMOVE(&dev_driver_list, driver, next); } -int -rte_eal_vdev_init(const char *name, const char *args) +void rte_eal_device_insert(struct rte_device *dev) { - struct rte_driver *driver; - - if (name == NULL) - return -EINVAL; - - TAILQ_FOREACH(driver, &dev_driver_list, next) { - if (driver->type != PMD_VDEV) - continue; - - /* - * search a driver prefix in virtual device name. - * For example, if the driver is pcap PMD, driver->name - * will be "eth_pcap", but "name" will be "eth_pcapN". - * So use strncmp to compare. 
- */ - if (!strncmp(driver->name, name, strlen(driver->name))) - return driver->init(name, args); - } + TAILQ_INSERT_TAIL(&dev_device_list, dev, next); +} - RTE_LOG(ERR, EAL, "no driver found for %s\n", name); - return -EINVAL; +void rte_eal_device_remove(struct rte_device *dev) +{ + TAILQ_REMOVE(&dev_device_list, dev, next); } int rte_eal_dev_init(void) { struct rte_devargs *devargs; - struct rte_driver *driver; /* * Note that the dev_driver_list is populated here * from calls made to rte_eal_driver_register from constructor functions - * embedded into PMD modules via the PMD_REGISTER_DRIVER macro + * embedded into PMD modules via the RTE_PMD_REGISTER_VDEV macro */ /* call the init function for each virtual device */ @@ -115,38 +101,53 @@ rte_eal_dev_init(void) } } - /* Once the vdevs are initalized, start calling all the pdev drivers */ - TAILQ_FOREACH(driver, &dev_driver_list, next) { - if (driver->type != PMD_PDEV) - continue; - /* PDEV drivers don't get passed any parameters */ - driver->init(NULL, NULL); - } return 0; } -int -rte_eal_vdev_uninit(const char *name) +int rte_eal_dev_attach(const char *name, const char *devargs) { - struct rte_driver *driver; + struct rte_pci_addr addr; - if (name == NULL) + if (name == NULL || devargs == NULL) { + RTE_LOG(ERR, EAL, "Invalid device or arguments provided\n"); return -EINVAL; + } - TAILQ_FOREACH(driver, &dev_driver_list, next) { - if (driver->type != PMD_VDEV) - continue; + if (eal_parse_pci_DomBDF(name, &addr) == 0) { + if (rte_eal_pci_probe_one(&addr) < 0) + goto err; + + } else { + if (rte_eal_vdev_init(name, devargs)) + goto err; + } + + return 0; + +err: + RTE_LOG(ERR, EAL, "Driver cannot attach the device (%s)\n", name); + return -EINVAL; +} + +int rte_eal_dev_detach(const char *name) +{ + struct rte_pci_addr addr; - /* - * search a driver prefix in virtual device name. - * For example, if the driver is pcap PMD, driver->name - * will be "eth_pcap", but "name" will be "eth_pcapN". - * So use strncmp to compare. 
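The attach/detach pair above gives applications a single hotplug entry point for both bus types; the PCI branch is taken when the name parses as a Domain:Bus:Device.Function address. A short usage sketch with example device names (note that devargs must be non-NULL, so PCI devices pass an empty string):

    static void
    example_hotplug(void) /* hypothetical helper */
    {
        /* physical device, identified by PCI address */
        if (rte_eal_dev_attach("0000:04:00.0", "") != 0)
            RTE_LOG(ERR, EAL, "cannot attach PCI device\n");

        /* virtual device, matched against registered vdev drivers */
        if (rte_eal_dev_attach("net_pcap0", "iface=eth0") != 0)
            RTE_LOG(ERR, EAL, "cannot attach vdev\n");

        rte_eal_dev_detach("net_pcap0");
    }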
- */ - if (!strncmp(driver->name, name, strlen(driver->name))) - return driver->uninit(name); + if (name == NULL) { + RTE_LOG(ERR, EAL, "Invalid device provided.\n"); + return -EINVAL; } - RTE_LOG(ERR, EAL, "no driver found for %s\n", name); + if (eal_parse_pci_DomBDF(name, &addr) == 0) { + if (rte_eal_pci_detach(&addr) < 0) + goto err; + } else { + if (rte_eal_vdev_uninit(name)) + goto err; + } + return 0; + +err: + RTE_LOG(ERR, EAL, "Driver cannot detach the device (%s)\n", name); return -EINVAL; } diff --git a/lib/librte_eal/common/eal_common_log.c b/lib/librte_eal/common/eal_common_log.c index 7916c781..e45d3269 100644 --- a/lib/librte_eal/common/eal_common_log.c +++ b/lib/librte_eal/common/eal_common_log.c @@ -48,11 +48,12 @@ struct rte_logs rte_logs = { .file = NULL, }; +/* Stream to use for logging if rte_logs.file is NULL */ static FILE *default_log_stream; /** * This global structure stores some informations about the message - * that is currently beeing processed by one lcore + * that is currently being processed by one lcore */ struct log_cur_msg { uint32_t loglevel; /**< log level - see rte_log.h */ @@ -64,27 +65,11 @@ static RTE_DEFINE_PER_LCORE(struct log_cur_msg, log_cur_msg); /* default logs */ -int -rte_log_add_in_history(const char *buf __rte_unused, size_t size __rte_unused) -{ - return 0; -} - -void -rte_log_set_history(int enable) -{ - if (enable) - RTE_LOG(WARNING, EAL, "The log history is deprecated.\n"); -} - /* Change the stream that will be used by logging system */ int rte_openlog_stream(FILE *f) { - if (f == NULL) - rte_logs.file = default_log_stream; - else - rte_logs.file = f; + rte_logs.file = f; return 0; } @@ -131,12 +116,6 @@ int rte_log_cur_msg_logtype(void) return RTE_PER_LCORE(log_cur_msg).logtype; } -/* Dump log history to file */ -void -rte_log_dump_history(FILE *out __rte_unused) -{ -} - /* * Generates a log message The message will be sent in the stream * defined by the previous call to rte_openlog_stream(). @@ -146,6 +125,19 @@ rte_vlog(uint32_t level, uint32_t logtype, const char *format, va_list ap) { int ret; FILE *f = rte_logs.file; + if (f == NULL) { + f = default_log_stream; + if (f == NULL) { + /* + * Grab the current value of stderr here, rather than + * just initializing default_log_stream to stderr. This + * ensures that we will always use the current value + * of stderr, even if the application closes and + * reopens it. + */ + f = stderr; + } + } if ((level > rte_logs.level) || !(logtype & rte_logs.type)) return 0; @@ -177,17 +169,14 @@ rte_log(uint32_t level, uint32_t logtype, const char *format, ...) } /* - * called by environment-specific log init function + * Called by environment-specific initialization functions. */ -int -rte_eal_common_log_init(FILE *default_log) +void +eal_log_set_default(FILE *default_log) { default_log_stream = default_log; - rte_openlog_stream(default_log); #if RTE_LOG_LEVEL >= RTE_LOG_DEBUG RTE_LOG(NOTICE, EAL, "Debug logs available - lower performance\n"); #endif - - return 0; } diff --git a/lib/librte_eal/common/eal_common_memzone.c b/lib/librte_eal/common/eal_common_memzone.c index 1bd0a33d..64f4e0ad 100644 --- a/lib/librte_eal/common/eal_common_memzone.c +++ b/lib/librte_eal/common/eal_common_memzone.c @@ -337,19 +337,7 @@ rte_memzone_free(const struct rte_memzone *mz) idx = ((uintptr_t)mz - (uintptr_t)mcfg->memzone); idx = idx / sizeof(struct rte_memzone); -#ifdef RTE_LIBRTE_IVSHMEM - /* - * If ioremap_addr is set, it's an IVSHMEM memzone and we cannot - * free it. 
- */ - if (mcfg->memzone[idx].ioremap_addr != 0) { - rte_rwlock_write_unlock(&mcfg->mlock); - return -EINVAL; - } -#endif - addr = mcfg->memzone[idx].addr; - if (addr == NULL) ret = -EINVAL; else if (mcfg->memzone_cnt == 0) { diff --git a/lib/librte_eal/common/eal_common_options.c b/lib/librte_eal/common/eal_common_options.c index 1a1bab36..6ca8af17 100644 --- a/lib/librte_eal/common/eal_common_options.c +++ b/lib/librte_eal/common/eal_common_options.c @@ -1021,7 +1021,7 @@ eal_common_usage(void) " [NOTE: PCI whitelist cannot be used with -b option]\n" " --"OPT_VDEV" Add a virtual device.\n" " The argument format is [,key=val,...]\n" - " (ex: --vdev=eth_pcap0,iface=eth2).\n" + " (ex: --vdev=net_pcap0,iface=eth2).\n" " -d LIB.so|DIR Add a driver or driver directory\n" " (can be used multiple times)\n" " --"OPT_VMWARE_TSC_MAP" Use VMware TSC map instead of native RDTSC\n" diff --git a/lib/librte_eal/common/eal_common_pci.c b/lib/librte_eal/common/eal_common_pci.c index 096c65e4..6bff6752 100644 --- a/lib/librte_eal/common/eal_common_pci.c +++ b/lib/librte_eal/common/eal_common_pci.c @@ -82,8 +82,10 @@ #include "eal_private.h" -struct pci_driver_list pci_driver_list; -struct pci_device_list pci_device_list; +struct pci_driver_list pci_driver_list = + TAILQ_HEAD_INITIALIZER(pci_driver_list); +struct pci_device_list pci_device_list = + TAILQ_HEAD_INITIALIZER(pci_device_list); #define SYSFS_PCI_DEVICES "/sys/bus/pci/devices" @@ -151,7 +153,7 @@ pci_unmap_resource(void *requested_addr, size_t size) } /* - * If vendor/device ID match, call the devinit() function of the + * If vendor/device ID match, call the probe() function of the * driver. */ static int @@ -183,17 +185,18 @@ rte_eal_pci_probe_one_driver(struct rte_pci_driver *dr, struct rte_pci_device *d RTE_LOG(INFO, EAL, "PCI device "PCI_PRI_FMT" on NUMA socket %i\n", loc->domain, loc->bus, loc->devid, loc->function, - dev->numa_node); + dev->device.numa_node); /* no initialization when blacklisted, return without error */ - if (dev->devargs != NULL && - dev->devargs->type == RTE_DEVTYPE_BLACKLISTED_PCI) { + if (dev->device.devargs != NULL && + dev->device.devargs->type == + RTE_DEVTYPE_BLACKLISTED_PCI) { RTE_LOG(INFO, EAL, " Device is blacklisted, not initializing\n"); return 1; } RTE_LOG(INFO, EAL, " probe driver: %x:%x %s\n", dev->id.vendor_id, - dev->id.device_id, dr->name); + dev->id.device_id, dr->driver.name); if (dr->drv_flags & RTE_PCI_DRV_NEED_MAPPING) { /* map resources for devices that use igb_uio */ @@ -210,15 +213,19 @@ rte_eal_pci_probe_one_driver(struct rte_pci_driver *dr, struct rte_pci_device *d /* reference driver structure */ dev->driver = dr; - /* call the driver devinit() function */ - return dr->devinit(dr, dev); + /* call the driver probe() function */ + ret = dr->probe(dr, dev); + if (ret) + dev->driver = NULL; + + return ret; } /* return positive value if driver doesn't support this device */ return 1; } /* - * If vendor/device ID match, call the devuninit() function of the + * If vendor/device ID match, call the remove() function of the * driver. 
*/ static int @@ -250,12 +257,12 @@ rte_eal_pci_detach_dev(struct rte_pci_driver *dr, RTE_LOG(DEBUG, EAL, "PCI device "PCI_PRI_FMT" on NUMA socket %i\n", loc->domain, loc->bus, loc->devid, - loc->function, dev->numa_node); + loc->function, dev->device.numa_node); RTE_LOG(DEBUG, EAL, " remove driver: %x:%x %s\n", dev->id.vendor_id, - dev->id.device_id, dr->name); + dev->id.device_id, dr->driver.name); - if (dr->devuninit && (dr->devuninit(dev) < 0)) + if (dr->remove && (dr->remove(dev) < 0)) return -1; /* negative value is an error */ /* clear driver structure */ @@ -273,7 +280,7 @@ rte_eal_pci_detach_dev(struct rte_pci_driver *dr, } /* - * If vendor/device ID match, call the devinit() function of all + * If vendor/device ID match, call the probe() function of all * registered driver for the given device. Return -1 if initialization * failed, return 1 if no driver is found for this device. */ @@ -286,6 +293,10 @@ pci_probe_all_drivers(struct rte_pci_device *dev) if (dev == NULL) return -1; + /* Check if a driver is already loaded */ + if (dev->driver != NULL) + return 0; + TAILQ_FOREACH(dr, &pci_driver_list, next) { rc = rte_eal_pci_probe_one_driver(dr, dev); if (rc < 0) @@ -300,7 +311,7 @@ pci_probe_all_drivers(struct rte_pci_device *dev) } /* - * If vendor/device ID match, call the devuninit() function of all + * If vendor/device ID match, call the remove() function of all * registered driver for the given device. Return -1 if initialization * failed, return 1 if no driver is found for this device. */ @@ -339,6 +350,12 @@ rte_eal_pci_probe_one(const struct rte_pci_addr *addr) if (addr == NULL) return -1; + /* update current pci device in global list, kernel bindings might have + * changed since last time we looked at it. + */ + if (pci_update_device(addr) < 0) + goto err_return; + TAILQ_FOREACH(dev, &pci_device_list, next) { if (rte_eal_compare_pci_addr(&dev->addr, addr)) continue; @@ -351,9 +368,9 @@ rte_eal_pci_probe_one(const struct rte_pci_addr *addr) return -1; err_return: - RTE_LOG(WARNING, EAL, "Requested device " PCI_PRI_FMT - " cannot be used\n", dev->addr.domain, dev->addr.bus, - dev->addr.devid, dev->addr.function); + RTE_LOG(WARNING, EAL, + "Requested device " PCI_PRI_FMT " cannot be used\n", + addr->domain, addr->bus, addr->devid, addr->function); return -1; } @@ -391,7 +408,7 @@ err_return: } /* - * Scan the content of the PCI bus, and call the devinit() function for + * Scan the content of the PCI bus, and call the probe() function for * all registered drivers that have a matching entry in its id_table * for discovered devices. 
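Because rte_eal_pci_probe_one() now calls pci_update_device() first, re-probing a single address picks up kernel binding changes made after the initial bus scan. A sketch, assuming eal_parse_pci_DomBDF() is visible to the caller the same way it is to rte_eal_dev_attach() above:

    static void
    example_reprobe(void) /* hypothetical helper, example address */
    {
        struct rte_pci_addr addr;

        /* the device entry is refreshed from the kernel before probing */
        if (eal_parse_pci_DomBDF("0000:04:00.0", &addr) == 0 &&
            rte_eal_pci_probe_one(&addr) == 0)
            RTE_LOG(INFO, EAL, "device rescanned and probed\n");
    }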
*/ @@ -411,7 +428,7 @@ rte_eal_pci_probe(void) /* set devargs in PCI structure */ devargs = pci_devargs_lookup(dev); if (devargs != NULL) - dev->devargs = devargs; + dev->device.devargs = devargs; /* probe all or only whitelisted devices */ if (probe_all) @@ -464,11 +481,13 @@ void rte_eal_pci_register(struct rte_pci_driver *driver) { TAILQ_INSERT_TAIL(&pci_driver_list, driver, next); + rte_eal_driver_register(&driver->driver); } /* unregister a driver */ void rte_eal_pci_unregister(struct rte_pci_driver *driver) { + rte_eal_driver_unregister(&driver->driver); TAILQ_REMOVE(&pci_driver_list, driver, next); } diff --git a/lib/librte_eal/common/eal_common_timer.c b/lib/librte_eal/common/eal_common_timer.c index c4227cd8..72656176 100644 --- a/lib/librte_eal/common/eal_common_timer.c +++ b/lib/librte_eal/common/eal_common_timer.c @@ -47,8 +47,11 @@ /* The frequency of the RDTSC timer resolution */ static uint64_t eal_tsc_resolution_hz; +/* Pointer to user delay function */ +void (*rte_delay_us)(unsigned int) = NULL; + void -rte_delay_us(unsigned us) +rte_delay_us_block(unsigned int us) { const uint64_t start = rte_get_timer_cycles(); const uint64_t ticks = (uint64_t)us * rte_get_timer_hz() / 1E6; @@ -84,3 +87,15 @@ set_tsc_freq(void) RTE_LOG(DEBUG, EAL, "TSC frequency is ~%" PRIu64 " KHz\n", freq / 1000); eal_tsc_resolution_hz = freq; } + +void rte_delay_us_callback_register(void (*userfunc)(unsigned int)) +{ + rte_delay_us = userfunc; +} + +static void __attribute__((constructor)) +rte_timer_init(void) +{ + /* set rte_delay_us_block as a delay function */ + rte_delay_us_callback_register(rte_delay_us_block); +} diff --git a/lib/librte_eal/common/eal_common_vdev.c b/lib/librte_eal/common/eal_common_vdev.c new file mode 100644 index 00000000..0ff2377d --- /dev/null +++ b/lib/librte_eal/common/eal_common_vdev.c @@ -0,0 +1,116 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2016 RehiveTech. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of RehiveTech nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
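The new file below introduces the vdev driver list, its register/unregister helpers, and name/alias prefix matching. A hypothetical PMD skeleton wiring into it, assuming the struct rte_vdev_driver layout from the rte_vdev.h header this patch adds; real drivers typically go through the RTE_PMD_REGISTER_VDEV macro referenced earlier, and RTE_INIT comes from the rte_eal.h hunk further down:

    static int
    example_probe(const char *name, const char *args)
    {
        RTE_LOG(INFO, EAL, "probing %s (%s)\n", name, args ? args : "");
        return 0;
    }

    static int
    example_remove(const char *name)
    {
        RTE_LOG(INFO, EAL, "removing %s\n", name);
        return 0;
    }

    static struct rte_vdev_driver example_vdrv = {
        .driver = {
            .name  = "net_example",
            .alias = "eth_example", /* legacy name, matched after exact names */
        },
        .probe  = example_probe,
        .remove = example_remove,
    };

    RTE_INIT(example_vdrv_init)
    {
        rte_eal_vdrv_register(&example_vdrv);
    }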
+ */ + +#include +#include +#include +#include +#include +#include + +#include +#include + +struct vdev_driver_list vdev_driver_list = + TAILQ_HEAD_INITIALIZER(vdev_driver_list); + +/* register a driver */ +void +rte_eal_vdrv_register(struct rte_vdev_driver *driver) +{ + TAILQ_INSERT_TAIL(&vdev_driver_list, driver, next); + rte_eal_driver_register(&driver->driver); +} + +/* unregister a driver */ +void +rte_eal_vdrv_unregister(struct rte_vdev_driver *driver) +{ + rte_eal_driver_unregister(&driver->driver); + TAILQ_REMOVE(&vdev_driver_list, driver, next); +} + +int +rte_eal_vdev_init(const char *name, const char *args) +{ + struct rte_vdev_driver *driver; + + if (name == NULL) + return -EINVAL; + + TAILQ_FOREACH(driver, &vdev_driver_list, next) { + /* + * search a driver prefix in virtual device name. + * For example, if the driver is pcap PMD, driver->name + * will be "net_pcap", but "name" will be "net_pcapN". + * So use strncmp to compare. + */ + if (!strncmp(driver->driver.name, name, + strlen(driver->driver.name))) + return driver->probe(name, args); + } + + /* Give new names precedence over aliases. */ + TAILQ_FOREACH(driver, &vdev_driver_list, next) { + if (driver->driver.alias && + !strncmp(driver->driver.alias, name, + strlen(driver->driver.alias))) + return driver->probe(name, args); + } + + RTE_LOG(ERR, EAL, "no driver found for %s\n", name); + return -EINVAL; +} + +int +rte_eal_vdev_uninit(const char *name) +{ + struct rte_vdev_driver *driver; + + if (name == NULL) + return -EINVAL; + + TAILQ_FOREACH(driver, &vdev_driver_list, next) { + /* + * search a driver prefix in virtual device name. + * For example, if the driver is pcap PMD, driver->name + * will be "net_pcap", but "name" will be "net_pcapN". + * So use strncmp to compare. + */ + if (!strncmp(driver->driver.name, name, + strlen(driver->driver.name))) + return driver->remove(name); + } + + RTE_LOG(ERR, EAL, "no driver found for %s\n", name); + return -EINVAL; +} diff --git a/lib/librte_eal/common/eal_filesystem.h b/lib/librte_eal/common/eal_filesystem.h index fdb4a70b..8acbd996 100644 --- a/lib/librte_eal/common/eal_filesystem.h +++ b/lib/librte_eal/common/eal_filesystem.h @@ -97,17 +97,6 @@ eal_get_hugefile_path(char *buffer, size_t buflen, const char *hugedir, int f_id return buffer; } -#ifdef RTE_EAL_SINGLE_FILE_SEGMENTS -static inline const char * -eal_get_hugefile_temp_path(char *buffer, size_t buflen, const char *hugedir, int f_id) -{ - snprintf(buffer, buflen, TEMP_HUGEFILE_FMT, hugedir, - internal_config.hugefile_prefix, f_id); - buffer[buflen - 1] = '\0'; - return buffer; -} -#endif - /** define the default filename prefix for the %s values above */ #define HUGEFILE_PREFIX_DEFAULT "rte" diff --git a/lib/librte_eal/common/eal_hugepages.h b/lib/librte_eal/common/eal_hugepages.h index 38edac03..68369f26 100644 --- a/lib/librte_eal/common/eal_hugepages.h +++ b/lib/librte_eal/common/eal_hugepages.h @@ -52,9 +52,6 @@ struct hugepage_file { int socket_id; /**< NUMA socket ID */ int file_id; /**< the '%d' in HUGEFILE_FMT */ int memseg_id; /**< the memory segment to which page belongs */ -#ifdef RTE_EAL_SINGLE_FILE_SEGMENTS - int repeated; /**< number of times the page size is repeated */ -#endif char filepath[MAX_HUGEPAGE_PATH]; /**< path to backing file on filesystem */ }; diff --git a/lib/librte_eal/common/eal_private.h b/lib/librte_eal/common/eal_private.h index 857dc3ea..9e7d8f6b 100644 --- a/lib/librte_eal/common/eal_private.h +++ b/lib/librte_eal/common/eal_private.h @@ -47,7 +47,9 @@ int 
rte_eal_memzone_init(void); /** - * Common log initialization function (private to eal). + * Common log initialization function (private to eal). Determines + * where log data is written when no call to rte_openlog_stream is + * in effect. * * @param default_log * The default log stream to be used. @@ -55,7 +57,7 @@ int rte_eal_memzone_init(void); * - 0 on success * - Negative on error */ -int rte_eal_common_log_init(FILE *default_log); +void eal_log_set_default(FILE *default_log); /** * Fill configuration with number of physical and logical processors @@ -96,16 +98,6 @@ int rte_eal_memory_init(void); */ int rte_eal_timer_init(void); -/** - * Init early logs - * - * This function is private to EAL. - * - * @return - * 0 on success, negative on error - */ -int rte_eal_log_early_init(void); - /** * Init the default log stream * @@ -117,7 +109,7 @@ int rte_eal_log_early_init(void); int rte_eal_log_init(const char *id, int facility); /** - * Init the default log stream + * Init the PCI infrastructure * * This function is private to EAL. * @@ -126,30 +118,21 @@ int rte_eal_log_init(const char *id, int facility); */ int rte_eal_pci_init(void); -#ifdef RTE_LIBRTE_IVSHMEM -/** - * Init the memory from IVSHMEM devices - * - * This function is private to EAL. - * - * @return - * 0 on success, negative on error - */ -int rte_eal_ivshmem_init(void); +struct rte_pci_driver; +struct rte_pci_device; /** - * Init objects in IVSHMEM devices + * Update a pci device object by asking the kernel for the latest information. * * This function is private to EAL. * + * @param addr + * The PCI Bus-Device-Function address to look for * @return - * 0 on success, negative on error + * - 0 on success. + * - negative on error. */ -int rte_eal_ivshmem_obj_init(void); -#endif - -struct rte_pci_driver; -struct rte_pci_device; +int pci_update_device(const struct rte_pci_addr *addr); /** * Unbind kernel driver for this device @@ -258,13 +241,6 @@ int rte_eal_intr_init(void); */ int rte_eal_alarm_init(void); -/** - * This function initialises any virtual devices - * - * This function is private to the EAL. - */ -int rte_eal_dev_init(void); - /** * Function is to check if the kernel module(like, vfio, vfio_iommu_type1, * etc.) loaded. diff --git a/lib/librte_eal/common/include/arch/arm/rte_byteorder.h b/lib/librte_eal/common/include/arch/arm/rte_byteorder.h index 3f2dd1f2..1b312b30 100644 --- a/lib/librte_eal/common/include/arch/arm/rte_byteorder.h +++ b/lib/librte_eal/common/include/arch/arm/rte_byteorder.h @@ -41,6 +41,8 @@ extern "C" { #endif +#include +#include #include "generic/rte_byteorder.h" /* fix missing __builtin_bswap16 for gcc older then 4.8 */ diff --git a/lib/librte_eal/common/include/arch/arm/rte_cycles_64.h b/lib/librte_eal/common/include/arch/arm/rte_cycles_64.h index 14f26120..867a9468 100644 --- a/lib/librte_eal/common/include/arch/arm/rte_cycles_64.h +++ b/lib/librte_eal/common/include/arch/arm/rte_cycles_64.h @@ -45,6 +45,11 @@ extern "C" { * @return * The time base for this lcore. */ +#ifndef RTE_ARM_EAL_RDTSC_USE_PMU +/** + * This call is portable to any ARMv8 architecture, however, typically + * cntvct_el0 runs at <= 100MHz and it may be imprecise for some tasks. 
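Whichever counter implementation is compiled in, callers use rte_rdtsc() the same way. A minimal timing sketch, assuming rte_cycles.h is included and do_work() stands in for a real workload:

    extern void do_work(void); /* hypothetical workload */

    static uint64_t
    time_work(void)
    {
        uint64_t start = rte_rdtsc();

        do_work();
        return rte_rdtsc() - start; /* elapsed timer cycles */
    }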
+ */ static inline uint64_t rte_rdtsc(void) { @@ -53,6 +58,34 @@ rte_rdtsc(void) asm volatile("mrs %0, cntvct_el0" : "=r" (tsc)); return tsc; } +#else +/** + * This is an alternative method to enable rte_rdtsc() with high resolution + * PMU cycles counter.The cycle counter runs at cpu frequency and this scheme + * uses ARMv8 PMU subsystem to get the cycle counter at userspace, However, + * access to PMU cycle counter from user space is not enabled by default in + * arm64 linux kernel. + * It is possible to enable cycle counter at user space access by configuring + * the PMU from the privileged mode (kernel space). + * + * asm volatile("msr pmintenset_el1, %0" : : "r" ((u64)(0 << 31))); + * asm volatile("msr pmcntenset_el0, %0" :: "r" BIT(31)); + * asm volatile("msr pmuserenr_el0, %0" : : "r"(BIT(0) | BIT(2))); + * asm volatile("mrs %0, pmcr_el0" : "=r" (val)); + * val |= (BIT(0) | BIT(2)); + * isb(); + * asm volatile("msr pmcr_el0, %0" : : "r" (val)); + * + */ +static inline uint64_t +rte_rdtsc(void) +{ + uint64_t tsc; + + asm volatile("mrs %0, pmccntr_el0" : "=r"(tsc)); + return tsc; +} +#endif static inline uint64_t rte_rdtsc_precise(void) diff --git a/lib/librte_eal/common/include/arch/arm/rte_memcpy_32.h b/lib/librte_eal/common/include/arch/arm/rte_memcpy_32.h index da6c233a..c3a26192 100644 --- a/lib/librte_eal/common/include/arch/arm/rte_memcpy_32.h +++ b/lib/librte_eal/common/include/arch/arm/rte_memcpy_32.h @@ -148,7 +148,8 @@ rte_mov256(uint8_t *dst, const uint8_t *src) } #define rte_memcpy(dst, src, n) \ - ({ (__builtin_constant_p(n)) ? \ + __extension__ ({ \ + (__builtin_constant_p(n)) ? \ memcpy((dst), (src), (n)) : \ rte_memcpy_func((dst), (src), (n)); }) diff --git a/lib/librte_eal/common/include/arch/arm/rte_prefetch_32.h b/lib/librte_eal/common/include/arch/arm/rte_prefetch_32.h index 5aeed22d..43cde172 100644 --- a/lib/librte_eal/common/include/arch/arm/rte_prefetch_32.h +++ b/lib/librte_eal/common/include/arch/arm/rte_prefetch_32.h @@ -37,6 +37,7 @@ extern "C" { #endif +#include #include "generic/rte_prefetch.h" static inline void rte_prefetch0(const volatile void *p) diff --git a/lib/librte_eal/common/include/arch/arm/rte_prefetch_64.h b/lib/librte_eal/common/include/arch/arm/rte_prefetch_64.h index 3ed46a46..0d077ea6 100644 --- a/lib/librte_eal/common/include/arch/arm/rte_prefetch_64.h +++ b/lib/librte_eal/common/include/arch/arm/rte_prefetch_64.h @@ -37,6 +37,7 @@ extern "C" { #endif +#include #include "generic/rte_prefetch.h" static inline void rte_prefetch0(const volatile void *p) diff --git a/lib/librte_eal/common/include/arch/arm/rte_vect.h b/lib/librte_eal/common/include/arch/arm/rte_vect.h index a33c0544..b86c2cf5 100644 --- a/lib/librte_eal/common/include/arch/arm/rte_vect.h +++ b/lib/librte_eal/common/include/arch/arm/rte_vect.h @@ -33,6 +33,7 @@ #ifndef _RTE_VECT_ARM_H_ #define _RTE_VECT_ARM_H_ +#include #include "arm_neon.h" #ifdef __cplusplus diff --git a/lib/librte_eal/common/include/arch/ppc_64/rte_atomic.h b/lib/librte_eal/common/include/arch/ppc_64/rte_atomic.h index 924e8940..fb4fccb4 100644 --- a/lib/librte_eal/common/include/arch/ppc_64/rte_atomic.h +++ b/lib/librte_eal/common/include/arch/ppc_64/rte_atomic.h @@ -46,6 +46,7 @@ extern "C" { #endif +#include #include "generic/rte_atomic.h" /** diff --git a/lib/librte_eal/common/include/arch/ppc_64/rte_byteorder.h b/lib/librte_eal/common/include/arch/ppc_64/rte_byteorder.h index 3c1734ed..544de3c2 100644 --- a/lib/librte_eal/common/include/arch/ppc_64/rte_byteorder.h +++ 
b/lib/librte_eal/common/include/arch/ppc_64/rte_byteorder.h @@ -42,6 +42,7 @@ extern "C" { #endif +#include #include "generic/rte_byteorder.h" /* diff --git a/lib/librte_eal/common/include/arch/ppc_64/rte_cycles.h b/lib/librte_eal/common/include/arch/ppc_64/rte_cycles.h index 64beddf9..8fa6fc60 100644 --- a/lib/librte_eal/common/include/arch/ppc_64/rte_cycles.h +++ b/lib/librte_eal/common/include/arch/ppc_64/rte_cycles.h @@ -40,6 +40,7 @@ extern "C" { #include "generic/rte_cycles.h" #include +#include /** * Read the time base register. @@ -52,6 +53,7 @@ rte_rdtsc(void) { union { uint64_t tsc_64; + RTE_STD_C11 struct { #if RTE_BYTE_ORDER == RTE_BIG_ENDIAN uint32_t hi_32; diff --git a/lib/librte_eal/common/include/arch/ppc_64/rte_memcpy.h b/lib/librte_eal/common/include/arch/ppc_64/rte_memcpy.h index acf7aac2..ca9d1dc5 100644 --- a/lib/librte_eal/common/include/arch/ppc_64/rte_memcpy.h +++ b/lib/librte_eal/common/include/arch/ppc_64/rte_memcpy.h @@ -95,7 +95,8 @@ rte_mov256(uint8_t *dst, const uint8_t *src) } #define rte_memcpy(dst, src, n) \ - ({ (__builtin_constant_p(n)) ? \ + __extension__ ({ \ + (__builtin_constant_p(n)) ? \ memcpy((dst), (src), (n)) : \ rte_memcpy_func((dst), (src), (n)); }) diff --git a/lib/librte_eal/common/include/arch/ppc_64/rte_prefetch.h b/lib/librte_eal/common/include/arch/ppc_64/rte_prefetch.h index 9a1995ea..fd2e53b9 100644 --- a/lib/librte_eal/common/include/arch/ppc_64/rte_prefetch.h +++ b/lib/librte_eal/common/include/arch/ppc_64/rte_prefetch.h @@ -37,6 +37,7 @@ extern "C" { #endif +#include #include "generic/rte_prefetch.h" static inline void rte_prefetch0(const volatile void *p) diff --git a/lib/librte_eal/common/include/arch/ppc_64/rte_vect.h b/lib/librte_eal/common/include/arch/ppc_64/rte_vect.h new file mode 100644 index 00000000..05209e52 --- /dev/null +++ b/lib/librte_eal/common/include/arch/ppc_64/rte_vect.h @@ -0,0 +1,60 @@ +/* + * BSD LICENSE + * + * Copyright (C) IBM Corporation 2016. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of IBM Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+*/ + +#ifndef _RTE_VECT_PPC_64_H_ +#define _RTE_VECT_PPC_64_H_ + +#include + +#ifdef __cplusplus +extern "C" { +#endif + +typedef vector signed int xmm_t; + +#define XMM_SIZE (sizeof(xmm_t)) +#define XMM_MASK (XMM_SIZE - 1) + +typedef union rte_xmm { + xmm_t x; + uint8_t u8[XMM_SIZE / sizeof(uint8_t)]; + uint16_t u16[XMM_SIZE / sizeof(uint16_t)]; + uint32_t u32[XMM_SIZE / sizeof(uint32_t)]; + uint64_t u64[XMM_SIZE / sizeof(uint64_t)]; + double pd[XMM_SIZE / sizeof(double)]; +} __attribute__((aligned(16))) rte_xmm_t; + +#ifdef __cplusplus +} +#endif + +#endif /* _RTE_VECT_PPC_64_H_ */ diff --git a/lib/librte_eal/common/include/arch/x86/rte_atomic.h b/lib/librte_eal/common/include/arch/x86/rte_atomic.h index b20056b8..00b1cdf5 100644 --- a/lib/librte_eal/common/include/arch/x86/rte_atomic.h +++ b/lib/librte_eal/common/include/arch/x86/rte_atomic.h @@ -38,6 +38,8 @@ extern "C" { #endif +#include +#include #include #include "generic/rte_atomic.h" diff --git a/lib/librte_eal/common/include/arch/x86/rte_atomic_32.h b/lib/librte_eal/common/include/arch/x86/rte_atomic_32.h index 400d8a96..2e04c759 100644 --- a/lib/librte_eal/common/include/arch/x86/rte_atomic_32.h +++ b/lib/librte_eal/common/include/arch/x86/rte_atomic_32.h @@ -37,9 +37,17 @@ * All rights reserved. */ +#ifndef _RTE_ATOMIC_X86_H_ +#error do not include this file directly, use instead +#endif + #ifndef _RTE_ATOMIC_I686_H_ #define _RTE_ATOMIC_I686_H_ +#include +#include +#include + /*------------------------- 64 bit atomic operations -------------------------*/ #ifndef RTE_FORCE_INTRINSICS @@ -47,6 +55,7 @@ static inline int rte_atomic64_cmpset(volatile uint64_t *dst, uint64_t exp, uint64_t src) { uint8_t res; + RTE_STD_C11 union { struct { uint32_t l32; diff --git a/lib/librte_eal/common/include/arch/x86/rte_atomic_64.h b/lib/librte_eal/common/include/arch/x86/rte_atomic_64.h index 4de66000..1a53a766 100644 --- a/lib/librte_eal/common/include/arch/x86/rte_atomic_64.h +++ b/lib/librte_eal/common/include/arch/x86/rte_atomic_64.h @@ -37,9 +37,17 @@ * All rights reserved. */ +#ifndef _RTE_ATOMIC_X86_H_ +#error do not include this file directly, use instead +#endif + #ifndef _RTE_ATOMIC_X86_64_H_ #define _RTE_ATOMIC_X86_64_H_ +#include +#include +#include + /*------------------------- 64 bit atomic operations -------------------------*/ #ifndef RTE_FORCE_INTRINSICS diff --git a/lib/librte_eal/common/include/arch/x86/rte_byteorder.h b/lib/librte_eal/common/include/arch/x86/rte_byteorder.h index ffdb6ef5..251f11b4 100644 --- a/lib/librte_eal/common/include/arch/x86/rte_byteorder.h +++ b/lib/librte_eal/common/include/arch/x86/rte_byteorder.h @@ -38,6 +38,8 @@ extern "C" { #endif +#include +#include #include "generic/rte_byteorder.h" #ifndef RTE_BYTE_ORDER diff --git a/lib/librte_eal/common/include/arch/x86/rte_byteorder_32.h b/lib/librte_eal/common/include/arch/x86/rte_byteorder_32.h index 51c306f8..14d64834 100644 --- a/lib/librte_eal/common/include/arch/x86/rte_byteorder_32.h +++ b/lib/librte_eal/common/include/arch/x86/rte_byteorder_32.h @@ -31,9 +31,16 @@ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ +#ifndef _RTE_BYTEORDER_X86_H_ +#error do not include this file directly, use instead +#endif + #ifndef _RTE_BYTEORDER_I686_H_ #define _RTE_BYTEORDER_I686_H_ +#include +#include + /* * An architecture-optimized byte swap for a 64-bit value. 
* diff --git a/lib/librte_eal/common/include/arch/x86/rte_byteorder_64.h b/lib/librte_eal/common/include/arch/x86/rte_byteorder_64.h index dda572bd..516ac052 100644 --- a/lib/librte_eal/common/include/arch/x86/rte_byteorder_64.h +++ b/lib/librte_eal/common/include/arch/x86/rte_byteorder_64.h @@ -31,9 +31,16 @@ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ +#ifndef _RTE_BYTEORDER_X86_H_ +#error do not include this file directly, use instead +#endif + #ifndef _RTE_BYTEORDER_X86_64_H_ #define _RTE_BYTEORDER_X86_64_H_ +#include +#include + /* * An architecture-optimized byte swap for a 64-bit value. * diff --git a/lib/librte_eal/common/include/arch/x86/rte_cycles.h b/lib/librte_eal/common/include/arch/x86/rte_cycles.h index 6e3c7d89..5eb6ce96 100644 --- a/lib/librte_eal/common/include/arch/x86/rte_cycles.h +++ b/lib/librte_eal/common/include/arch/x86/rte_cycles.h @@ -75,12 +75,14 @@ extern "C" { extern int rte_cycles_vmware_tsc_map; #include #endif +#include static inline uint64_t rte_rdtsc(void) { union { uint64_t tsc_64; + RTE_STD_C11 struct { uint32_t lo_32; uint32_t hi_32; diff --git a/lib/librte_eal/common/include/arch/x86/rte_memcpy.h b/lib/librte_eal/common/include/arch/x86/rte_memcpy.h index 413035e7..b3bfc235 100644 --- a/lib/librte_eal/common/include/arch/x86/rte_memcpy.h +++ b/lib/librte_eal/common/include/arch/x86/rte_memcpy.h @@ -594,7 +594,7 @@ rte_mov256(uint8_t *dst, const uint8_t *src) * - __m128i ~ must be pre-defined */ #define MOVEUNALIGNED_LEFT47_IMM(dst, src, len, offset) \ -({ \ +__extension__ ({ \ int tmp; \ while (len >= 128 + 16 - offset) { \ xmm0 = _mm_loadu_si128((const __m128i *)((const uint8_t *)src - offset + 0 * 16)); \ @@ -655,7 +655,7 @@ rte_mov256(uint8_t *dst, const uint8_t *src) * - __m128i ~ used in MOVEUNALIGNED_LEFT47_IMM must be pre-defined */ #define MOVEUNALIGNED_LEFT47(dst, src, len, offset) \ -({ \ +__extension__ ({ \ switch (offset) { \ case 0x01: MOVEUNALIGNED_LEFT47_IMM(dst, src, n, 0x01); break; \ case 0x02: MOVEUNALIGNED_LEFT47_IMM(dst, src, n, 0x02); break; \ diff --git a/lib/librte_eal/common/include/arch/x86/rte_prefetch.h b/lib/librte_eal/common/include/arch/x86/rte_prefetch.h index 5dac47eb..f464398f 100644 --- a/lib/librte_eal/common/include/arch/x86/rte_prefetch.h +++ b/lib/librte_eal/common/include/arch/x86/rte_prefetch.h @@ -38,6 +38,7 @@ extern "C" { #endif +#include #include "generic/rte_prefetch.h" static inline void rte_prefetch0(const volatile void *p) diff --git a/lib/librte_eal/common/include/arch/x86/rte_rtm.h b/lib/librte_eal/common/include/arch/x86/rte_rtm.h index 0649f794..ab099952 100644 --- a/lib/librte_eal/common/include/arch/x86/rte_rtm.h +++ b/lib/librte_eal/common/include/arch/x86/rte_rtm.h @@ -20,6 +20,7 @@ /* Official RTM intrinsics interface matching gcc/icc, but works on older gcc compatible compilers and binutils. */ +#include #ifdef __cplusplus extern "C" { diff --git a/lib/librte_eal/common/include/arch/x86/rte_vect.h b/lib/librte_eal/common/include/arch/x86/rte_vect.h index b698797c..77f2e253 100644 --- a/lib/librte_eal/common/include/arch/x86/rte_vect.h +++ b/lib/librte_eal/common/include/arch/x86/rte_vect.h @@ -40,6 +40,8 @@ * RTE SSE/AVX related header. 
*/ +#include + #if (defined(__ICC) || (__GNUC__ == 4 && __GNUC_MINOR__ < 4)) #ifdef __SSE__ @@ -106,7 +108,8 @@ typedef union rte_ymm { #endif /* __AVX__ */ #ifdef RTE_ARCH_I686 -#define _mm_cvtsi128_si64(a) ({ \ +#define _mm_cvtsi128_si64(a) \ +__extension__ ({ \ rte_xmm_t m; \ m.x = (a); \ (m.u64[0]); \ @@ -117,7 +120,8 @@ typedef union rte_ymm { * Prior to version 12.1 icc doesn't support _mm_set_epi64x. */ #if (defined(__ICC) && __ICC < 1210) -#define _mm_set_epi64x(a, b) ({ \ +#define _mm_set_epi64x(a, b) \ +__extension__ ({ \ rte_xmm_t m; \ m.u64[0] = b; \ m.u64[1] = a; \ diff --git a/lib/librte_eal/common/include/generic/rte_atomic.h b/lib/librte_eal/common/include/generic/rte_atomic.h index bfb4fe44..43a704ec 100644 --- a/lib/librte_eal/common/include/generic/rte_atomic.h +++ b/lib/librte_eal/common/include/generic/rte_atomic.h @@ -42,6 +42,7 @@ */ #include +#include #ifdef __DOXYGEN__ diff --git a/lib/librte_eal/common/include/generic/rte_byteorder.h b/lib/librte_eal/common/include/generic/rte_byteorder.h index c46fdcf2..e00bccbc 100644 --- a/lib/librte_eal/common/include/generic/rte_byteorder.h +++ b/lib/librte_eal/common/include/generic/rte_byteorder.h @@ -50,6 +50,8 @@ #include #endif +#include + /* * Compile-time endianness detection */ diff --git a/lib/librte_eal/common/include/generic/rte_cpuflags.h b/lib/librte_eal/common/include/generic/rte_cpuflags.h index c1da357c..71321f32 100644 --- a/lib/librte_eal/common/include/generic/rte_cpuflags.h +++ b/lib/librte_eal/common/include/generic/rte_cpuflags.h @@ -44,6 +44,7 @@ /** * Enumeration of all CPU features supported */ +__extension__ enum rte_cpu_flag_t; /** @@ -55,6 +56,7 @@ enum rte_cpu_flag_t; * flag name * NULL if flag ID is invalid */ +__extension__ const char * rte_cpu_get_flag_name(enum rte_cpu_flag_t feature); @@ -68,6 +70,7 @@ rte_cpu_get_flag_name(enum rte_cpu_flag_t feature); * 0 if flag is not available * -ENOENT if flag is invalid */ +__extension__ int rte_cpu_get_flag_enabled(enum rte_cpu_flag_t feature); diff --git a/lib/librte_eal/common/include/generic/rte_cycles.h b/lib/librte_eal/common/include/generic/rte_cycles.h index 8cc21f20..00103ca9 100644 --- a/lib/librte_eal/common/include/generic/rte_cycles.h +++ b/lib/librte_eal/common/include/generic/rte_cycles.h @@ -180,15 +180,16 @@ rte_get_timer_hz(void) default: rte_panic("Invalid timer source specified\n"); } } - /** * Wait at least us microseconds. + * This function can be replaced with user-defined function. + * @see rte_delay_us_callback_register * * @param us * The number of microseconds to wait. */ -void -rte_delay_us(unsigned us); +extern void +(*rte_delay_us)(unsigned int us); /** * Wait at least ms milliseconds. @@ -202,4 +203,21 @@ rte_delay_ms(unsigned ms) rte_delay_us(ms * 1000); } +/** + * Blocking delay function. + * + * @param us + * Number of microseconds to wait. + */ +void rte_delay_us_block(unsigned int us); + +/** + * Replace rte_delay_us with user defined function. + * + * @param userfunc + * User function which replaces rte_delay_us. rte_delay_us_block restores + * buildin block delay function. 
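A usage sketch for the callback hook documented here, assuming usleep() granularity is acceptable to the application:

    #include <unistd.h>

    static void
    sleep_delay_us(unsigned int us)
    {
        usleep(us); /* may oversleep; fine for a power-friendly delay */
    }

    static void
    example_init(void) /* hypothetical application init hook */
    {
        /* replace the busy-wait; register rte_delay_us_block to restore */
        rte_delay_us_callback_register(sleep_delay_us);
    }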
+ */ +void rte_delay_us_callback_register(void(*userfunc)(unsigned int)); + #endif /* _RTE_CYCLES_H_ */ diff --git a/lib/librte_eal/common/include/generic/rte_memcpy.h b/lib/librte_eal/common/include/generic/rte_memcpy.h index afb0afe4..4e9d8794 100644 --- a/lib/librte_eal/common/include/generic/rte_memcpy.h +++ b/lib/librte_eal/common/include/generic/rte_memcpy.h @@ -64,6 +64,8 @@ rte_mov16(uint8_t *dst, const uint8_t *src); static inline void rte_mov32(uint8_t *dst, const uint8_t *src); +#ifdef __DOXYGEN__ + /** * Copy 48 bytes from one location to another using optimised * instructions. The locations should not overlap. @@ -76,6 +78,8 @@ rte_mov32(uint8_t *dst, const uint8_t *src); static inline void rte_mov48(uint8_t *dst, const uint8_t *src); +#endif /* __DOXYGEN__ */ + /** * Copy 64 bytes from one location to another using optimised * instructions. The locations should not overlap. diff --git a/lib/librte_eal/common/include/rte_common.h b/lib/librte_eal/common/include/rte_common.h index 332f2a43..db5ac91c 100644 --- a/lib/librte_eal/common/include/rte_common.h +++ b/lib/librte_eal/common/include/rte_common.h @@ -59,6 +59,13 @@ extern "C" { #define asm __asm__ #endif +/** C extension macro for environments lacking C11 features. */ +#if !defined(__STDC_VERSION__) || __STDC_VERSION__ < 201112L +#define RTE_STD_C11 __extension__ +#else +#define RTE_STD_C11 +#endif + #ifdef RTE_ARCH_STRICT_ALIGN typedef uint64_t unaligned_uint64_t __attribute__ ((aligned(1))); typedef uint32_t unaligned_uint32_t __attribute__ ((aligned(1))); @@ -268,7 +275,8 @@ rte_align64pow2(uint64_t v) /** * Macro to return the minimum of two numbers */ -#define RTE_MIN(a, b) ({ \ +#define RTE_MIN(a, b) \ + __extension__ ({ \ typeof (a) _a = (a); \ typeof (b) _b = (b); \ _a < _b ? _a : _b; \ @@ -277,7 +285,8 @@ rte_align64pow2(uint64_t v) /** * Macro to return the maximum of two numbers */ -#define RTE_MAX(a, b) ({ \ +#define RTE_MAX(a, b) \ + __extension__ ({ \ typeof (a) _a = (a); \ typeof (b) _b = (b); \ _a > _b ? _a : _b; \ @@ -326,6 +335,15 @@ rte_bsf32(uint32_t v) /** Take a macro value and get a string version of it */ #define RTE_STR(x) _RTE_STR(x) +/** + * ISO C helpers to modify format strings using variadic macros. + * This is a replacement for the ", ## __VA_ARGS__" GNU extension. + * An empty %s argument is appended to avoid a dangling comma. + */ +#define RTE_FMT(fmt, ...) fmt "%.0s", __VA_ARGS__ "" +#define RTE_FMT_HEAD(fmt, ...) fmt +#define RTE_FMT_TAIL(fmt, ...) __VA_ARGS__ + /** Mask value of type "tp" for the first "ln" bit set. */ #define RTE_LEN2MASK(ln, tp) \ ((tp)((uint64_t)-1 >> (sizeof(uint64_t) * CHAR_BIT - (ln)))) diff --git a/lib/librte_eal/common/include/rte_dev.h b/lib/librte_eal/common/include/rte_dev.h index 95789f9d..8840380d 100644 --- a/lib/librte_eal/common/include/rte_dev.h +++ b/lib/librte_eal/common/include/rte_dev.h @@ -100,37 +100,56 @@ rte_pmd_debug_trace(const char *func_name, const char *fmt, ...) } \ } while (0) +/** + * A generic memory resource representation. + */ +struct rte_mem_resource { + uint64_t phys_addr; /**< Physical address, 0 if not resource. */ + uint64_t len; /**< Length of the resource. */ + void *addr; /**< Virtual address, NULL when not mapped. */ +}; /** Double linked list of device drivers. */ TAILQ_HEAD(rte_driver_list, rte_driver); +/** Double linked list of devices. */ +TAILQ_HEAD(rte_device_list, rte_device); + +/* Forward declaration */ +struct rte_driver; /** - * Initialization function called for each device driver once. 
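RTE_STD_C11, sprinkled through these headers, expands to __extension__ only when the compiler is not in C11 mode, so anonymous unions stay warning-free either way. A minimal sketch of the pattern with a hypothetical descriptor:

    #include <stdint.h>

    struct example_desc {
        uint32_t type;
        RTE_STD_C11
        union {             /* anonymous union, a C11 feature */
            uint64_t as_u64;
            void *as_ptr;
        };
    };
    /* members are addressed directly: desc.type, desc.as_u64, desc.as_ptr */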
+ * A structure describing a generic device. */ -typedef int (rte_dev_init_t)(const char *name, const char *args); +struct rte_device { + TAILQ_ENTRY(rte_device) next; /**< Next device */ + struct rte_driver *driver; /**< Associated driver */ + int numa_node; /**< NUMA node connection */ + struct rte_devargs *devargs; /**< Device user arguments */ +}; /** - * Uninitilization function called for each device driver once. + * Insert a device detected by a bus scanning. + * + * @param dev + * A pointer to a rte_device structure describing the detected device. */ -typedef int (rte_dev_uninit_t)(const char *name); +void rte_eal_device_insert(struct rte_device *dev); /** - * Driver type enumeration + * Remove a device (e.g. when being unplugged). + * + * @param dev + * A pointer to a rte_device structure describing the device to be removed. */ -enum pmd_type { - PMD_VDEV = 0, - PMD_PDEV = 1, -}; +void rte_eal_device_remove(struct rte_device *dev); /** * A structure describing a device driver. */ struct rte_driver { TAILQ_ENTRY(rte_driver) next; /**< Next in list. */ - enum pmd_type type; /**< PMD Driver type */ const char *name; /**< Driver name. */ - rte_dev_init_t *init; /**< Device init. function. */ - rte_dev_uninit_t *uninit; /**< Device uninit. function. */ + const char *alias; /**< Driver alias. */ }; /** @@ -178,28 +197,45 @@ int rte_eal_vdev_init(const char *name, const char *args); */ int rte_eal_vdev_uninit(const char *name); -#define DRIVER_EXPORT_NAME_ARRAY(n, idx) n##idx[] +/** + * Attach a device to a registered driver. + * + * @param name + * The device name, that refers to a pci device (or some private + * way of designating a vdev device). Based on this device name, eal + * will identify a driver capable of handling it and pass it to the + * driver probing function. + * @param devargs + * Device arguments to be passed to the driver. + * @return + * 0 on success, negative on error. + */ +int rte_eal_dev_attach(const char *name, const char *devargs); -#define DRIVER_EXPORT_NAME(name, idx) \ -static const char DRIVER_EXPORT_NAME_ARRAY(this_pmd_name, idx) \ -__attribute__((used)) = RTE_STR(name) +/** + * Detach a device from its driver. + * + * @param name + * Same description as for rte_eal_dev_attach(). + * Here, eal will call the driver detaching function. + * @return + * 0 on success, negative on error. 
+ */ +int rte_eal_dev_detach(const char *name); -#define PMD_REGISTER_DRIVER(drv, nm)\ -void devinitfn_ ##drv(void);\ -void __attribute__((constructor, used)) devinitfn_ ##drv(void)\ -{\ - (drv).name = RTE_STR(nm);\ - rte_eal_driver_register(&drv);\ -} \ -DRIVER_EXPORT_NAME(nm, __COUNTER__) +#define RTE_PMD_EXPORT_NAME_ARRAY(n, idx) n##idx[] + +#define RTE_PMD_EXPORT_NAME(name, idx) \ +static const char RTE_PMD_EXPORT_NAME_ARRAY(this_pmd_name, idx) \ +__attribute__((used)) = RTE_STR(name) #define DRV_EXP_TAG(name, tag) __##name##_##tag -#define DRIVER_REGISTER_PCI_TABLE(name, table) \ +#define RTE_PMD_REGISTER_PCI_TABLE(name, table) \ static const char DRV_EXP_TAG(name, pci_tbl_export)[] __attribute__((used)) = \ RTE_STR(table) -#define DRIVER_REGISTER_PARAM_STRING(name, str) \ +#define RTE_PMD_REGISTER_PARAM_STRING(name, str) \ static const char DRV_EXP_TAG(name, param_string_export)[] \ __attribute__((used)) = str diff --git a/lib/librte_eal/common/include/rte_devargs.h b/lib/librte_eal/common/include/rte_devargs.h index 53c59f56..88120a1c 100644 --- a/lib/librte_eal/common/include/rte_devargs.h +++ b/lib/librte_eal/common/include/rte_devargs.h @@ -76,6 +76,7 @@ struct rte_devargs { TAILQ_ENTRY(rte_devargs) next; /** Type of device. */ enum rte_devtype type; + RTE_STD_C11 union { /** Used if type is RTE_DEVTYPE_*_PCI. */ struct { @@ -106,8 +107,8 @@ extern struct rte_devargs_list devargs_list; * "04:00.0,arg=val". * * For virtual devices, the format of arguments string is "DRIVER_NAME*" - * or "DRIVER_NAME*,key=val,key2=val2,...". Examples: "eth_ring", - * "eth_ring0", "eth_pmdAnything,arg=0:arg2=1". + * or "DRIVER_NAME*,key=val,key2=val2,...". Examples: "net_ring", + * "net_ring0", "net_pmdAnything,arg=0:arg2=1". * * The function parses the arguments string to get driver name and driver * arguments. @@ -134,8 +135,8 @@ int rte_eal_parse_devargs_str(const char *devargs_str, * "04:00.0,arg=val". * * For virtual devices, the format of arguments string is "DRIVER_NAME*" - * or "DRIVER_NAME*,key=val,key2=val2,...". Examples: "eth_ring", - * "eth_ring0", "eth_pmdAnything,arg=0:arg2=1". The validity of the + * or "DRIVER_NAME*,key=val,key2=val2,...". Examples: "net_ring", + * "net_ring0", "net_pmdAnything,arg=0:arg2=1". The validity of the * driver name is not checked by this function, it is done when probing * the drivers. * diff --git a/lib/librte_eal/common/include/rte_eal.h b/lib/librte_eal/common/include/rte_eal.h index a71d6f57..d150b9dd 100644 --- a/lib/librte_eal/common/include/rte_eal.h +++ b/lib/librte_eal/common/include/rte_eal.h @@ -44,6 +44,7 @@ #include #include +#include #ifdef __cplusplus extern "C" { @@ -252,6 +253,9 @@ static inline int rte_gettid(void) return RTE_PER_LCORE(_thread_id); } +#define RTE_INIT(func) \ +static void __attribute__((constructor, used)) func(void) + #ifdef __cplusplus } #endif diff --git a/lib/librte_eal/common/include/rte_interrupts.h b/lib/librte_eal/common/include/rte_interrupts.h index ff11ef3a..fd3c6eff 100644 --- a/lib/librte_eal/common/include/rte_interrupts.h +++ b/lib/librte_eal/common/include/rte_interrupts.h @@ -34,6 +34,8 @@ #ifndef _RTE_INTERRUPTS_H_ #define _RTE_INTERRUPTS_H_ +#include + /** * @file * diff --git a/lib/librte_eal/common/include/rte_log.h b/lib/librte_eal/common/include/rte_log.h index b1add04c..29f7d192 100644 --- a/lib/librte_eal/common/include/rte_log.h +++ b/lib/librte_eal/common/include/rte_log.h @@ -42,8 +42,6 @@ * This file provides a log API to RTE applications. 
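Under the simplified rte_openlog_stream() shown earlier, a NULL argument now means "use the default stream", which rte_vlog() resolves at write time (falling back to the current stderr). A short sketch; the helper and log path are examples:

    #include <stdio.h>

    static void
    route_logs_to_file(void) /* hypothetical helper */
    {
        FILE *f = fopen("/tmp/app.log", "w"); /* example path */

        if (f != NULL)
            rte_openlog_stream(f);
        /* rte_openlog_stream(NULL) later reverts to the default stream */
    }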
*/ -#include "rte_common.h" /* for __rte_deprecated macro */ - #ifdef __cplusplus extern "C" { #endif @@ -56,7 +54,7 @@ extern "C" { struct rte_logs { uint32_t type; /**< Bitfield with enabled logs. */ uint32_t level; /**< Log level. */ - FILE *file; /**< Pointer to current FILE* for logs. */ + FILE *file; /**< Output file set by rte_openlog_stream, or NULL. */ }; /** Global log informations */ @@ -102,9 +100,6 @@ extern struct rte_logs rte_logs; #define RTE_LOG_INFO 7U /**< Informational. */ #define RTE_LOG_DEBUG 8U /**< Debug-level messages. */ -/** The default log stream. */ -extern FILE *eal_default_log_stream; - /** * Change the stream that will be used by the logging system. * @@ -180,45 +175,6 @@ int rte_log_cur_msg_loglevel(void); */ int rte_log_cur_msg_logtype(void); -/** - * @deprecated - * Enable or disable the history (enabled by default) - * - * @param enable - * true to enable, or 0 to disable history. - */ -__rte_deprecated -void rte_log_set_history(int enable); - -/** - * @deprecated - * Dump the log history to a file - * - * @param f - * A pointer to a file for output - */ -__rte_deprecated -void rte_log_dump_history(FILE *f); - -/** - * @deprecated - * Add a log message to the history. - * - * This function can be called from a user-defined log stream. It adds - * the given message in the history that can be dumped using - * rte_log_dump_history(). - * - * @param buf - * A data buffer containing the message to be saved in the history. - * @param size - * The length of the data buffer. - * @return - * - 0: Success. - * - (-ENOBUFS) if there is no room to store the message. - */ -__rte_deprecated -int rte_log_add_in_history(const char *buf, size_t size); - /** * Generates a log message. * diff --git a/lib/librte_eal/common/include/rte_malloc.h b/lib/librte_eal/common/include/rte_malloc.h index 74bb78c7..008ce134 100644 --- a/lib/librte_eal/common/include/rte_malloc.h +++ b/lib/librte_eal/common/include/rte_malloc.h @@ -294,7 +294,7 @@ rte_malloc_get_socket_stats(int socket, /** * Dump statistics. * - * Dump for the specified type to the console. If the type argument is + * Dump for the specified type to a file. If the type argument is * NULL, all memory types will be dumped. * * @param f diff --git a/lib/librte_eal/common/include/rte_memory.h b/lib/librte_eal/common/include/rte_memory.h index 06611093..4aa5d1f7 100644 --- a/lib/librte_eal/common/include/rte_memory.h +++ b/lib/librte_eal/common/include/rte_memory.h @@ -44,6 +44,8 @@ #include #include +#include + #ifdef RTE_EXEC_ENV_LINUXAPP #include #endif @@ -54,6 +56,7 @@ extern "C" { #include +__extension__ enum rte_page_sizes { RTE_PGSIZE_4K = 1ULL << 12, RTE_PGSIZE_64K = 1ULL << 16, @@ -103,13 +106,11 @@ typedef uint64_t phys_addr_t; /**< Physical address definition. */ */ struct rte_memseg { phys_addr_t phys_addr; /**< Start physical address. */ + RTE_STD_C11 union { void *addr; /**< Start virtual address. */ uint64_t addr_64; /**< Makes sure addr is always 64 bits */ }; -#ifdef RTE_LIBRTE_IVSHMEM - phys_addr_t ioremap_addr; /**< Real physical address inside the VM */ -#endif size_t len; /**< Length of the segment. */ uint64_t hugepage_sz; /**< The pagesize of underlying memory */ int32_t socket_id; /**< NUMA socket ID. */ @@ -161,7 +162,7 @@ phys_addr_t rte_mem_virt2phy(const void *virt); const struct rte_memseg *rte_eal_get_physmem_layout(void); /** - * Dump the physical memory layout to the console. + * Dump the physical memory layout to a file. 
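[Editor's note: with the deprecated history API (rte_log_set_history() and friends) dropped by the rte_log.h hunk above, rte_openlog_stream() is the one remaining hook for redirecting log output. A short sketch, assuming the application wants EAL logs captured in a regular file; log_to_file() is an illustrative helper, not part of the patch.]

#include <stdio.h>
#include <rte_log.h>

static int
log_to_file(const char *path)
{
	FILE *f = fopen(path, "w");

	if (f == NULL)
		return -1;
	/* After this call rte_logs.file points at f; when it is NULL,
	 * the common log code falls back to its default stream. */
	return rte_openlog_stream(f);
}

[End of editor's note.]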
* * @param f * A pointer to a file for output diff --git a/lib/librte_eal/common/include/rte_memzone.h b/lib/librte_eal/common/include/rte_memzone.h index f69b5a87..1d0827f4 100644 --- a/lib/librte_eal/common/include/rte_memzone.h +++ b/lib/librte_eal/common/include/rte_memzone.h @@ -53,6 +53,7 @@ #include #include +#include #ifdef __cplusplus extern "C" { @@ -78,13 +79,11 @@ struct rte_memzone { char name[RTE_MEMZONE_NAMESIZE]; /**< Name of the memory zone. */ phys_addr_t phys_addr; /**< Start physical address. */ + RTE_STD_C11 union { void *addr; /**< Start virtual address. */ uint64_t addr_64; /**< Makes sure addr is always 64-bits */ }; -#ifdef RTE_LIBRTE_IVSHMEM - phys_addr_t ioremap_addr; /**< Real physical address inside the VM */ -#endif size_t len; /**< Length of the memzone. */ uint64_t hugepage_sz; /**< The page size of underlying memory */ @@ -256,12 +255,10 @@ const struct rte_memzone *rte_memzone_reserve_bounded(const char *name, /** * Free a memzone. * - * Note: an IVSHMEM zone cannot be freed. - * * @param mz * A pointer to the memzone * @return - * -EINVAL - invalid parameter, IVSHMEM memzone. + * -EINVAL - invalid parameter. * 0 - success */ int rte_memzone_free(const struct rte_memzone *mz); @@ -280,7 +277,7 @@ int rte_memzone_free(const struct rte_memzone *mz); const struct rte_memzone *rte_memzone_lookup(const char *name); /** - * Dump all reserved memzones to the console. + * Dump all reserved memzones to a file. * * @param f * A pointer to a file for output diff --git a/lib/librte_eal/common/include/rte_pci.h b/lib/librte_eal/common/include/rte_pci.h index fa749626..9ce88472 100644 --- a/lib/librte_eal/common/include/rte_pci.h +++ b/lib/librte_eal/common/include/rte_pci.h @@ -82,7 +82,9 @@ extern "C" { #include #include +#include #include +#include TAILQ_HEAD(pci_device_list, rte_pci_device); /**< PCI devices in D-linked Q. */ TAILQ_HEAD(pci_driver_list, rte_pci_driver); /**< PCI drivers in D-linked Q. */ @@ -95,6 +97,7 @@ const char *pci_get_sysfs_path(void); /** Formatting string for PCI device identifier: Ex: 0000:00:01.0 */ #define PCI_PRI_FMT "%.4" PRIx16 ":%.2" PRIx8 ":%.2" PRIx8 ".%" PRIx8 +#define PCI_PRI_STR_SIZE sizeof("XXXX:XX:XX.X") /** Short formatting string, without domain, for PCI device: Ex: 00:01.0 */ #define PCI_SHORT_PRI_FMT "%.2" PRIx8 ":%.2" PRIx8 ".%" PRIx8 @@ -105,15 +108,6 @@ const char *pci_get_sysfs_path(void); /** Nb. of values in PCI resource format. */ #define PCI_RESOURCE_FMT_NVAL 3 -/** - * A structure describing a PCI resource. - */ -struct rte_pci_resource { - uint64_t phys_addr; /**< Physical address, 0 if no resource. */ - uint64_t len; /**< Length of the resource. */ - void *addr; /**< Virtual address, NULL when not mapped. */ -}; - /** Maximum number of PCI resources. */ #define PCI_MAX_RESOURCE 6 @@ -155,14 +149,14 @@ enum rte_kernel_driver { */ struct rte_pci_device { TAILQ_ENTRY(rte_pci_device) next; /**< Next probed PCI device. */ + struct rte_device device; /**< Inherit core device */ struct rte_pci_addr addr; /**< PCI location. */ struct rte_pci_id id; /**< PCI ID. 
*/ - struct rte_pci_resource mem_resource[PCI_MAX_RESOURCE]; /**< PCI Memory Resource */ + struct rte_mem_resource mem_resource[PCI_MAX_RESOURCE]; + /**< PCI Memory Resource */ struct rte_intr_handle intr_handle; /**< Interrupt handle */ struct rte_pci_driver *driver; /**< Associated driver */ uint16_t max_vfs; /**< sriov enable if not zero */ - int numa_node; /**< NUMA node connection */ - struct rte_devargs *devargs; /**< Device user arguments */ enum rte_kernel_driver kdrv; /**< Kernel driver passthrough */ }; @@ -193,21 +187,21 @@ struct rte_pci_driver; /** * Initialisation function for the driver called during PCI probing. */ -typedef int (pci_devinit_t)(struct rte_pci_driver *, struct rte_pci_device *); +typedef int (pci_probe_t)(struct rte_pci_driver *, struct rte_pci_device *); /** * Uninitialisation function for the driver called during hotplugging. */ -typedef int (pci_devuninit_t)(struct rte_pci_device *); +typedef int (pci_remove_t)(struct rte_pci_device *); /** * A structure describing a PCI driver. */ struct rte_pci_driver { TAILQ_ENTRY(rte_pci_driver) next; /**< Next in list. */ - const char *name; /**< Driver name. */ - pci_devinit_t *devinit; /**< Device init. function. */ - pci_devuninit_t *devuninit; /**< Device uninit function. */ + struct rte_driver driver; /**< Inherit core driver. */ + pci_probe_t *probe; /**< Device Probe function. */ + pci_remove_t *remove; /**< Device Remove function. */ const struct rte_pci_id *id_table; /**< ID table, NULL terminated. */ uint32_t drv_flags; /**< Flags contolling handling of device. */ }; @@ -308,6 +302,28 @@ eal_parse_pci_DomBDF(const char *input, struct rte_pci_addr *dev_addr) } #undef GET_PCIADDR_FIELD +/** + * Utility function to write a pci device name, this device name can later be + * used to retrieve the corresponding rte_pci_addr using eal_parse_pci_* + * BDF helpers. + * + * @param addr + * The PCI Bus-Device-Function address + * @param output + * The output buffer string + * @param size + * The output buffer size + */ +static inline void +rte_eal_pci_device_name(const struct rte_pci_addr *addr, + char *output, size_t size) +{ + RTE_VERIFY(size >= PCI_PRI_STR_SIZE); + RTE_VERIFY(snprintf(output, size, PCI_PRI_FMT, + addr->domain, addr->bus, + addr->devid, addr->function) >= 0); +} + /* Compare two PCI device addresses. */ /** * Utility function to compare two PCI device addresses. @@ -442,7 +458,7 @@ int rte_eal_pci_probe_one(const struct rte_pci_addr *addr); * Close the single PCI device. * * Scan the content of the PCI bus, and find the pci device specified by pci - * address, then call the devuninit() function for registered driver that has a + * address, then call the remove() function for registered driver that has a * matching entry in its id_table for discovered device. * * @param addr @@ -470,6 +486,16 @@ void rte_eal_pci_dump(FILE *f); */ void rte_eal_pci_register(struct rte_pci_driver *driver); +/** Helper for PCI device registration from driver (eth, crypto) instance */ +#define RTE_PMD_REGISTER_PCI(nm, pci_drv) \ +RTE_INIT(pciinitfn_ ##nm); \ +static void pciinitfn_ ##nm(void) \ +{\ + (pci_drv).driver.name = RTE_STR(nm);\ + rte_eal_pci_register(&pci_drv); \ +} \ +RTE_PMD_EXPORT_NAME(nm, __COUNTER__) + /** * Unregister a PCI driver. 
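[Editor's note: to make the renames above concrete, here is a hedged skeleton of a PCI PMD under the new model — probe/remove instead of devinit/devuninit, the driver name injected by RTE_PMD_REGISTER_PCI, and the new rte_eal_pci_device_name() helper. All identifiers and the vendor/device IDs below are placeholders, not a real PMD from this patch.]

#include <rte_common.h>
#include <rte_pci.h>

static int
dummy_probe(struct rte_pci_driver *drv __rte_unused,
	    struct rte_pci_device *dev)
{
	char name[PCI_PRI_STR_SIZE];

	/* Format the BDF for logging/lookups; the helper RTE_VERIFYs
	 * that the buffer is at least PCI_PRI_STR_SIZE bytes. */
	rte_eal_pci_device_name(&dev->addr, name, sizeof(name));
	return 0;
}

static int
dummy_remove(struct rte_pci_device *dev __rte_unused)
{
	return 0;
}

static const struct rte_pci_id dummy_id_map[] = {
	{ RTE_PCI_DEVICE(0x8086, 0x10C9) }, /* placeholder VID/DID */
	{ .vendor_id = 0 }, /* sentinel */
};

static struct rte_pci_driver dummy_pmd = {
	.id_table = dummy_id_map,
	.probe = dummy_probe,
	.remove = dummy_remove,
};

/* Sets dummy_pmd.driver.name = "net_dummy" in a constructor and
 * registers the driver with the PCI subsystem. */
RTE_PMD_REGISTER_PCI(net_dummy, dummy_pmd);

[End of editor's note.]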
* diff --git a/lib/librte_eal/common/include/rte_pci_dev_ids.h b/lib/librte_eal/common/include/rte_pci_dev_ids.h deleted file mode 100644 index 6ec8ae8c..00000000 --- a/lib/librte_eal/common/include/rte_pci_dev_ids.h +++ /dev/null @@ -1,326 +0,0 @@ -/*- - * This file is provided under a dual BSD/GPLv2 license. When using or - * redistributing this file, you may do so under either license. - * - * GPL LICENSE SUMMARY - * - * Copyright(c) 2010-2015 Intel Corporation. All rights reserved. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of version 2 of the GNU General Public License as - * published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. - * The full GNU General Public License is included in this distribution - * in the file called LICENSE.GPL. - * - * Contact Information: - * Intel Corporation - * - * BSD LICENSE - * - * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * - * * Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in - * the documentation and/or other materials provided with the - * distribution. - * * Neither the name of Intel Corporation nor the names of its - * contributors may be used to endorse or promote products derived - * from this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
- * - */ - -#ifndef RTE_PCI_DEV_ID_DECL_IGB -#define RTE_PCI_DEV_ID_DECL_IGB(vend, dev) -#endif - -#ifndef RTE_PCI_DEV_ID_DECL_IGBVF -#define RTE_PCI_DEV_ID_DECL_IGBVF(vend, dev) -#endif - -#ifndef RTE_PCI_DEV_ID_DECL_IXGBE -#define RTE_PCI_DEV_ID_DECL_IXGBE(vend, dev) -#endif - -#ifndef RTE_PCI_DEV_ID_DECL_IXGBEVF -#define RTE_PCI_DEV_ID_DECL_IXGBEVF(vend, dev) -#endif - -#ifndef PCI_VENDOR_ID_INTEL -/** Vendor ID used by Intel devices */ -#define PCI_VENDOR_ID_INTEL 0x8086 -#endif - -/******************** Physical IGB devices from e1000_hw.h ********************/ - -#define E1000_DEV_ID_82576 0x10C9 -#define E1000_DEV_ID_82576_FIBER 0x10E6 -#define E1000_DEV_ID_82576_SERDES 0x10E7 -#define E1000_DEV_ID_82576_QUAD_COPPER 0x10E8 -#define E1000_DEV_ID_82576_QUAD_COPPER_ET2 0x1526 -#define E1000_DEV_ID_82576_NS 0x150A -#define E1000_DEV_ID_82576_NS_SERDES 0x1518 -#define E1000_DEV_ID_82576_SERDES_QUAD 0x150D -#define E1000_DEV_ID_82575EB_COPPER 0x10A7 -#define E1000_DEV_ID_82575EB_FIBER_SERDES 0x10A9 -#define E1000_DEV_ID_82575GB_QUAD_COPPER 0x10D6 -#define E1000_DEV_ID_82580_COPPER 0x150E -#define E1000_DEV_ID_82580_FIBER 0x150F -#define E1000_DEV_ID_82580_SERDES 0x1510 -#define E1000_DEV_ID_82580_SGMII 0x1511 -#define E1000_DEV_ID_82580_COPPER_DUAL 0x1516 -#define E1000_DEV_ID_82580_QUAD_FIBER 0x1527 -#define E1000_DEV_ID_I350_COPPER 0x1521 -#define E1000_DEV_ID_I350_FIBER 0x1522 -#define E1000_DEV_ID_I350_SERDES 0x1523 -#define E1000_DEV_ID_I350_SGMII 0x1524 -#define E1000_DEV_ID_I350_DA4 0x1546 -#define E1000_DEV_ID_I210_COPPER 0x1533 -#define E1000_DEV_ID_I210_COPPER_OEM1 0x1534 -#define E1000_DEV_ID_I210_COPPER_IT 0x1535 -#define E1000_DEV_ID_I210_FIBER 0x1536 -#define E1000_DEV_ID_I210_SERDES 0x1537 -#define E1000_DEV_ID_I210_SGMII 0x1538 -#define E1000_DEV_ID_I210_COPPER_FLASHLESS 0x157B -#define E1000_DEV_ID_I210_SERDES_FLASHLESS 0x157C -#define E1000_DEV_ID_I211_COPPER 0x1539 -#define E1000_DEV_ID_I354_BACKPLANE_1GBPS 0x1F40 -#define E1000_DEV_ID_I354_SGMII 0x1F41 -#define E1000_DEV_ID_I354_BACKPLANE_2_5GBPS 0x1F45 -#define E1000_DEV_ID_DH89XXCC_SGMII 0x0438 -#define E1000_DEV_ID_DH89XXCC_SERDES 0x043A -#define E1000_DEV_ID_DH89XXCC_BACKPLANE 0x043C -#define E1000_DEV_ID_DH89XXCC_SFP 0x0440 - -RTE_PCI_DEV_ID_DECL_IGB(PCI_VENDOR_ID_INTEL, E1000_DEV_ID_82576) -RTE_PCI_DEV_ID_DECL_IGB(PCI_VENDOR_ID_INTEL, E1000_DEV_ID_82576_FIBER) -RTE_PCI_DEV_ID_DECL_IGB(PCI_VENDOR_ID_INTEL, E1000_DEV_ID_82576_SERDES) -RTE_PCI_DEV_ID_DECL_IGB(PCI_VENDOR_ID_INTEL, E1000_DEV_ID_82576_QUAD_COPPER) -RTE_PCI_DEV_ID_DECL_IGB(PCI_VENDOR_ID_INTEL, E1000_DEV_ID_82576_QUAD_COPPER_ET2) -RTE_PCI_DEV_ID_DECL_IGB(PCI_VENDOR_ID_INTEL, E1000_DEV_ID_82576_NS) -RTE_PCI_DEV_ID_DECL_IGB(PCI_VENDOR_ID_INTEL, E1000_DEV_ID_82576_NS_SERDES) -RTE_PCI_DEV_ID_DECL_IGB(PCI_VENDOR_ID_INTEL, E1000_DEV_ID_82576_SERDES_QUAD) - -RTE_PCI_DEV_ID_DECL_IGB(PCI_VENDOR_ID_INTEL, E1000_DEV_ID_82575EB_COPPER) -RTE_PCI_DEV_ID_DECL_IGB(PCI_VENDOR_ID_INTEL, E1000_DEV_ID_82575EB_FIBER_SERDES) -RTE_PCI_DEV_ID_DECL_IGB(PCI_VENDOR_ID_INTEL, E1000_DEV_ID_82575GB_QUAD_COPPER) - -RTE_PCI_DEV_ID_DECL_IGB(PCI_VENDOR_ID_INTEL, E1000_DEV_ID_82580_COPPER) -RTE_PCI_DEV_ID_DECL_IGB(PCI_VENDOR_ID_INTEL, E1000_DEV_ID_82580_FIBER) -RTE_PCI_DEV_ID_DECL_IGB(PCI_VENDOR_ID_INTEL, E1000_DEV_ID_82580_SERDES) -RTE_PCI_DEV_ID_DECL_IGB(PCI_VENDOR_ID_INTEL, E1000_DEV_ID_82580_SGMII) -RTE_PCI_DEV_ID_DECL_IGB(PCI_VENDOR_ID_INTEL, E1000_DEV_ID_82580_COPPER_DUAL) -RTE_PCI_DEV_ID_DECL_IGB(PCI_VENDOR_ID_INTEL, E1000_DEV_ID_82580_QUAD_FIBER) - 
-RTE_PCI_DEV_ID_DECL_IGB(PCI_VENDOR_ID_INTEL, E1000_DEV_ID_I350_COPPER) -RTE_PCI_DEV_ID_DECL_IGB(PCI_VENDOR_ID_INTEL, E1000_DEV_ID_I350_FIBER) -RTE_PCI_DEV_ID_DECL_IGB(PCI_VENDOR_ID_INTEL, E1000_DEV_ID_I350_SERDES) -RTE_PCI_DEV_ID_DECL_IGB(PCI_VENDOR_ID_INTEL, E1000_DEV_ID_I350_SGMII) -RTE_PCI_DEV_ID_DECL_IGB(PCI_VENDOR_ID_INTEL, E1000_DEV_ID_I350_DA4) -RTE_PCI_DEV_ID_DECL_IGB(PCI_VENDOR_ID_INTEL, E1000_DEV_ID_I210_COPPER) -RTE_PCI_DEV_ID_DECL_IGB(PCI_VENDOR_ID_INTEL, E1000_DEV_ID_I210_COPPER_OEM1) -RTE_PCI_DEV_ID_DECL_IGB(PCI_VENDOR_ID_INTEL, E1000_DEV_ID_I210_COPPER_IT) -RTE_PCI_DEV_ID_DECL_IGB(PCI_VENDOR_ID_INTEL, E1000_DEV_ID_I210_FIBER) -RTE_PCI_DEV_ID_DECL_IGB(PCI_VENDOR_ID_INTEL, E1000_DEV_ID_I210_SERDES) -RTE_PCI_DEV_ID_DECL_IGB(PCI_VENDOR_ID_INTEL, E1000_DEV_ID_I210_SGMII) -RTE_PCI_DEV_ID_DECL_IGB(PCI_VENDOR_ID_INTEL, E1000_DEV_ID_I211_COPPER) -RTE_PCI_DEV_ID_DECL_IGB(PCI_VENDOR_ID_INTEL, E1000_DEV_ID_I354_BACKPLANE_1GBPS) -RTE_PCI_DEV_ID_DECL_IGB(PCI_VENDOR_ID_INTEL, E1000_DEV_ID_I354_SGMII) -RTE_PCI_DEV_ID_DECL_IGB(PCI_VENDOR_ID_INTEL, E1000_DEV_ID_I354_BACKPLANE_2_5GBPS) -RTE_PCI_DEV_ID_DECL_IGB(PCI_VENDOR_ID_INTEL, E1000_DEV_ID_DH89XXCC_SGMII) -RTE_PCI_DEV_ID_DECL_IGB(PCI_VENDOR_ID_INTEL, E1000_DEV_ID_DH89XXCC_SERDES) -RTE_PCI_DEV_ID_DECL_IGB(PCI_VENDOR_ID_INTEL, E1000_DEV_ID_DH89XXCC_BACKPLANE) -RTE_PCI_DEV_ID_DECL_IGB(PCI_VENDOR_ID_INTEL, E1000_DEV_ID_DH89XXCC_SFP) - -/****************** Physical IXGBE devices from ixgbe_type.h ******************/ - -#define IXGBE_DEV_ID_82598 0x10B6 -#define IXGBE_DEV_ID_82598_BX 0x1508 -#define IXGBE_DEV_ID_82598AF_DUAL_PORT 0x10C6 -#define IXGBE_DEV_ID_82598AF_SINGLE_PORT 0x10C7 -#define IXGBE_DEV_ID_82598AT 0x10C8 -#define IXGBE_DEV_ID_82598AT2 0x150B -#define IXGBE_DEV_ID_82598EB_SFP_LOM 0x10DB -#define IXGBE_DEV_ID_82598EB_CX4 0x10DD -#define IXGBE_DEV_ID_82598_CX4_DUAL_PORT 0x10EC -#define IXGBE_DEV_ID_82598_DA_DUAL_PORT 0x10F1 -#define IXGBE_DEV_ID_82598_SR_DUAL_PORT_EM 0x10E1 -#define IXGBE_DEV_ID_82598EB_XF_LR 0x10F4 -#define IXGBE_DEV_ID_82599_KX4 0x10F7 -#define IXGBE_DEV_ID_82599_KX4_MEZZ 0x1514 -#define IXGBE_DEV_ID_82599_KR 0x1517 -#define IXGBE_DEV_ID_82599_COMBO_BACKPLANE 0x10F8 -#define IXGBE_SUBDEV_ID_82599_KX4_KR_MEZZ 0x000C -#define IXGBE_DEV_ID_82599_CX4 0x10F9 -#define IXGBE_DEV_ID_82599_SFP 0x10FB -#define IXGBE_SUBDEV_ID_82599_SFP 0x11A9 -#define IXGBE_SUBDEV_ID_82599_RNDC 0x1F72 -#define IXGBE_SUBDEV_ID_82599_560FLR 0x17D0 -#define IXGBE_SUBDEV_ID_82599_ECNA_DP 0x0470 -#define IXGBE_DEV_ID_82599_BACKPLANE_FCOE 0x152A -#define IXGBE_DEV_ID_82599_SFP_FCOE 0x1529 -#define IXGBE_DEV_ID_82599_SFP_EM 0x1507 -#define IXGBE_DEV_ID_82599_SFP_SF2 0x154D -#define IXGBE_DEV_ID_82599_SFP_SF_QP 0x154A -#define IXGBE_DEV_ID_82599_QSFP_SF_QP 0x1558 -#define IXGBE_DEV_ID_82599EN_SFP 0x1557 -#define IXGBE_DEV_ID_82599_XAUI_LOM 0x10FC -#define IXGBE_DEV_ID_82599_T3_LOM 0x151C -#define IXGBE_DEV_ID_82599_LS 0x154F -#define IXGBE_DEV_ID_X540T 0x1528 -#define IXGBE_DEV_ID_X540T1 0x1560 -#define IXGBE_DEV_ID_X550EM_X_SFP 0x15AC -#define IXGBE_DEV_ID_X550EM_X_10G_T 0x15AD -#define IXGBE_DEV_ID_X550EM_X_1G_T 0x15AE -#define IXGBE_DEV_ID_X550T 0x1563 -#define IXGBE_DEV_ID_X550T1 0x15D1 -#define IXGBE_DEV_ID_X550EM_A_KR 0x15C2 -#define IXGBE_DEV_ID_X550EM_A_KR_L 0x15C3 -#define IXGBE_DEV_ID_X550EM_A_SFP_N 0x15C4 -#define IXGBE_DEV_ID_X550EM_A_SGMII 0x15C6 -#define IXGBE_DEV_ID_X550EM_A_SGMII_L 0x15C7 -#define IXGBE_DEV_ID_X550EM_A_10G_T 0x15C8 -#define IXGBE_DEV_ID_X550EM_A_QSFP 0x15CA -#define IXGBE_DEV_ID_X550EM_A_QSFP_N 0x15CC -#define 
IXGBE_DEV_ID_X550EM_A_SFP 0x15CE -#define IXGBE_DEV_ID_X550EM_A_1G_T 0x15E4 -#define IXGBE_DEV_ID_X550EM_A_1G_T_L 0x15E5 -#define IXGBE_DEV_ID_X550EM_X_KX4 0x15AA -#define IXGBE_DEV_ID_X550EM_X_KR 0x15AB - -#ifdef RTE_NIC_BYPASS -#define IXGBE_DEV_ID_82599_BYPASS 0x155D -#endif - -RTE_PCI_DEV_ID_DECL_IXGBE(PCI_VENDOR_ID_INTEL, IXGBE_DEV_ID_82598) -RTE_PCI_DEV_ID_DECL_IXGBE(PCI_VENDOR_ID_INTEL, IXGBE_DEV_ID_82598_BX) -RTE_PCI_DEV_ID_DECL_IXGBE(PCI_VENDOR_ID_INTEL, IXGBE_DEV_ID_82598AF_DUAL_PORT) -RTE_PCI_DEV_ID_DECL_IXGBE(PCI_VENDOR_ID_INTEL, \ - IXGBE_DEV_ID_82598AF_SINGLE_PORT) -RTE_PCI_DEV_ID_DECL_IXGBE(PCI_VENDOR_ID_INTEL, IXGBE_DEV_ID_82598AT) -RTE_PCI_DEV_ID_DECL_IXGBE(PCI_VENDOR_ID_INTEL, IXGBE_DEV_ID_82598AT2) -RTE_PCI_DEV_ID_DECL_IXGBE(PCI_VENDOR_ID_INTEL, IXGBE_DEV_ID_82598EB_SFP_LOM) -RTE_PCI_DEV_ID_DECL_IXGBE(PCI_VENDOR_ID_INTEL, IXGBE_DEV_ID_82598EB_CX4) -RTE_PCI_DEV_ID_DECL_IXGBE(PCI_VENDOR_ID_INTEL, IXGBE_DEV_ID_82598_CX4_DUAL_PORT) -RTE_PCI_DEV_ID_DECL_IXGBE(PCI_VENDOR_ID_INTEL, IXGBE_DEV_ID_82598_DA_DUAL_PORT) -RTE_PCI_DEV_ID_DECL_IXGBE(PCI_VENDOR_ID_INTEL, \ - IXGBE_DEV_ID_82598_SR_DUAL_PORT_EM) -RTE_PCI_DEV_ID_DECL_IXGBE(PCI_VENDOR_ID_INTEL, IXGBE_DEV_ID_82598EB_XF_LR) -RTE_PCI_DEV_ID_DECL_IXGBE(PCI_VENDOR_ID_INTEL, IXGBE_DEV_ID_82599_KX4) -RTE_PCI_DEV_ID_DECL_IXGBE(PCI_VENDOR_ID_INTEL, IXGBE_DEV_ID_82599_KX4_MEZZ) -RTE_PCI_DEV_ID_DECL_IXGBE(PCI_VENDOR_ID_INTEL, IXGBE_DEV_ID_82599_KR) -RTE_PCI_DEV_ID_DECL_IXGBE(PCI_VENDOR_ID_INTEL, \ - IXGBE_DEV_ID_82599_COMBO_BACKPLANE) -RTE_PCI_DEV_ID_DECL_IXGBE(PCI_VENDOR_ID_INTEL, \ - IXGBE_SUBDEV_ID_82599_KX4_KR_MEZZ) -RTE_PCI_DEV_ID_DECL_IXGBE(PCI_VENDOR_ID_INTEL, IXGBE_DEV_ID_82599_CX4) -RTE_PCI_DEV_ID_DECL_IXGBE(PCI_VENDOR_ID_INTEL, IXGBE_DEV_ID_82599_SFP) -RTE_PCI_DEV_ID_DECL_IXGBE(PCI_VENDOR_ID_INTEL, IXGBE_SUBDEV_ID_82599_SFP) -RTE_PCI_DEV_ID_DECL_IXGBE(PCI_VENDOR_ID_INTEL, IXGBE_SUBDEV_ID_82599_RNDC) -RTE_PCI_DEV_ID_DECL_IXGBE(PCI_VENDOR_ID_INTEL, IXGBE_SUBDEV_ID_82599_560FLR) -RTE_PCI_DEV_ID_DECL_IXGBE(PCI_VENDOR_ID_INTEL, IXGBE_SUBDEV_ID_82599_ECNA_DP) -RTE_PCI_DEV_ID_DECL_IXGBE(PCI_VENDOR_ID_INTEL, IXGBE_DEV_ID_82599_BACKPLANE_FCOE) -RTE_PCI_DEV_ID_DECL_IXGBE(PCI_VENDOR_ID_INTEL, IXGBE_DEV_ID_82599_SFP_FCOE) -RTE_PCI_DEV_ID_DECL_IXGBE(PCI_VENDOR_ID_INTEL, IXGBE_DEV_ID_82599_SFP_EM) -RTE_PCI_DEV_ID_DECL_IXGBE(PCI_VENDOR_ID_INTEL, IXGBE_DEV_ID_82599_SFP_SF2) -RTE_PCI_DEV_ID_DECL_IXGBE(PCI_VENDOR_ID_INTEL, IXGBE_DEV_ID_82599_SFP_SF_QP) -RTE_PCI_DEV_ID_DECL_IXGBE(PCI_VENDOR_ID_INTEL, IXGBE_DEV_ID_82599_QSFP_SF_QP) -RTE_PCI_DEV_ID_DECL_IXGBE(PCI_VENDOR_ID_INTEL, IXGBE_DEV_ID_82599EN_SFP) -RTE_PCI_DEV_ID_DECL_IXGBE(PCI_VENDOR_ID_INTEL, IXGBE_DEV_ID_82599_XAUI_LOM) -RTE_PCI_DEV_ID_DECL_IXGBE(PCI_VENDOR_ID_INTEL, IXGBE_DEV_ID_82599_T3_LOM) -RTE_PCI_DEV_ID_DECL_IXGBE(PCI_VENDOR_ID_INTEL, IXGBE_DEV_ID_82599_LS) -RTE_PCI_DEV_ID_DECL_IXGBE(PCI_VENDOR_ID_INTEL, IXGBE_DEV_ID_X540T) -RTE_PCI_DEV_ID_DECL_IXGBE(PCI_VENDOR_ID_INTEL, IXGBE_DEV_ID_X540T1) -RTE_PCI_DEV_ID_DECL_IXGBE(PCI_VENDOR_ID_INTEL, IXGBE_DEV_ID_X550EM_X_SFP) -RTE_PCI_DEV_ID_DECL_IXGBE(PCI_VENDOR_ID_INTEL, IXGBE_DEV_ID_X550EM_X_10G_T) -RTE_PCI_DEV_ID_DECL_IXGBE(PCI_VENDOR_ID_INTEL, IXGBE_DEV_ID_X550EM_X_1G_T) -RTE_PCI_DEV_ID_DECL_IXGBE(PCI_VENDOR_ID_INTEL, IXGBE_DEV_ID_X550T) -RTE_PCI_DEV_ID_DECL_IXGBE(PCI_VENDOR_ID_INTEL, IXGBE_DEV_ID_X550T1) -RTE_PCI_DEV_ID_DECL_IXGBE(PCI_VENDOR_ID_INTEL, IXGBE_DEV_ID_X550EM_A_KR) -RTE_PCI_DEV_ID_DECL_IXGBE(PCI_VENDOR_ID_INTEL, IXGBE_DEV_ID_X550EM_A_KR_L) -RTE_PCI_DEV_ID_DECL_IXGBE(PCI_VENDOR_ID_INTEL, IXGBE_DEV_ID_X550EM_A_SFP_N) 
-RTE_PCI_DEV_ID_DECL_IXGBE(PCI_VENDOR_ID_INTEL, IXGBE_DEV_ID_X550EM_A_SGMII) -RTE_PCI_DEV_ID_DECL_IXGBE(PCI_VENDOR_ID_INTEL, IXGBE_DEV_ID_X550EM_A_SGMII_L) -RTE_PCI_DEV_ID_DECL_IXGBE(PCI_VENDOR_ID_INTEL, IXGBE_DEV_ID_X550EM_A_10G_T) -RTE_PCI_DEV_ID_DECL_IXGBE(PCI_VENDOR_ID_INTEL, IXGBE_DEV_ID_X550EM_A_QSFP) -RTE_PCI_DEV_ID_DECL_IXGBE(PCI_VENDOR_ID_INTEL, IXGBE_DEV_ID_X550EM_A_QSFP_N) -RTE_PCI_DEV_ID_DECL_IXGBE(PCI_VENDOR_ID_INTEL, IXGBE_DEV_ID_X550EM_A_SFP) -RTE_PCI_DEV_ID_DECL_IXGBE(PCI_VENDOR_ID_INTEL, IXGBE_DEV_ID_X550EM_A_1G_T) -RTE_PCI_DEV_ID_DECL_IXGBE(PCI_VENDOR_ID_INTEL, IXGBE_DEV_ID_X550EM_A_1G_T_L) -RTE_PCI_DEV_ID_DECL_IXGBE(PCI_VENDOR_ID_INTEL, IXGBE_DEV_ID_X550EM_X_KX4) -RTE_PCI_DEV_ID_DECL_IXGBE(PCI_VENDOR_ID_INTEL, IXGBE_DEV_ID_X550EM_X_KR) - -#ifdef RTE_NIC_BYPASS -RTE_PCI_DEV_ID_DECL_IXGBE(PCI_VENDOR_ID_INTEL, IXGBE_DEV_ID_82599_BYPASS) -#endif - -/****************** Virtual IGB devices from e1000_hw.h ******************/ - -#define E1000_DEV_ID_82576_VF 0x10CA -#define E1000_DEV_ID_82576_VF_HV 0x152D -#define E1000_DEV_ID_I350_VF 0x1520 -#define E1000_DEV_ID_I350_VF_HV 0x152F - -RTE_PCI_DEV_ID_DECL_IGBVF(PCI_VENDOR_ID_INTEL, E1000_DEV_ID_82576_VF) -RTE_PCI_DEV_ID_DECL_IGBVF(PCI_VENDOR_ID_INTEL, E1000_DEV_ID_82576_VF_HV) -RTE_PCI_DEV_ID_DECL_IGBVF(PCI_VENDOR_ID_INTEL, E1000_DEV_ID_I350_VF) -RTE_PCI_DEV_ID_DECL_IGBVF(PCI_VENDOR_ID_INTEL, E1000_DEV_ID_I350_VF_HV) - -/****************** Virtual IXGBE devices from ixgbe_type.h ******************/ - -#define IXGBE_DEV_ID_82599_VF 0x10ED -#define IXGBE_DEV_ID_82599_VF_HV 0x152E -#define IXGBE_DEV_ID_X540_VF 0x1515 -#define IXGBE_DEV_ID_X540_VF_HV 0x1530 -#define IXGBE_DEV_ID_X550_VF_HV 0x1564 -#define IXGBE_DEV_ID_X550_VF 0x1565 -#define IXGBE_DEV_ID_X550EM_A_VF 0x15C5 -#define IXGBE_DEV_ID_X550EM_A_VF_HV 0x15B4 -#define IXGBE_DEV_ID_X550EM_X_VF 0x15A8 -#define IXGBE_DEV_ID_X550EM_X_VF_HV 0x15A9 - -RTE_PCI_DEV_ID_DECL_IXGBEVF(PCI_VENDOR_ID_INTEL, IXGBE_DEV_ID_82599_VF) -RTE_PCI_DEV_ID_DECL_IXGBEVF(PCI_VENDOR_ID_INTEL, IXGBE_DEV_ID_82599_VF_HV) -RTE_PCI_DEV_ID_DECL_IXGBEVF(PCI_VENDOR_ID_INTEL, IXGBE_DEV_ID_X540_VF) -RTE_PCI_DEV_ID_DECL_IXGBEVF(PCI_VENDOR_ID_INTEL, IXGBE_DEV_ID_X540_VF_HV) -RTE_PCI_DEV_ID_DECL_IXGBEVF(PCI_VENDOR_ID_INTEL, IXGBE_DEV_ID_X550_VF_HV) -RTE_PCI_DEV_ID_DECL_IXGBEVF(PCI_VENDOR_ID_INTEL, IXGBE_DEV_ID_X550_VF) -RTE_PCI_DEV_ID_DECL_IXGBEVF(PCI_VENDOR_ID_INTEL, IXGBE_DEV_ID_X550EM_A_VF) -RTE_PCI_DEV_ID_DECL_IXGBEVF(PCI_VENDOR_ID_INTEL, IXGBE_DEV_ID_X550EM_A_VF_HV) -RTE_PCI_DEV_ID_DECL_IXGBEVF(PCI_VENDOR_ID_INTEL, IXGBE_DEV_ID_X550EM_X_VF) -RTE_PCI_DEV_ID_DECL_IXGBEVF(PCI_VENDOR_ID_INTEL, IXGBE_DEV_ID_X550EM_X_VF_HV) - -/* - * Undef all RTE_PCI_DEV_ID_DECL_* here. - */ -#undef RTE_PCI_DEV_ID_DECL_IGB -#undef RTE_PCI_DEV_ID_DECL_IGBVF -#undef RTE_PCI_DEV_ID_DECL_IXGBE -#undef RTE_PCI_DEV_ID_DECL_IXGBEVF diff --git a/lib/librte_eal/common/include/rte_tailq.h b/lib/librte_eal/common/include/rte_tailq.h index cc3c0f1d..3aae098a 100644 --- a/lib/librte_eal/common/include/rte_tailq.h +++ b/lib/librte_eal/common/include/rte_tailq.h @@ -107,7 +107,7 @@ struct rte_tailq_elem { RTE_TAILQ_CAST(rte_eal_tailq_lookup(name), struct_name) /** - * Dump tail queues to the console. + * Dump tail queues to a file. 
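[Editor's note: with rte_pci_dev_ids.h and its RTE_PCI_DEV_ID_DECL_* machinery deleted above, each PMD now keeps its ID table beside the driver definition and exports it for tooling via RTE_PMD_REGISTER_PCI_TABLE (introduced earlier in this patch as the rename of DRIVER_REGISTER_PCI_TABLE). A hedged sketch of the replacement pattern, reusing two IDs from the deleted igb table purely as illustration.]

#include <rte_pci.h>

/* Device IDs now live in the driver, not in a central EAL header. */
static const struct rte_pci_id pci_id_igb_map[] = {
	{ RTE_PCI_DEVICE(0x8086, 0x10C9) }, /* 82576 */
	{ RTE_PCI_DEVICE(0x8086, 0x1521) }, /* I350 copper */
	{ .vendor_id = 0 }, /* sentinel */
};

RTE_PMD_REGISTER_PCI_TABLE(net_e1000_igb, pci_id_igb_map);

[End of editor's note.]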
* * @param f * A pointer to a file for output @@ -148,8 +148,8 @@ struct rte_tailq_head *rte_eal_tailq_lookup(const char *name); int rte_eal_tailq_register(struct rte_tailq_elem *t); #define EAL_REGISTER_TAILQ(t) \ -void tailqinitfn_ ##t(void); \ -void __attribute__((constructor, used)) tailqinitfn_ ##t(void) \ +RTE_INIT(tailqinitfn_ ##t); \ +static void tailqinitfn_ ##t(void) \ { \ if (rte_eal_tailq_register(&t) < 0) \ rte_panic("Cannot initialize tailq: %s\n", t.name); \ diff --git a/lib/librte_eal/common/include/rte_time.h b/lib/librte_eal/common/include/rte_time.h index 4b13b9c1..28c6274c 100644 --- a/lib/librte_eal/common/include/rte_time.h +++ b/lib/librte_eal/common/include/rte_time.h @@ -31,6 +31,12 @@ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ +#ifndef _RTE_TIME_H_ +#define _RTE_TIME_H_ + +#include +#include + #define NSEC_PER_SEC 1000000000L /** @@ -120,3 +126,5 @@ rte_ns_to_timespec(uint64_t nsec) return ts; } + +#endif /* _RTE_TIME_H_ */ diff --git a/lib/librte_eal/common/include/rte_vdev.h b/lib/librte_eal/common/include/rte_vdev.h new file mode 100644 index 00000000..784e837d --- /dev/null +++ b/lib/librte_eal/common/include/rte_vdev.h @@ -0,0 +1,102 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2016 RehiveTech. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of RehiveTech nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef RTE_VDEV_H +#define RTE_VDEV_H + +#ifdef __cplusplus +extern "C" { +#endif + +#include +#include + +/** Double linked list of virtual device drivers. */ +TAILQ_HEAD(vdev_driver_list, rte_vdev_driver); + +/** + * Probe function called for each virtual device driver once. + */ +typedef int (rte_vdev_probe_t)(const char *name, const char *args); + +/** + * Remove function called for each virtual device driver once. + */ +typedef int (rte_vdev_remove_t)(const char *name); + +/** + * A virtual device driver abstraction. + */ +struct rte_vdev_driver { + TAILQ_ENTRY(rte_vdev_driver) next; /**< Next in list. */ + struct rte_driver driver; /**< Inherited general driver. 
*/ + rte_vdev_probe_t *probe; /**< Virtual device probe function. */ + rte_vdev_remove_t *remove; /**< Virtual device remove function. */ +}; + +/** + * Register a virtual device driver. + * + * @param driver + * A pointer to a rte_vdev_driver structure describing the driver + * to be registered. + */ +void rte_eal_vdrv_register(struct rte_vdev_driver *driver); + +/** + * Unregister a virtual device driver. + * + * @param driver + * A pointer to a rte_vdev_driver structure describing the driver + * to be unregistered. + */ +void rte_eal_vdrv_unregister(struct rte_vdev_driver *driver); + +#define RTE_PMD_REGISTER_VDEV(nm, vdrv)\ +RTE_INIT(vdrvinitfn_ ##vdrv);\ +static const char *vdrvinit_ ## nm ## _alias;\ +static void vdrvinitfn_ ##vdrv(void)\ +{\ + (vdrv).driver.name = RTE_STR(nm);\ + (vdrv).driver.alias = vdrvinit_ ## nm ## _alias;\ + rte_eal_vdrv_register(&vdrv);\ +} \ +RTE_PMD_EXPORT_NAME(nm, __COUNTER__) + +#define RTE_PMD_REGISTER_ALIAS(nm, alias)\ +static const char *vdrvinit_ ## nm ## _alias = RTE_STR(alias) + +#ifdef __cplusplus +} +#endif + +#endif diff --git a/lib/librte_eal/common/include/rte_version.h b/lib/librte_eal/common/include/rte_version.h index 8187dc7b..da204e63 100644 --- a/lib/librte_eal/common/include/rte_version.h +++ b/lib/librte_eal/common/include/rte_version.h @@ -45,6 +45,7 @@ extern "C" { #include #include +#include #include /** @@ -60,12 +61,12 @@ extern "C" { /** * Minor version/month number i.e. the mm in yy.mm.z */ -#define RTE_VER_MONTH 7 +#define RTE_VER_MONTH 11 /** * Patch level number i.e. the z in yy.mm.z */ -#define RTE_VER_MINOR 2 +#define RTE_VER_MINOR 0 /** * Extra string to be appended to version number diff --git a/lib/librte_eal/common/malloc_heap.c b/lib/librte_eal/common/malloc_heap.c index 763fa324..267a4c6c 100644 --- a/lib/librte_eal/common/malloc_heap.c +++ b/lib/librte_eal/common/malloc_heap.c @@ -221,14 +221,6 @@ rte_eal_malloc_heap_init(void) for (ms = &mcfg->memseg[0], ms_cnt = 0; (ms_cnt < RTE_MAX_MEMSEG) && (ms->len > 0); ms_cnt++, ms++) { -#ifdef RTE_LIBRTE_IVSHMEM - /* - * if segment has ioremap address set, it's an IVSHMEM segment and - * it is not memory to allocate from. 
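[Editor's note: the rte_vdev.h macros above are the registration path for virtual drivers; RTE_PMD_REGISTER_ALIAS lets the renamed net_* drivers keep answering to their old eth_* names. A hedged skeleton follows — every identifier is illustrative, and the probe/remove bodies are stubs.]

#include <rte_log.h>
#include <rte_vdev.h>

static int
dummy_vdev_probe(const char *name, const char *args)
{
	RTE_LOG(INFO, PMD, "probing %s with args '%s'\n", name, args);
	return 0;
}

static int
dummy_vdev_remove(const char *name)
{
	RTE_LOG(INFO, PMD, "removing %s\n", name);
	return 0;
}

static struct rte_vdev_driver dummy_vdev_drv = {
	.probe = dummy_vdev_probe,
	.remove = dummy_vdev_remove,
};

/* Constructor sets .driver.name = "net_dummy" and registers the
 * driver; the alias keeps legacy "eth_dummy" devargs working. */
RTE_PMD_REGISTER_VDEV(net_dummy, dummy_vdev_drv);
RTE_PMD_REGISTER_ALIAS(net_dummy, eth_dummy);

[End of editor's note.]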
- */ - if (ms->ioremap_addr != 0) - continue; -#endif malloc_heap_add_memseg(&mcfg->malloc_heaps[ms->socket_id], ms); } diff --git a/lib/librte_eal/linuxapp/eal/Makefile b/lib/librte_eal/linuxapp/eal/Makefile index 182729c0..4e206f09 100644 --- a/lib/librte_eal/linuxapp/eal/Makefile +++ b/lib/librte_eal/linuxapp/eal/Makefile @@ -37,19 +37,13 @@ ARCH_DIR ?= $(RTE_ARCH) EXPORT_MAP := rte_eal_version.map VPATH += $(RTE_SDK)/lib/librte_eal/common/arch/$(ARCH_DIR) -LIBABIVER := 2 +LIBABIVER := 3 VPATH += $(RTE_SDK)/lib/librte_eal/common CFLAGS += -I$(SRCDIR)/include CFLAGS += -I$(RTE_SDK)/lib/librte_eal/common CFLAGS += -I$(RTE_SDK)/lib/librte_eal/common/include -ifeq ($(CONFIG_RTE_LIBRTE_IVSHMEM),y) -# workaround for circular dependency eal -> ivshmem -> ring/mempool -> eal -CFLAGS += -I$(RTE_SDK)/lib/librte_ring -CFLAGS += -I$(RTE_SDK)/lib/librte_mempool -CFLAGS += -I$(RTE_SDK)/lib/librte_ivshmem -endif CFLAGS += $(WERROR_FLAGS) -O3 LDLIBS += -ldl @@ -76,9 +70,6 @@ SRCS-$(CONFIG_RTE_EXEC_ENV_LINUXAPP) += eal_lcore.c SRCS-$(CONFIG_RTE_EXEC_ENV_LINUXAPP) += eal_timer.c SRCS-$(CONFIG_RTE_EXEC_ENV_LINUXAPP) += eal_interrupts.c SRCS-$(CONFIG_RTE_EXEC_ENV_LINUXAPP) += eal_alarm.c -ifeq ($(CONFIG_RTE_LIBRTE_IVSHMEM),y) -SRCS-$(CONFIG_RTE_EXEC_ENV_LINUXAPP) += eal_ivshmem.c -endif # from common dir SRCS-$(CONFIG_RTE_EXEC_ENV_LINUXAPP) += eal_common_lcore.c @@ -86,6 +77,7 @@ SRCS-$(CONFIG_RTE_EXEC_ENV_LINUXAPP) += eal_common_timer.c SRCS-$(CONFIG_RTE_EXEC_ENV_LINUXAPP) += eal_common_memzone.c SRCS-$(CONFIG_RTE_EXEC_ENV_LINUXAPP) += eal_common_log.c SRCS-$(CONFIG_RTE_EXEC_ENV_LINUXAPP) += eal_common_launch.c +SRCS-$(CONFIG_RTE_EXEC_ENV_LINUXAPP) += eal_common_vdev.c SRCS-$(CONFIG_RTE_EXEC_ENV_LINUXAPP) += eal_common_pci.c SRCS-$(CONFIG_RTE_EXEC_ENV_LINUXAPP) += eal_common_pci_uio.c SRCS-$(CONFIG_RTE_EXEC_ENV_LINUXAPP) += eal_common_memory.c diff --git a/lib/librte_eal/linuxapp/eal/eal.c b/lib/librte_eal/linuxapp/eal/eal.c index 3fb2188f..2075282e 100644 --- a/lib/librte_eal/linuxapp/eal/eal.c +++ b/lib/librte_eal/linuxapp/eal/eal.c @@ -70,6 +70,7 @@ #include #include #include +#include #include #include #include @@ -238,7 +239,8 @@ rte_eal_config_attach(void) mem_config = (struct rte_mem_config *) mmap(NULL, sizeof(*mem_config), PROT_READ, MAP_SHARED, mem_cfg_fd, 0); if (mem_config == MAP_FAILED) - rte_panic("Cannot mmap memory for rte_config\n"); + rte_panic("Cannot mmap memory for rte_config! error %i (%s)\n", + errno, strerror(errno)); rte_config.mem_config = mem_config; } @@ -263,9 +265,17 @@ rte_eal_config_reattach(void) mem_config = (struct rte_mem_config *) mmap(rte_mem_cfg_addr, sizeof(*mem_config), PROT_READ | PROT_WRITE, MAP_SHARED, mem_cfg_fd, 0); + if (mem_config == MAP_FAILED || mem_config != rte_mem_cfg_addr) { + if (mem_config != MAP_FAILED) + /* errno is stale, don't use */ + rte_panic("Cannot mmap memory for rte_config at [%p], got [%p]" + " - please use '--base-virtaddr' option\n", + rte_mem_cfg_addr, mem_config); + else + rte_panic("Cannot mmap memory for rte_config! 
error %i (%s)\n", + errno, strerror(errno)); + } close(mem_cfg_fd); - if (mem_config == MAP_FAILED || mem_config != rte_mem_cfg_addr) - rte_panic("Cannot mmap memory for rte_config\n"); rte_config.mem_config = mem_config; } @@ -740,6 +750,9 @@ rte_eal_init(int argc, char **argv) char cpuset[RTE_CPU_AFFINITY_STR_LEN]; char thread_name[RTE_MAX_THREAD_NAME_LEN]; + /* checks if the machine is adequate */ + rte_cpu_check_supported(); + if (!rte_atomic32_test_and_set(&run_once)) return -1; @@ -748,9 +761,6 @@ rte_eal_init(int argc, char **argv) thread_id = pthread_self(); - if (rte_eal_log_early_init() < 0) - rte_panic("Cannot init early logs\n"); - eal_log_level_parse(argc, argv); /* set log level as early as possible */ @@ -789,6 +799,9 @@ rte_eal_init(int argc, char **argv) rte_config_init(); + if (rte_eal_log_init(logid, internal_config.syslog_facility) < 0) + rte_panic("Cannot init logs\n"); + if (rte_eal_pci_init() < 0) rte_panic("Cannot init PCI\n"); @@ -797,11 +810,6 @@ rte_eal_init(int argc, char **argv) rte_panic("Cannot init VFIO\n"); #endif -#ifdef RTE_LIBRTE_IVSHMEM - if (rte_eal_ivshmem_init() < 0) - rte_panic("Cannot init IVSHMEM\n"); -#endif - if (rte_eal_memory_init() < 0) rte_panic("Cannot init memory\n"); @@ -814,14 +822,6 @@ rte_eal_init(int argc, char **argv) if (rte_eal_tailqs_init() < 0) rte_panic("Cannot init tail queues for objects\n"); -#ifdef RTE_LIBRTE_IVSHMEM - if (rte_eal_ivshmem_obj_init() < 0) - rte_panic("Cannot init IVSHMEM objects\n"); -#endif - - if (rte_eal_log_init(logid, internal_config.syslog_facility) < 0) - rte_panic("Cannot init logs\n"); - if (rte_eal_alarm_init() < 0) rte_panic("Cannot init interrupt-handling thread\n"); diff --git a/lib/librte_eal/linuxapp/eal/eal_ivshmem.c b/lib/librte_eal/linuxapp/eal/eal_ivshmem.c deleted file mode 100644 index 67b3caf2..00000000 --- a/lib/librte_eal/linuxapp/eal/eal_ivshmem.c +++ /dev/null @@ -1,954 +0,0 @@ -/*- - * BSD LICENSE - * - * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * - * * Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in - * the documentation and/or other materials provided with the - * distribution. - * * Neither the name of Intel Corporation nor the names of its - * contributors may be used to endorse or promote products derived - * from this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
- */ - -#ifdef RTE_LIBRTE_IVSHMEM /* hide it from coverage */ - -#include -#include -#include -#include -#include -#include -#include - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include "eal_internal_cfg.h" -#include "eal_private.h" - -#define PCI_VENDOR_ID_IVSHMEM 0x1Af4 -#define PCI_DEVICE_ID_IVSHMEM 0x1110 - -#define IVSHMEM_MAGIC 0x0BADC0DE - -#define IVSHMEM_RESOURCE_PATH "/sys/bus/pci/devices/%04x:%02x:%02x.%x/resource2" -#define IVSHMEM_CONFIG_PATH "/var/run/.%s_ivshmem_config" - -#define PHYS 0x1 -#define VIRT 0x2 -#define IOREMAP 0x4 -#define FULL (PHYS|VIRT|IOREMAP) - -#define METADATA_SIZE_ALIGNED \ - (RTE_ALIGN_CEIL(sizeof(struct rte_ivshmem_metadata),pagesz)) - -#define CONTAINS(x,y)\ - (((y).addr_64 >= (x).addr_64) && ((y).addr_64 < (x).addr_64 + (x).len)) - -#define DIM(x) (sizeof(x)/sizeof(x[0])) - -struct ivshmem_pci_device { - char path[PATH_MAX]; - phys_addr_t ioremap_addr; -}; - -/* data type to store in config */ -struct ivshmem_segment { - struct rte_ivshmem_metadata_entry entry; - uint64_t align; - char path[PATH_MAX]; -}; -struct ivshmem_shared_config { - struct ivshmem_segment segment[RTE_MAX_MEMSEG]; - uint32_t segment_idx; - struct ivshmem_pci_device pci_devs[RTE_LIBRTE_IVSHMEM_MAX_PCI_DEVS]; - uint32_t pci_devs_idx; -}; -static struct ivshmem_shared_config * ivshmem_config; -static int memseg_idx; -static int pagesz; - -/* Tailq heads to add rings to */ -TAILQ_HEAD(rte_ring_list, rte_tailq_entry); - -/* - * Utility functions - */ - -static int -is_ivshmem_device(struct rte_pci_device * dev) -{ - return dev->id.vendor_id == PCI_VENDOR_ID_IVSHMEM - && dev->id.device_id == PCI_DEVICE_ID_IVSHMEM; -} - -static void * -map_metadata(int fd, uint64_t len) -{ - size_t metadata_len = sizeof(struct rte_ivshmem_metadata); - size_t aligned_len = METADATA_SIZE_ALIGNED; - - return mmap(NULL, metadata_len, PROT_READ | PROT_WRITE, - MAP_SHARED, fd, len - aligned_len); -} - -static void -unmap_metadata(void * ptr) -{ - munmap(ptr, sizeof(struct rte_ivshmem_metadata)); -} - -static int -has_ivshmem_metadata(int fd, uint64_t len) -{ - struct rte_ivshmem_metadata metadata; - void * ptr; - - ptr = map_metadata(fd, len); - - if (ptr == MAP_FAILED) - return -1; - - metadata = *(struct rte_ivshmem_metadata*) (ptr); - - unmap_metadata(ptr); - - return metadata.magic_number == IVSHMEM_MAGIC; -} - -static void -remove_segment(struct ivshmem_segment * ms, int len, int idx) -{ - int i; - - for (i = idx; i < len - 1; i++) - memcpy(&ms[i], &ms[i+1], sizeof(struct ivshmem_segment)); - memset(&ms[len-1], 0, sizeof(struct ivshmem_segment)); -} - -static int -overlap(const struct rte_memzone * mz1, const struct rte_memzone * mz2) -{ - uint64_t start1, end1, start2, end2; - uint64_t p_start1, p_end1, p_start2, p_end2; - uint64_t i_start1, i_end1, i_start2, i_end2; - int result = 0; - - /* gather virtual addresses */ - start1 = mz1->addr_64; - end1 = mz1->addr_64 + mz1->len; - start2 = mz2->addr_64; - end2 = mz2->addr_64 + mz2->len; - - /* gather physical addresses */ - p_start1 = mz1->phys_addr; - p_end1 = mz1->phys_addr + mz1->len; - p_start2 = mz2->phys_addr; - p_end2 = mz2->phys_addr + mz2->len; - - /* gather ioremap addresses */ - i_start1 = mz1->ioremap_addr; - i_end1 = mz1->ioremap_addr + mz1->len; - i_start2 = mz2->ioremap_addr; - i_end2 = mz2->ioremap_addr + mz2->len; - - /* check for overlap in virtual addresses */ - if (start1 >= start2 && start1 < end2) - result |= VIRT; - if (start2 >= start1 && start2 < end1) - 
result |= VIRT; - - /* check for overlap in physical addresses */ - if (p_start1 >= p_start2 && p_start1 < p_end2) - result |= PHYS; - if (p_start2 >= p_start1 && p_start2 < p_end1) - result |= PHYS; - - /* check for overlap in ioremap addresses */ - if (i_start1 >= i_start2 && i_start1 < i_end2) - result |= IOREMAP; - if (i_start2 >= i_start1 && i_start2 < i_end1) - result |= IOREMAP; - - return result; -} - -static int -adjacent(const struct rte_memzone * mz1, const struct rte_memzone * mz2) -{ - uint64_t start1, end1, start2, end2; - uint64_t p_start1, p_end1, p_start2, p_end2; - uint64_t i_start1, i_end1, i_start2, i_end2; - int result = 0; - - /* gather virtual addresses */ - start1 = mz1->addr_64; - end1 = mz1->addr_64 + mz1->len; - start2 = mz2->addr_64; - end2 = mz2->addr_64 + mz2->len; - - /* gather physical addresses */ - p_start1 = mz1->phys_addr; - p_end1 = mz1->phys_addr + mz1->len; - p_start2 = mz2->phys_addr; - p_end2 = mz2->phys_addr + mz2->len; - - /* gather ioremap addresses */ - i_start1 = mz1->ioremap_addr; - i_end1 = mz1->ioremap_addr + mz1->len; - i_start2 = mz2->ioremap_addr; - i_end2 = mz2->ioremap_addr + mz2->len; - - /* check if segments are virtually adjacent */ - if (start1 == end2) - result |= VIRT; - if (start2 == end1) - result |= VIRT; - - /* check if segments are physically adjacent */ - if (p_start1 == p_end2) - result |= PHYS; - if (p_start2 == p_end1) - result |= PHYS; - - /* check if segments are ioremap-adjacent */ - if (i_start1 == i_end2) - result |= IOREMAP; - if (i_start2 == i_end1) - result |= IOREMAP; - - return result; -} - -static int -has_adjacent_segments(struct ivshmem_segment * ms, int len) -{ - int i, j; - - for (i = 0; i < len; i++) - for (j = i + 1; j < len; j++) { - /* we're only interested in fully adjacent segments; partially - * adjacent segments can coexist. 
- */ - if (adjacent(&ms[i].entry.mz, &ms[j].entry.mz) == FULL) - return 1; - } - return 0; -} - -static int -has_overlapping_segments(struct ivshmem_segment * ms, int len) -{ - int i, j; - - for (i = 0; i < len; i++) - for (j = i + 1; j < len; j++) - if (overlap(&ms[i].entry.mz, &ms[j].entry.mz)) - return 1; - return 0; -} - -static int -seg_compare(const void * a, const void * b) -{ - const struct ivshmem_segment * s1 = (const struct ivshmem_segment*) a; - const struct ivshmem_segment * s2 = (const struct ivshmem_segment*) b; - - /* move unallocated zones to the end */ - if (s1->entry.mz.addr == NULL && s2->entry.mz.addr == NULL) - return 0; - if (s1->entry.mz.addr == 0) - return 1; - if (s2->entry.mz.addr == 0) - return -1; - - return s1->entry.mz.phys_addr > s2->entry.mz.phys_addr; -} - -#ifdef RTE_LIBRTE_IVSHMEM_DEBUG -static void -entry_dump(struct rte_ivshmem_metadata_entry *e) -{ - RTE_LOG(DEBUG, EAL, "\tvirt: %p-%p\n", e->mz.addr, - RTE_PTR_ADD(e->mz.addr, e->mz.len)); - RTE_LOG(DEBUG, EAL, "\tphys: 0x%" PRIx64 "-0x%" PRIx64 "\n", - e->mz.phys_addr, - e->mz.phys_addr + e->mz.len); - RTE_LOG(DEBUG, EAL, "\tio: 0x%" PRIx64 "-0x%" PRIx64 "\n", - e->mz.ioremap_addr, - e->mz.ioremap_addr + e->mz.len); - RTE_LOG(DEBUG, EAL, "\tlen: 0x%" PRIx64 "\n", e->mz.len); - RTE_LOG(DEBUG, EAL, "\toff: 0x%" PRIx64 "\n", e->offset); -} -#endif - - - -/* - * Actual useful code - */ - -/* read through metadata mapped from the IVSHMEM device */ -static int -read_metadata(char * path, int path_len, int fd, uint64_t flen) -{ - struct rte_ivshmem_metadata metadata; - struct rte_ivshmem_metadata_entry * entry; - int idx, i; - void * ptr; - - ptr = map_metadata(fd, flen); - - if (ptr == MAP_FAILED) - return -1; - - metadata = *(struct rte_ivshmem_metadata*) (ptr); - - unmap_metadata(ptr); - - RTE_LOG(DEBUG, EAL, "Parsing metadata for \"%s\"\n", metadata.name); - - idx = ivshmem_config->segment_idx; - - for (i = 0; i < RTE_LIBRTE_IVSHMEM_MAX_ENTRIES && - idx <= RTE_MAX_MEMSEG; i++) { - - if (idx == RTE_MAX_MEMSEG) { - RTE_LOG(ERR, EAL, "Not enough memory segments!\n"); - return -1; - } - - entry = &metadata.entry[i]; - - /* stop on uninitialized memzone */ - if (entry->mz.len == 0) - break; - - /* copy metadata entry */ - memcpy(&ivshmem_config->segment[idx].entry, entry, - sizeof(struct rte_ivshmem_metadata_entry)); - - /* copy path */ - snprintf(ivshmem_config->segment[idx].path, path_len, "%s", path); - - idx++; - } - ivshmem_config->segment_idx = idx; - - return 0; -} - -/* check through each segment and look for adjacent or overlapping ones. 
*/ -static int -cleanup_segments(struct ivshmem_segment * ms, int tbl_len) -{ - struct ivshmem_segment * s, * tmp; - int i, j, concat, seg_adjacent, seg_overlapping; - uint64_t start1, start2, end1, end2, p_start1, p_start2, i_start1, i_start2; - - qsort(ms, tbl_len, sizeof(struct ivshmem_segment), - seg_compare); - - while (has_overlapping_segments(ms, tbl_len) || - has_adjacent_segments(ms, tbl_len)) { - - for (i = 0; i < tbl_len; i++) { - s = &ms[i]; - - concat = 0; - - for (j = i + 1; j < tbl_len; j++) { - tmp = &ms[j]; - - /* check if this segment is overlapping with existing segment, - * or is adjacent to existing segment */ - seg_overlapping = overlap(&s->entry.mz, &tmp->entry.mz); - seg_adjacent = adjacent(&s->entry.mz, &tmp->entry.mz); - - /* check if segments fully overlap or are fully adjacent */ - if ((seg_adjacent == FULL) || (seg_overlapping == FULL)) { - -#ifdef RTE_LIBRTE_IVSHMEM_DEBUG - RTE_LOG(DEBUG, EAL, "Concatenating segments\n"); - RTE_LOG(DEBUG, EAL, "Segment %i:\n", i); - entry_dump(&s->entry); - RTE_LOG(DEBUG, EAL, "Segment %i:\n", j); - entry_dump(&tmp->entry); -#endif - - start1 = s->entry.mz.addr_64; - start2 = tmp->entry.mz.addr_64; - p_start1 = s->entry.mz.phys_addr; - p_start2 = tmp->entry.mz.phys_addr; - i_start1 = s->entry.mz.ioremap_addr; - i_start2 = tmp->entry.mz.ioremap_addr; - end1 = s->entry.mz.addr_64 + s->entry.mz.len; - end2 = tmp->entry.mz.addr_64 + tmp->entry.mz.len; - - /* settle for minimum start address and maximum length */ - s->entry.mz.addr_64 = RTE_MIN(start1, start2); - s->entry.mz.phys_addr = RTE_MIN(p_start1, p_start2); - s->entry.mz.ioremap_addr = RTE_MIN(i_start1, i_start2); - s->entry.offset = RTE_MIN(s->entry.offset, tmp->entry.offset); - s->entry.mz.len = RTE_MAX(end1, end2) - s->entry.mz.addr_64; - concat = 1; - -#ifdef RTE_LIBRTE_IVSHMEM_DEBUG - RTE_LOG(DEBUG, EAL, "Resulting segment:\n"); - entry_dump(&s->entry); - -#endif - } - /* if segments not fully overlap, we have an error condition. - * adjacent segments can coexist. 
- */ - else if (seg_overlapping > 0) { - RTE_LOG(ERR, EAL, "Segments %i and %i overlap!\n", i, j); -#ifdef RTE_LIBRTE_IVSHMEM_DEBUG - RTE_LOG(DEBUG, EAL, "Segment %i:\n", i); - entry_dump(&s->entry); - RTE_LOG(DEBUG, EAL, "Segment %i:\n", j); - entry_dump(&tmp->entry); -#endif - return -1; - } - if (concat) - break; - } - /* if we concatenated, remove segment at j */ - if (concat) { - remove_segment(ms, tbl_len, j); - tbl_len--; - break; - } - } - } - - return tbl_len; -} - -static int -create_shared_config(void) -{ - char path[PATH_MAX]; - int fd; - - /* build ivshmem config file path */ - snprintf(path, sizeof(path), IVSHMEM_CONFIG_PATH, - internal_config.hugefile_prefix); - - fd = open(path, O_CREAT | O_RDWR, 0600); - - if (fd < 0) { - RTE_LOG(ERR, EAL, "Could not open %s: %s\n", path, strerror(errno)); - return -1; - } - - /* try ex-locking first - if the file is locked, we have a problem */ - if (flock(fd, LOCK_EX | LOCK_NB) == -1) { - RTE_LOG(ERR, EAL, "Locking %s failed: %s\n", path, strerror(errno)); - close(fd); - return -1; - } - - if (ftruncate(fd, sizeof(struct ivshmem_shared_config)) < 0) { - RTE_LOG(ERR, EAL, "ftruncate failed: %s\n", strerror(errno)); - return -1; - } - - ivshmem_config = mmap(NULL, sizeof(struct ivshmem_shared_config), - PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0); - - if (ivshmem_config == MAP_FAILED) - return -1; - - memset(ivshmem_config, 0, sizeof(struct ivshmem_shared_config)); - - /* change the exclusive lock we got earlier to a shared lock */ - if (flock(fd, LOCK_SH | LOCK_NB) == -1) { - RTE_LOG(ERR, EAL, "Locking %s failed: %s \n", path, strerror(errno)); - return -1; - } - - close(fd); - - return 0; -} - -/* open shared config file and, if present, map the config. - * having no config file is not an error condition, as we later check if - * ivshmem_config is NULL (if it is, that means nothing was mapped). */ -static int -open_shared_config(void) -{ - char path[PATH_MAX]; - int fd; - - /* build ivshmem config file path */ - snprintf(path, sizeof(path), IVSHMEM_CONFIG_PATH, - internal_config.hugefile_prefix); - - fd = open(path, O_RDONLY); - - /* if the file doesn't exist, just return success */ - if (fd < 0 && errno == ENOENT) - return 0; - /* else we have an error condition */ - else if (fd < 0) { - RTE_LOG(ERR, EAL, "Could not open %s: %s\n", - path, strerror(errno)); - return -1; - } - - /* try ex-locking first - if the lock *does* succeed, this means it's a - * stray config file, so it should be deleted. - */ - if (flock(fd, LOCK_EX | LOCK_NB) != -1) { - - /* if we can't remove the file, something is wrong */ - if (unlink(path) < 0) { - RTE_LOG(ERR, EAL, "Could not remove %s: %s\n", path, - strerror(errno)); - return -1; - } - - /* release the lock */ - flock(fd, LOCK_UN); - close(fd); - - /* return success as having a stray config file is equivalent to not - * having config file at all. - */ - return 0; - } - - ivshmem_config = mmap(NULL, sizeof(struct ivshmem_shared_config), - PROT_READ, MAP_SHARED, fd, 0); - - if (ivshmem_config == MAP_FAILED) - return -1; - - /* place a shared lock on config file */ - if (flock(fd, LOCK_SH | LOCK_NB) == -1) { - RTE_LOG(ERR, EAL, "Locking %s failed: %s \n", path, strerror(errno)); - return -1; - } - - close(fd); - - return 0; -} - -/* - * This function does the following: - * - * 1) Builds a table of ivshmem_segments with proper offset alignment - * 2) Cleans up that table so that we don't have any overlapping or adjacent - * memory segments - * 3) Creates memsegs from this table and maps them into memory. 
- */ -static inline int -map_all_segments(void) -{ - struct ivshmem_segment ms_tbl[RTE_MAX_MEMSEG]; - struct ivshmem_pci_device * pci_dev; - struct rte_mem_config * mcfg; - struct ivshmem_segment * seg; - int fd, fd_zero; - unsigned i, j; - struct rte_memzone mz; - struct rte_memseg ms; - void * base_addr; - uint64_t align, len; - phys_addr_t ioremap_addr; - - ioremap_addr = 0; - - memset(ms_tbl, 0, sizeof(ms_tbl)); - memset(&mz, 0, sizeof(struct rte_memzone)); - memset(&ms, 0, sizeof(struct rte_memseg)); - - /* first, build a table of memsegs to map, to avoid failed mmaps due to - * overlaps - */ - for (i = 0; i < ivshmem_config->segment_idx && i <= RTE_MAX_MEMSEG; i++) { - if (i == RTE_MAX_MEMSEG) { - RTE_LOG(ERR, EAL, "Too many segments requested!\n"); - return -1; - } - - seg = &ivshmem_config->segment[i]; - - /* copy segment to table */ - memcpy(&ms_tbl[i], seg, sizeof(struct ivshmem_segment)); - - /* find ioremap addr */ - for (j = 0; j < DIM(ivshmem_config->pci_devs); j++) { - pci_dev = &ivshmem_config->pci_devs[j]; - if (!strncmp(pci_dev->path, seg->path, sizeof(pci_dev->path))) { - ioremap_addr = pci_dev->ioremap_addr; - break; - } - } - if (ioremap_addr == 0) { - RTE_LOG(ERR, EAL, "Cannot find ioremap addr!\n"); - return -1; - } - - /* work out alignments */ - align = seg->entry.mz.addr_64 - - RTE_ALIGN_FLOOR(seg->entry.mz.addr_64, 0x1000); - len = RTE_ALIGN_CEIL(seg->entry.mz.len + align, 0x1000); - - /* save original alignments */ - ms_tbl[i].align = align; - - /* create a memory zone */ - mz.addr_64 = seg->entry.mz.addr_64 - align; - mz.len = len; - mz.hugepage_sz = seg->entry.mz.hugepage_sz; - mz.phys_addr = seg->entry.mz.phys_addr - align; - - /* find true physical address */ - mz.ioremap_addr = ioremap_addr + seg->entry.offset - align; - - ms_tbl[i].entry.offset = seg->entry.offset - align; - - memcpy(&ms_tbl[i].entry.mz, &mz, sizeof(struct rte_memzone)); - } - - /* clean up the segments */ - memseg_idx = cleanup_segments(ms_tbl, ivshmem_config->segment_idx); - - if (memseg_idx < 0) - return -1; - - mcfg = rte_eal_get_configuration()->mem_config; - - fd_zero = open("/dev/zero", O_RDWR); - - if (fd_zero < 0) { - RTE_LOG(ERR, EAL, "Cannot open /dev/zero: %s\n", strerror(errno)); - return -1; - } - - /* create memsegs and put them into DPDK memory */ - for (i = 0; i < (unsigned) memseg_idx; i++) { - - seg = &ms_tbl[i]; - - ms.addr_64 = seg->entry.mz.addr_64; - ms.hugepage_sz = seg->entry.mz.hugepage_sz; - ms.len = seg->entry.mz.len; - ms.nchannel = rte_memory_get_nchannel(); - ms.nrank = rte_memory_get_nrank(); - ms.phys_addr = seg->entry.mz.phys_addr; - ms.ioremap_addr = seg->entry.mz.ioremap_addr; - ms.socket_id = seg->entry.mz.socket_id; - - base_addr = mmap(ms.addr, ms.len, - PROT_READ | PROT_WRITE, MAP_PRIVATE, fd_zero, 0); - - if (base_addr == MAP_FAILED || base_addr != ms.addr) { - RTE_LOG(ERR, EAL, "Cannot map /dev/zero!\n"); - return -1; - } - - fd = open(seg->path, O_RDWR); - - if (fd < 0) { - RTE_LOG(ERR, EAL, "Cannot open %s: %s\n", seg->path, - strerror(errno)); - return -1; - } - - munmap(ms.addr, ms.len); - - base_addr = mmap(ms.addr, ms.len, - PROT_READ | PROT_WRITE, MAP_SHARED, fd, - seg->entry.offset); - - - if (base_addr == MAP_FAILED || base_addr != ms.addr) { - RTE_LOG(ERR, EAL, "Cannot map segment into memory: " - "expected %p got %p (%s)\n", ms.addr, base_addr, - strerror(errno)); - return -1; - } - - RTE_LOG(DEBUG, EAL, "Memory segment mapped: %p (len %" PRIx64 ") at " - "offset 0x%" PRIx64 "\n", - ms.addr, ms.len, seg->entry.offset); - - /* put the 
pointers back into their real positions using original - * alignment */ - ms.addr_64 += seg->align; - ms.phys_addr += seg->align; - ms.ioremap_addr += seg->align; - ms.len -= seg->align; - - /* at this point, the rest of DPDK memory is not initialized, so we - * expect memsegs to be empty */ - memcpy(&mcfg->memseg[i], &ms, - sizeof(struct rte_memseg)); - - close(fd); - - RTE_LOG(DEBUG, EAL, "IVSHMEM segment found, size: 0x%lx\n", - ms.len); - } - - return 0; -} - -/* this happens at a later stage, after general EAL memory initialization */ -int -rte_eal_ivshmem_obj_init(void) -{ - struct rte_ring_list* ring_list = NULL; - struct rte_mem_config * mcfg; - struct ivshmem_segment * seg; - struct rte_memzone * mz; - struct rte_ring * r; - struct rte_tailq_entry *te; - unsigned i, ms, idx; - uint64_t offset; - - /* secondary process would not need any object discovery - it'll all - * already be in shared config */ - if (rte_eal_process_type() != RTE_PROC_PRIMARY || ivshmem_config == NULL) - return 0; - - /* check that we have an initialised ring tail queue */ - ring_list = RTE_TAILQ_LOOKUP(RTE_TAILQ_RING_NAME, rte_ring_list); - if (ring_list == NULL) { - RTE_LOG(ERR, EAL, "No rte_ring tailq found!\n"); - return -1; - } - - mcfg = rte_eal_get_configuration()->mem_config; - - /* create memzones */ - for (i = 0; i < ivshmem_config->segment_idx && i <= RTE_MAX_MEMZONE; i++) { - - seg = &ivshmem_config->segment[i]; - - /* add memzone */ - if (mcfg->memzone_cnt == RTE_MAX_MEMZONE) { - RTE_LOG(ERR, EAL, "No more memory zones available!\n"); - return -1; - } - - idx = mcfg->memzone_cnt; - - RTE_LOG(DEBUG, EAL, "Found memzone: '%s' at %p (len 0x%" PRIx64 ")\n", - seg->entry.mz.name, seg->entry.mz.addr, seg->entry.mz.len); - - memcpy(&mcfg->memzone[idx], &seg->entry.mz, - sizeof(struct rte_memzone)); - - /* find ioremap address */ - for (ms = 0; ms <= RTE_MAX_MEMSEG; ms++) { - if (ms == RTE_MAX_MEMSEG) { - RTE_LOG(ERR, EAL, "Physical address of segment not found!\n"); - return -1; - } - if (CONTAINS(mcfg->memseg[ms], mcfg->memzone[idx])) { - offset = mcfg->memzone[idx].addr_64 - - mcfg->memseg[ms].addr_64; - mcfg->memzone[idx].ioremap_addr = mcfg->memseg[ms].ioremap_addr + - offset; - break; - } - } - - mcfg->memzone_cnt++; - } - - rte_rwlock_write_lock(RTE_EAL_TAILQ_RWLOCK); - - /* find rings */ - for (i = 0; i < mcfg->memzone_cnt; i++) { - mz = &mcfg->memzone[i]; - - /* check if memzone has a ring prefix */ - if (strncmp(mz->name, RTE_RING_MZ_PREFIX, - sizeof(RTE_RING_MZ_PREFIX) - 1) != 0) - continue; - - r = (struct rte_ring*) (mz->addr_64); - - te = rte_zmalloc("RING_TAILQ_ENTRY", sizeof(*te), 0); - if (te == NULL) { - RTE_LOG(ERR, EAL, "Cannot allocate ring tailq entry!\n"); - return -1; - } - - te->data = (void *) r; - - TAILQ_INSERT_TAIL(ring_list, te, next); - - RTE_LOG(DEBUG, EAL, "Found ring: '%s' at %p\n", r->name, mz->addr); - } - rte_rwlock_write_unlock(RTE_EAL_TAILQ_RWLOCK); - -#ifdef RTE_LIBRTE_IVSHMEM_DEBUG - rte_memzone_dump(stdout); - rte_ring_list_dump(stdout); -#endif - - return 0; -} - -/* initialize ivshmem structures */ -int rte_eal_ivshmem_init(void) -{ - struct rte_pci_device * dev; - struct rte_pci_resource * res; - int fd, ret; - char path[PATH_MAX]; - - /* initialize everything to 0 */ - memset(path, 0, sizeof(path)); - ivshmem_config = NULL; - - pagesz = getpagesize(); - - RTE_LOG(DEBUG, EAL, "Searching for IVSHMEM devices...\n"); - - if (rte_eal_process_type() == RTE_PROC_SECONDARY) { - - if (open_shared_config() < 0) { - RTE_LOG(ERR, EAL, "Could not open IVSHMEM config!\n"); 
- return -1; - } - } - else { - - TAILQ_FOREACH(dev, &pci_device_list, next) { - - if (is_ivshmem_device(dev)) { - - /* IVSHMEM memory is always on BAR2 */ - res = &dev->mem_resource[2]; - - /* if we don't have a BAR2 */ - if (res->len == 0) - continue; - - /* construct pci device path */ - snprintf(path, sizeof(path), IVSHMEM_RESOURCE_PATH, - dev->addr.domain, dev->addr.bus, dev->addr.devid, - dev->addr.function); - - /* try to find memseg */ - fd = open(path, O_RDWR); - if (fd < 0) { - RTE_LOG(ERR, EAL, "Could not open %s\n", path); - return -1; - } - - /* check if it's a DPDK IVSHMEM device */ - ret = has_ivshmem_metadata(fd, res->len); - - /* is DPDK device */ - if (ret == 1) { - - /* config file creation is deferred until the first - * DPDK device is found. then, it has to be created - * only once. */ - if (ivshmem_config == NULL && - create_shared_config() < 0) { - RTE_LOG(ERR, EAL, "Could not create IVSHMEM config!\n"); - close(fd); - return -1; - } - - if (read_metadata(path, sizeof(path), fd, res->len) < 0) { - RTE_LOG(ERR, EAL, "Could not read metadata from" - " device %02x:%02x.%x!\n", dev->addr.bus, - dev->addr.devid, dev->addr.function); - close(fd); - return -1; - } - - if (ivshmem_config->pci_devs_idx == RTE_LIBRTE_IVSHMEM_MAX_PCI_DEVS) { - RTE_LOG(WARNING, EAL, - "IVSHMEM PCI device limit exceeded. Increase " - "CONFIG_RTE_LIBRTE_IVSHMEM_MAX_PCI_DEVS in " - "your config file.\n"); - break; - } - - RTE_LOG(INFO, EAL, "Found IVSHMEM device %02x:%02x.%x\n", - dev->addr.bus, dev->addr.devid, dev->addr.function); - - ivshmem_config->pci_devs[ivshmem_config->pci_devs_idx].ioremap_addr = res->phys_addr; - snprintf(ivshmem_config->pci_devs[ivshmem_config->pci_devs_idx].path, - sizeof(ivshmem_config->pci_devs[ivshmem_config->pci_devs_idx].path), - "%s", path); - - ivshmem_config->pci_devs_idx++; - } - /* failed to read */ - else if (ret < 0) { - RTE_LOG(ERR, EAL, "Could not read IVSHMEM device: %s\n", - strerror(errno)); - close(fd); - return -1; - } - /* not a DPDK device */ - else - RTE_LOG(DEBUG, EAL, "Skipping non-DPDK IVSHMEM device\n"); - - /* close the BAR fd */ - close(fd); - } - } - } - - /* ivshmem_config is not NULL only if config was created and/or mapped */ - if (ivshmem_config) { - if (map_all_segments() < 0) { - RTE_LOG(ERR, EAL, "Mapping IVSHMEM segments failed!\n"); - return -1; - } - } - else { - RTE_LOG(DEBUG, EAL, "No IVSHMEM configuration found! 
\n"); - } - - return 0; -} - -#endif diff --git a/lib/librte_eal/linuxapp/eal/eal_log.c b/lib/librte_eal/linuxapp/eal/eal_log.c index d3911004..e3a50aa3 100644 --- a/lib/librte_eal/linuxapp/eal/eal_log.c +++ b/lib/librte_eal/linuxapp/eal/eal_log.c @@ -97,45 +97,7 @@ rte_eal_log_init(const char *id, int facility) openlog(id, LOG_NDELAY | LOG_PID, facility); - if (rte_eal_common_log_init(log_stream) < 0) - return -1; - - return 0; -} - -/* early logs */ - -/* - * early log function, used before rte_eal_log_init - */ -static ssize_t -early_log_write(__attribute__((unused)) void *c, const char *buf, size_t size) -{ - ssize_t ret; - ret = fwrite(buf, size, 1, stdout); - fflush(stdout); - if (ret == 0) - return -1; - return ret; -} - -static cookie_io_functions_t early_log_func = { - .write = early_log_write, -}; -static FILE *early_log_stream; + eal_log_set_default(log_stream); -/* - * init the log library, called by rte_eal_init() to enable early - * logs - */ -int -rte_eal_log_early_init(void) -{ - early_log_stream = fopencookie(NULL, "w+", early_log_func); - if (early_log_stream == NULL) { - printf("Cannot configure early_log_stream\n"); - return -1; - } - rte_openlog_stream(early_log_stream); return 0; } diff --git a/lib/librte_eal/linuxapp/eal/eal_memory.c b/lib/librte_eal/linuxapp/eal/eal_memory.c index c04cff0c..a956bb22 100644 --- a/lib/librte_eal/linuxapp/eal/eal_memory.c +++ b/lib/librte_eal/linuxapp/eal/eal_memory.c @@ -376,25 +376,15 @@ map_all_hugepages(struct hugepage_file *hugepg_tbl, void *vma_addr = NULL; size_t vma_len = 0; -#ifdef RTE_EAL_SINGLE_FILE_SEGMENTS - RTE_SET_USED(vma_len); -#endif - for (i = 0; i < hpi->num_pages[0]; i++) { uint64_t hugepage_sz = hpi->hugepage_sz; if (orig) { hugepg_tbl[i].file_id = i; hugepg_tbl[i].size = hugepage_sz; -#ifdef RTE_EAL_SINGLE_FILE_SEGMENTS - eal_get_hugefile_temp_path(hugepg_tbl[i].filepath, - sizeof(hugepg_tbl[i].filepath), hpi->hugedir, - hugepg_tbl[i].file_id); -#else eal_get_hugefile_path(hugepg_tbl[i].filepath, sizeof(hugepg_tbl[i].filepath), hpi->hugedir, hugepg_tbl[i].file_id); -#endif hugepg_tbl[i].filepath[sizeof(hugepg_tbl[i].filepath) - 1] = '\0'; } #ifndef RTE_ARCH_64 @@ -408,8 +398,6 @@ map_all_hugepages(struct hugepage_file *hugepg_tbl, continue; } #endif - -#ifndef RTE_EAL_SINGLE_FILE_SEGMENTS else if (vma_len == 0) { unsigned j, num_pages; @@ -439,10 +427,9 @@ map_all_hugepages(struct hugepage_file *hugepg_tbl, if (vma_addr == NULL) vma_len = hugepage_sz; } -#endif /* try to create hugepage file */ - fd = open(hugepg_tbl[i].filepath, O_CREAT | O_RDWR, 0755); + fd = open(hugepg_tbl[i].filepath, O_CREAT | O_RDWR, 0600); if (fd < 0) { RTE_LOG(DEBUG, EAL, "%s(): open failed: %s\n", __func__, strerror(errno)); @@ -505,169 +492,6 @@ map_all_hugepages(struct hugepage_file *hugepg_tbl, return i; } -#ifdef RTE_EAL_SINGLE_FILE_SEGMENTS - -/* - * Remaps all hugepages into single file segments - */ -static int -remap_all_hugepages(struct hugepage_file *hugepg_tbl, struct hugepage_info *hpi) -{ - int fd; - unsigned i = 0, j, num_pages, page_idx = 0; - void *vma_addr = NULL, *old_addr = NULL, *page_addr = NULL; - size_t vma_len = 0; - size_t hugepage_sz = hpi->hugepage_sz; - size_t total_size, offset; - char filepath[MAX_HUGEPAGE_PATH]; - phys_addr_t physaddr; - int socket; - - while (i < hpi->num_pages[0]) { - -#ifndef RTE_ARCH_64 - /* for 32-bit systems, don't remap 1G pages and 16G pages, - * just reuse original map address as final map address. 
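The first thing the deleted remap loop does, a few lines below, is measure how long a physically contiguous run of hugepages is, relying on the table being sorted by physical address; on PPC64 the sort order is descending, so the expected stride is negated there. The same scan in isolation, assuming an ascending sort:

    #include <stddef.h>
    #include <stdint.h>

    /* Count how many pages starting at index i form one physically
     * contiguous block, given physaddr[] sorted ascending. */
    static size_t
    contiguous_run(const uint64_t *physaddr, size_t n, size_t i,
                   uint64_t page_sz)
    {
        size_t j;

        for (j = i + 1; j < n; j++)
            if (physaddr[j] != physaddr[j - 1] + page_sz)
                break;

        return j - i;   /* length of the run, in pages */
    }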
- */ - if ((hugepage_sz == RTE_PGSIZE_1G) - || (hugepage_sz == RTE_PGSIZE_16G)) { - hugepg_tbl[i].final_va = hugepg_tbl[i].orig_va; - hugepg_tbl[i].orig_va = NULL; - i++; - continue; - } -#endif - - /* reserve a virtual area for next contiguous - * physical block: count the number of - * contiguous physical pages. */ - for (j = i+1; j < hpi->num_pages[0] ; j++) { -#ifdef RTE_ARCH_PPC_64 - /* The physical addresses are sorted in descending - * order on PPC64 */ - if (hugepg_tbl[j].physaddr != - hugepg_tbl[j-1].physaddr - hugepage_sz) - break; -#else - if (hugepg_tbl[j].physaddr != - hugepg_tbl[j-1].physaddr + hugepage_sz) - break; -#endif - } - num_pages = j - i; - vma_len = num_pages * hugepage_sz; - - socket = hugepg_tbl[i].socket_id; - - /* get the biggest virtual memory area up to - * vma_len. If it fails, vma_addr is NULL, so - * let the kernel provide the address. */ - vma_addr = get_virtual_area(&vma_len, hpi->hugepage_sz); - - /* If we can't find a big enough virtual area, work out how many pages - * we are going to get */ - if (vma_addr == NULL) - j = i + 1; - else if (vma_len != num_pages * hugepage_sz) { - num_pages = vma_len / hugepage_sz; - j = i + num_pages; - - } - - hugepg_tbl[page_idx].file_id = page_idx; - eal_get_hugefile_path(filepath, - sizeof(filepath), - hpi->hugedir, - hugepg_tbl[page_idx].file_id); - - /* try to create hugepage file */ - fd = open(filepath, O_CREAT | O_RDWR, 0755); - if (fd < 0) { - RTE_LOG(ERR, EAL, "%s(): open failed: %s\n", __func__, strerror(errno)); - return -1; - } - - total_size = 0; - for (;i < j; i++) { - - /* unmap current segment */ - if (total_size > 0) - munmap(vma_addr, total_size); - - /* unmap original page */ - munmap(hugepg_tbl[i].orig_va, hugepage_sz); - unlink(hugepg_tbl[i].filepath); - - total_size += hugepage_sz; - - old_addr = vma_addr; - - /* map new, bigger segment, and populate page tables, - * the kernel fills this segment with zeros */ - vma_addr = mmap(vma_addr, total_size, - PROT_READ | PROT_WRITE, MAP_SHARED | MAP_POPULATE, fd, 0); - - if (vma_addr == MAP_FAILED || vma_addr != old_addr) { - RTE_LOG(ERR, EAL, "%s(): mmap failed: %s\n", __func__, strerror(errno)); - close(fd); - return -1; - } - } - - /* set shared flock on the file. 
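Worth noting on the flock() call below: LOCK_SH takes a shared advisory lock, so any number of cooperating processes can hold the same hugepage file at once, while LOCK_NB turns a would-block into an immediate error. Reduced to its essentials (lock_shared is an illustrative name, not one from this tree):

    #include <errno.h>
    #include <stdio.h>
    #include <string.h>
    #include <sys/file.h>

    /* Shared, non-blocking advisory lock: fails rather than blocking
     * if another process holds the file exclusively (LOCK_EX). */
    static int
    lock_shared(int fd)
    {
        if (flock(fd, LOCK_SH | LOCK_NB) == -1) {
            fprintf(stderr, "flock: %s\n", strerror(errno));
            return -1;
        }
        return 0;
    }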
*/ - if (flock(fd, LOCK_SH | LOCK_NB) == -1) { - RTE_LOG(ERR, EAL, "%s(): Locking file failed:%s \n", - __func__, strerror(errno)); - close(fd); - return -1; - } - - snprintf(hugepg_tbl[page_idx].filepath, MAX_HUGEPAGE_PATH, "%s", - filepath); - - physaddr = rte_mem_virt2phy(vma_addr); - - if (physaddr == RTE_BAD_PHYS_ADDR) - return -1; - - hugepg_tbl[page_idx].final_va = vma_addr; - - hugepg_tbl[page_idx].physaddr = physaddr; - - hugepg_tbl[page_idx].repeated = num_pages; - - hugepg_tbl[page_idx].socket_id = socket; - - close(fd); - - /* verify the memory segment - that is, check that every VA corresponds - * to the physical address we expect to see - */ - for (offset = 0; offset < vma_len; offset += hugepage_sz) { - uint64_t expected_physaddr; - - expected_physaddr = hugepg_tbl[page_idx].physaddr + offset; - page_addr = RTE_PTR_ADD(vma_addr, offset); - physaddr = rte_mem_virt2phy(page_addr); - - if (physaddr != expected_physaddr) { - RTE_LOG(ERR, EAL, "Segment sanity check failed: wrong physaddr " - "at %p (offset 0x%" PRIx64 ": 0x%" PRIx64 - " (expected 0x%" PRIx64 ")\n", - page_addr, offset, physaddr, expected_physaddr); - return -1; - } - } - - page_idx++; - } - - /* zero out the rest */ - memset(&hugepg_tbl[page_idx], 0, (hpi->num_pages[0] - page_idx) * sizeof(struct hugepage_file)); - return page_idx; -} -#else/* RTE_EAL_SINGLE_FILE_SEGMENTS=n */ - /* Unmap all hugepages from original mapping */ static int unmap_all_hugepages_orig(struct hugepage_file *hugepg_tbl, struct hugepage_info *hpi) @@ -681,7 +505,6 @@ unmap_all_hugepages_orig(struct hugepage_file *hugepg_tbl, struct hugepage_info } return 0; } -#endif /* RTE_EAL_SINGLE_FILE_SEGMENTS */ /* * Parse /proc/self/numa_maps to get the NUMA socket ID for each huge @@ -875,12 +698,6 @@ unmap_unneeded_hugepages(struct hugepage_file *hugepg_tbl, for (page = 0; page < nrpages; page++) { struct hugepage_file *hp = &hugepg_tbl[page]; -#ifdef RTE_EAL_SINGLE_FILE_SEGMENTS - /* if this page was already cleared */ - if (hp->final_va == NULL) - continue; -#endif - /* find a page that matches the criteria */ if ((hp->size == hpi[size].hugepage_sz) && (hp->socket_id == (int) socket)) { @@ -889,11 +706,7 @@ unmap_unneeded_hugepages(struct hugepage_file *hugepg_tbl, if (pages_found == hpi[size].num_pages[socket]) { uint64_t unmap_len; -#ifdef RTE_EAL_SINGLE_FILE_SEGMENTS - unmap_len = hp->size * hp->repeated; -#else unmap_len = hp->size; -#endif /* get start addr and len of the remaining segment */ munmap(hp->final_va, (size_t) unmap_len); @@ -904,50 +717,10 @@ unmap_unneeded_hugepages(struct hugepage_file *hugepg_tbl, __func__, hp->filepath, strerror(errno)); return -1; } - } -#ifdef RTE_EAL_SINGLE_FILE_SEGMENTS - /* else, check how much do we need to map */ - else { - int nr_pg_left = - hpi[size].num_pages[socket] - pages_found; - - /* if we need enough memory to fit into the segment */ - if (hp->repeated <= nr_pg_left) { - pages_found += hp->repeated; - } - /* truncate the segment */ - else { - uint64_t final_size = nr_pg_left * hp->size; - uint64_t seg_size = hp->repeated * hp->size; - - void * unmap_va = RTE_PTR_ADD(hp->final_va, - final_size); - int fd; - - munmap(unmap_va, seg_size - final_size); - - fd = open(hp->filepath, O_RDWR); - if (fd < 0) { - RTE_LOG(ERR, EAL, "Cannot open %s: %s\n", - hp->filepath, strerror(errno)); - return -1; - } - if (ftruncate(fd, final_size) < 0) { - RTE_LOG(ERR, EAL, "Cannot truncate %s: %s\n", - hp->filepath, strerror(errno)); - return -1; - } - close(fd); - - pages_found += nr_pg_left; - hp->repeated = 
nr_pg_left; - } - } -#else - /* else, lock the page and skip */ - else + } else { + /* lock the page and skip */ pages_found++; -#endif + } } /* match page */ } /* foreach page */ @@ -1177,9 +950,6 @@ rte_eal_hugepage_init(void) int i, j, new_memseg; int nr_hugefiles, nr_hugepages = 0; void *addr; -#ifdef RTE_EAL_SINGLE_FILE_SEGMENTS - int new_pages_count[MAX_HUGEPAGE_SIZES]; -#endif test_proc_pagemap_readable(); @@ -1260,13 +1030,6 @@ rte_eal_hugepage_init(void) pages_old = hpi->num_pages[0]; pages_new = map_all_hugepages(&tmp_hp[hp_offset], hpi, 1); if (pages_new < pages_old) { -#ifdef RTE_EAL_SINGLE_FILE_SEGMENTS - RTE_LOG(ERR, EAL, - "%d not %d hugepages of size %u MB allocated\n", - pages_new, pages_old, - (unsigned)(hpi->hugepage_sz / 0x100000)); - goto fail; -#else RTE_LOG(DEBUG, EAL, "%d not %d hugepages of size %u MB allocated\n", pages_new, pages_old, @@ -1278,7 +1041,6 @@ rte_eal_hugepage_init(void) hpi->num_pages[0] = pages_new; if (pages_new == 0) continue; -#endif } /* find physical addresses and sockets for each hugepage */ @@ -1297,18 +1059,6 @@ rte_eal_hugepage_init(void) qsort(&tmp_hp[hp_offset], hpi->num_pages[0], sizeof(struct hugepage_file), cmp_physaddr); -#ifdef RTE_EAL_SINGLE_FILE_SEGMENTS - /* remap all hugepages into single file segments */ - new_pages_count[i] = remap_all_hugepages(&tmp_hp[hp_offset], hpi); - if (new_pages_count[i] < 0){ - RTE_LOG(DEBUG, EAL, "Failed to remap %u MB pages\n", - (unsigned)(hpi->hugepage_sz / 0x100000)); - goto fail; - } - - /* we have processed a num of hugepages of this size, so inc offset */ - hp_offset += new_pages_count[i]; -#else /* remap all hugepages */ if (map_all_hugepages(&tmp_hp[hp_offset], hpi, 0) != hpi->num_pages[0]) { @@ -1323,7 +1073,6 @@ rte_eal_hugepage_init(void) /* we have processed a num of hugepages of this size, so inc offset */ hp_offset += hpi->num_pages[0]; -#endif } huge_recover_sigbus(); @@ -1331,14 +1080,7 @@ rte_eal_hugepage_init(void) if (internal_config.memory == 0 && internal_config.force_sockets == 0) internal_config.memory = eal_get_hugepage_mem_size(); -#ifdef RTE_EAL_SINGLE_FILE_SEGMENTS - nr_hugefiles = 0; - for (i = 0; i < (int) internal_config.num_hugepage_sizes; i++) { - nr_hugefiles += new_pages_count[i]; - } -#else nr_hugefiles = nr_hugepages; -#endif /* clean out the numbers of pages */ @@ -1356,12 +1098,7 @@ rte_eal_hugepage_init(void) for (j = 0; j < nb_hpsizes; j++) { if (tmp_hp[i].size == internal_config.hugepage_info[j].hugepage_sz) { -#ifdef RTE_EAL_SINGLE_FILE_SEGMENTS - internal_config.hugepage_info[j].num_pages[socket] += - tmp_hp[i].repeated; -#else internal_config.hugepage_info[j].num_pages[socket]++; -#endif } } } @@ -1436,15 +1173,8 @@ rte_eal_hugepage_init(void) free(tmp_hp); tmp_hp = NULL; - /* find earliest free memseg - this is needed because in case of IVSHMEM, - * segments might have already been initialized */ - for (j = 0; j < RTE_MAX_MEMSEG; j++) - if (mcfg->memseg[j].addr == NULL) { - /* move to previous segment and exit loop */ - j--; - break; - } - + /* first memseg index shall be 0 after incrementing it below */ + j = -1; for (i = 0; i < nr_hugefiles; i++) { new_memseg = 0; @@ -1482,11 +1212,7 @@ rte_eal_hugepage_init(void) mcfg->memseg[j].phys_addr = hugepage[i].physaddr; mcfg->memseg[j].addr = hugepage[i].final_va; -#ifdef RTE_EAL_SINGLE_FILE_SEGMENTS - mcfg->memseg[j].len = hugepage[i].size * hugepage[i].repeated; -#else mcfg->memseg[j].len = hugepage[i].size; -#endif mcfg->memseg[j].socket_id = hugepage[i].socket_id; mcfg->memseg[j].hugepage_sz = 
hugepage[i].size; } @@ -1598,15 +1324,6 @@ rte_eal_hugepage_attach(void) if (mcfg->memseg[s].len == 0) break; -#ifdef RTE_LIBRTE_IVSHMEM - /* - * if segment has ioremap address set, it's an IVSHMEM segment and - * doesn't need mapping as it was already mapped earlier - */ - if (mcfg->memseg[s].ioremap_addr != 0) - continue; -#endif - /* * fdzero is mmapped to get a contiguous block of virtual * addresses of the appropriate memseg size. @@ -1616,13 +1333,21 @@ rte_eal_hugepage_attach(void) PROT_READ, MAP_PRIVATE, fd_zero, 0); if (base_addr == MAP_FAILED || base_addr != mcfg->memseg[s].addr) { - RTE_LOG(ERR, EAL, "Could not mmap %llu bytes " - "in /dev/zero to requested address [%p]: '%s'\n", - (unsigned long long)mcfg->memseg[s].len, - mcfg->memseg[s].addr, strerror(errno)); max_seg = s; - if (base_addr != MAP_FAILED) + if (base_addr != MAP_FAILED) { + /* errno is stale, don't use */ + RTE_LOG(ERR, EAL, "Could not mmap %llu bytes " + "in /dev/zero at [%p], got [%p] - " + "please use '--base-virtaddr' option\n", + (unsigned long long)mcfg->memseg[s].len, + mcfg->memseg[s].addr, base_addr); munmap(base_addr, mcfg->memseg[s].len); + } else { + RTE_LOG(ERR, EAL, "Could not mmap %llu bytes " + "in /dev/zero at [%p]: '%s'\n", + (unsigned long long)mcfg->memseg[s].len, + mcfg->memseg[s].addr, strerror(errno)); + } if (aslr_enabled() > 0) { RTE_LOG(ERR, EAL, "It is recommended to " "disable ASLR in the kernel " @@ -1648,16 +1373,6 @@ rte_eal_hugepage_attach(void) void *addr, *base_addr; uintptr_t offset = 0; size_t mapping_size; -#ifdef RTE_LIBRTE_IVSHMEM - /* - * if segment has ioremap address set, it's an IVSHMEM segment and - * doesn't need mapping as it was already mapped earlier - */ - if (mcfg->memseg[s].ioremap_addr != 0) { - s++; - continue; - } -#endif /* * free previously mapped memory so we can map the * hugepages into the space @@ -1676,11 +1391,7 @@ rte_eal_hugepage_attach(void) hp[i].filepath); goto error; } -#ifdef RTE_EAL_SINGLE_FILE_SEGMENTS - mapping_size = hp[i].size * hp[i].repeated; -#else mapping_size = hp[i].size; -#endif addr = mmap(RTE_PTR_ADD(base_addr, offset), mapping_size, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0); diff --git a/lib/librte_eal/linuxapp/eal/eal_pci.c b/lib/librte_eal/linuxapp/eal/eal_pci.c index cd9de7cc..876ba381 100644 --- a/lib/librte_eal/linuxapp/eal/eal_pci.c +++ b/lib/librte_eal/linuxapp/eal/eal_pci.c @@ -350,13 +350,13 @@ pci_scan_one(const char *dirname, uint16_t domain, uint8_t bus, dirname); if (access(filename, R_OK) != 0) { /* if no NUMA support, set default to 0 */ - dev->numa_node = 0; + dev->device.numa_node = 0; } else { if (eal_parse_sysfs_value(filename, &tmp) < 0) { free(dev); return -1; } - dev->numa_node = tmp; + dev->device.numa_node = tmp; } /* parse resources */ @@ -390,6 +390,7 @@ pci_scan_one(const char *dirname, uint16_t domain, uint8_t bus, /* device is valid, add in list (sorted) */ if (TAILQ_EMPTY(&pci_device_list)) { + rte_eal_device_insert(&dev->device); TAILQ_INSERT_TAIL(&pci_device_list, dev, next); } else { struct rte_pci_device *dev2; @@ -402,6 +403,7 @@ pci_scan_one(const char *dirname, uint16_t domain, uint8_t bus, if (ret < 0) { TAILQ_INSERT_BEFORE(dev2, dev, next); + rte_eal_device_insert(&dev->device); } else { /* already registered */ dev2->kdrv = dev->kdrv; dev2->max_vfs = dev->max_vfs; @@ -411,12 +413,26 @@ pci_scan_one(const char *dirname, uint16_t domain, uint8_t bus, } return 0; } + rte_eal_device_insert(&dev->device); TAILQ_INSERT_TAIL(&pci_device_list, dev, next); } return 0; } +int 
+pci_update_device(const struct rte_pci_addr *addr) +{ + char filename[PATH_MAX]; + + snprintf(filename, sizeof(filename), "%s/" PCI_PRI_FMT, + pci_get_sysfs_path(), addr->domain, addr->bus, addr->devid, + addr->function); + + return pci_scan_one(filename, addr->domain, addr->bus, addr->devid, + addr->function); +} + /* * split up a pci address into its constituent parts. */ @@ -743,9 +759,6 @@ rte_eal_pci_ioport_unmap(struct rte_pci_ioport *p) int rte_eal_pci_init(void) { - TAILQ_INIT(&pci_driver_list); - TAILQ_INIT(&pci_device_list); - /* for debug purposes, PCI can be disabled */ if (internal_config.no_pci) return 0; diff --git a/lib/librte_eal/linuxapp/eal/include/exec-env/rte_interrupts.h b/lib/librte_eal/linuxapp/eal/include/exec-env/rte_interrupts.h index 3dacbff8..d459bf48 100644 --- a/lib/librte_eal/linuxapp/eal/include/exec-env/rte_interrupts.h +++ b/lib/librte_eal/linuxapp/eal/include/exec-env/rte_interrupts.h @@ -82,6 +82,7 @@ struct rte_epoll_event { /** Handle for interrupts. */ struct rte_intr_handle { + RTE_STD_C11 union { int vfio_dev_fd; /**< VFIO device file descriptor */ int uio_cfg_fd; /**< UIO config file descriptor diff --git a/lib/librte_eal/linuxapp/eal/include/exec-env/rte_kni_common.h b/lib/librte_eal/linuxapp/eal/include/exec-env/rte_kni_common.h index 2acdfd9b..09713b0c 100644 --- a/lib/librte_eal/linuxapp/eal/include/exec-env/rte_kni_common.h +++ b/lib/librte_eal/linuxapp/eal/include/exec-env/rte_kni_common.h @@ -61,6 +61,9 @@ #ifdef __KERNEL__ #include +#define RTE_STD_C11 +#else +#include #endif /** @@ -85,6 +88,7 @@ enum rte_kni_req_id { */ struct rte_kni_request { uint32_t req_id; /**< Request id */ + RTE_STD_C11 union { uint32_t new_mtu; /**< New MTU */ uint8_t if_up; /**< 1: interface up, 0: interface down */ @@ -102,7 +106,7 @@ struct rte_kni_fifo { volatile unsigned read; /**< Next position to be read */ unsigned len; /**< Circular buffer length */ unsigned elem_size; /**< Pointer size - for 32/64 bit OS */ - void * volatile buffer[0]; /**< The buffer contains mbuf pointers */ + void *volatile buffer[]; /**< The buffer contains mbuf pointers */ }; /* @@ -111,7 +115,8 @@ struct rte_kni_fifo { */ struct rte_kni_mbuf { void *buf_addr __attribute__((__aligned__(RTE_CACHE_LINE_SIZE))); - char pad0[10]; + uint64_t buf_physaddr; + char pad0[2]; uint16_t data_off; /**< Start address of data in segment buffer. */ char pad1[2]; uint8_t nb_segs; /**< Number of segments. 
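In the rte_kni_fifo hunk just above, the trailing "void * volatile buffer[0]" becomes "void *volatile buffer[]": the GNU zero-length-array idiom replaced by a standard C99 flexible array member. Allocation is unchanged either way, one block covering the header plus the slot array. A self-contained sketch with a simplified struct (struct fifo and fifo_alloc are illustrative names only):

    #include <stdlib.h>

    /* Simplified stand-in for rte_kni_fifo. */
    struct fifo {
        unsigned len;       /* number of slots */
        void *buffer[];     /* C99 flexible array member */
    };

    static struct fifo *
    fifo_alloc(unsigned slots)
    {
        /* one allocation sized for the header plus the slot array */
        struct fifo *f = calloc(1, sizeof(*f) + slots * sizeof(void *));

        if (f != NULL)
            f->len = slots;
        return f;
    }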
*/ @@ -159,6 +164,7 @@ struct rte_kni_device_info { uint16_t group_id; /**< Group ID */ uint32_t core_id; /**< core ID to bind for kernel thread */ + __extension__ uint8_t force_bind : 1; /**< Flag for kernel thread binding */ /* mbuf size */ diff --git a/lib/librte_eal/linuxapp/eal/rte_eal_version.map b/lib/librte_eal/linuxapp/eal/rte_eal_version.map index db8c9845..83721ba5 100644 --- a/lib/librte_eal/linuxapp/eal/rte_eal_version.map +++ b/lib/librte_eal/linuxapp/eal/rte_eal_version.map @@ -166,3 +166,15 @@ DPDK_16.07 { rte_thread_setname; } DPDK_16.04; + +DPDK_16.11 { + global: + + rte_delay_us_block; + rte_delay_us_callback_register; + rte_eal_dev_attach; + rte_eal_dev_detach; + rte_eal_vdrv_register; + rte_eal_vdrv_unregister; + +} DPDK_16.07; diff --git a/lib/librte_eal/linuxapp/kni/Makefile b/lib/librte_eal/linuxapp/kni/Makefile index 8cc6b61c..4e99e07e 100644 --- a/lib/librte_eal/linuxapp/kni/Makefile +++ b/lib/librte_eal/linuxapp/kni/Makefile @@ -76,14 +76,9 @@ SRCS-y += ethtool/igb/e1000_mbx.c SRCS-y += ethtool/igb/e1000_nvm.c SRCS-y += ethtool/igb/e1000_phy.c SRCS-y += ethtool/igb/igb_ethtool.c -SRCS-y += ethtool/igb/igb_hwmon.c SRCS-y += ethtool/igb/igb_main.c -SRCS-y += ethtool/igb/igb_debugfs.c SRCS-y += ethtool/igb/igb_param.c -SRCS-y += ethtool/igb/igb_procfs.c SRCS-y += ethtool/igb/igb_vmdq.c -#SRCS-y += ethtool/igb/igb_ptp.c -#SRCS-y += ethtool/igb/kcompat.c SRCS-y += kni_misc.c SRCS-y += kni_net.c diff --git a/lib/librte_eal/linuxapp/kni/compat.h b/lib/librte_eal/linuxapp/kni/compat.h index 647ba3ce..78da08e5 100644 --- a/lib/librte_eal/linuxapp/kni/compat.h +++ b/lib/librte_eal/linuxapp/kni/compat.h @@ -19,13 +19,25 @@ #endif #if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 35) -#define sk_sleep(s) (s)->sk_sleep +#define sk_sleep(s) ((s)->sk_sleep) +#else +#define HAVE_SOCKET_WQ +#endif + +#if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 7, 0) +#define HAVE_STATIC_SOCK_MAP_FD +#else +#define kni_sock_map_fd(s) sock_map_fd(s, 0) #endif #if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 9, 0) #define HAVE_CHANGE_CARRIER_CB #endif +#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 14, 0) +#define ether_addr_copy(dst, src) memcpy(dst, src, ETH_ALEN) +#endif + #if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 19, 0) #define HAVE_IOV_ITER_MSGHDR #endif @@ -35,6 +47,23 @@ #define HAVE_REBUILD_HEADER #endif +#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 2, 0) +#define HAVE_SK_ALLOC_KERN_PARAM +#endif + #if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 7, 0) #define HAVE_TRANS_START_HELPER #endif + +/* + * KNI uses NET_NAME_UNKNOWN macro to select correct version of alloc_netdev() + * For old kernels just backported the commit that enables the macro + * (685343fc3ba6) but still uses old API, it is required to undefine macro to + * select correct version of API, this is safe since KNI doesn't use the value. + * This fix is specific to RedHat/CentOS kernels. + */ +#if (defined(RHEL_RELEASE_CODE) && \ + (RHEL_RELEASE_CODE >= RHEL_RELEASE_VERSION(6, 8)) && \ + (LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 34))) +#undef NET_NAME_UNKNOWN +#endif diff --git a/lib/librte_eal/linuxapp/kni/ethtool/igb/COPYING b/lib/librte_eal/linuxapp/kni/ethtool/igb/COPYING deleted file mode 100644 index 5f297e5b..00000000 --- a/lib/librte_eal/linuxapp/kni/ethtool/igb/COPYING +++ /dev/null @@ -1,339 +0,0 @@ - -"This software program is licensed subject to the GNU General Public License -(GPL). Version 2, June 1991, available at -" - -GNU General Public License - -Version 2, June 1991 - -Copyright (C) 1989, 1991 Free Software Foundation, Inc. 
-59 Temple Place - Suite 330, Boston, MA 02111-1307, USA - -Everyone is permitted to copy and distribute verbatim copies of this license -document, but changing it is not allowed. - -Preamble - -The licenses for most software are designed to take away your freedom to -share and change it. By contrast, the GNU General Public License is intended -to guarantee your freedom to share and change free software--to make sure -the software is free for all its users. This General Public License applies -to most of the Free Software Foundation's software and to any other program -whose authors commit to using it. (Some other Free Software Foundation -software is covered by the GNU Library General Public License instead.) You -can apply it to your programs, too. - -When we speak of free software, we are referring to freedom, not price. Our -General Public Licenses are designed to make sure that you have the freedom -to distribute copies of free software (and charge for this service if you -wish), that you receive source code or can get it if you want it, that you -can change the software or use pieces of it in new free programs; and that -you know you can do these things. - -To protect your rights, we need to make restrictions that forbid anyone to -deny you these rights or to ask you to surrender the rights. These -restrictions translate to certain responsibilities for you if you distribute -copies of the software, or if you modify it. - -For example, if you distribute copies of such a program, whether gratis or -for a fee, you must give the recipients all the rights that you have. You -must make sure that they, too, receive or can get the source code. And you -must show them these terms so they know their rights. - -We protect your rights with two steps: (1) copyright the software, and (2) -offer you this license which gives you legal permission to copy, distribute -and/or modify the software. - -Also, for each author's protection and ours, we want to make certain that -everyone understands that there is no warranty for this free software. If -the software is modified by someone else and passed on, we want its -recipients to know that what they have is not the original, so that any -problems introduced by others will not reflect on the original authors' -reputations. - -Finally, any free program is threatened constantly by software patents. We -wish to avoid the danger that redistributors of a free program will -individually obtain patent licenses, in effect making the program -proprietary. To prevent this, we have made it clear that any patent must be -licensed for everyone's free use or not licensed at all. - -The precise terms and conditions for copying, distribution and modification -follow. - -TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION - -0. This License applies to any program or other work which contains a notice - placed by the copyright holder saying it may be distributed under the - terms of this General Public License. The "Program", below, refers to any - such program or work, and a "work based on the Program" means either the - Program or any derivative work under copyright law: that is to say, a - work containing the Program or a portion of it, either verbatim or with - modifications and/or translated into another language. (Hereinafter, - translation is included without limitation in the term "modification".) - Each licensee is addressed as "you". 
- - Activities other than copying, distribution and modification are not - covered by this License; they are outside its scope. The act of running - the Program is not restricted, and the output from the Program is covered - only if its contents constitute a work based on the Program (independent - of having been made by running the Program). Whether that is true depends - on what the Program does. - -1. You may copy and distribute verbatim copies of the Program's source code - as you receive it, in any medium, provided that you conspicuously and - appropriately publish on each copy an appropriate copyright notice and - disclaimer of warranty; keep intact all the notices that refer to this - License and to the absence of any warranty; and give any other recipients - of the Program a copy of this License along with the Program. - - You may charge a fee for the physical act of transferring a copy, and you - may at your option offer warranty protection in exchange for a fee. - -2. You may modify your copy or copies of the Program or any portion of it, - thus forming a work based on the Program, and copy and distribute such - modifications or work under the terms of Section 1 above, provided that - you also meet all of these conditions: - - * a) You must cause the modified files to carry prominent notices stating - that you changed the files and the date of any change. - - * b) You must cause any work that you distribute or publish, that in - whole or in part contains or is derived from the Program or any part - thereof, to be licensed as a whole at no charge to all third parties - under the terms of this License. - - * c) If the modified program normally reads commands interactively when - run, you must cause it, when started running for such interactive - use in the most ordinary way, to print or display an announcement - including an appropriate copyright notice and a notice that there is - no warranty (or else, saying that you provide a warranty) and that - users may redistribute the program under these conditions, and - telling the user how to view a copy of this License. (Exception: if - the Program itself is interactive but does not normally print such - an announcement, your work based on the Program is not required to - print an announcement.) - - These requirements apply to the modified work as a whole. If identifiable - sections of that work are not derived from the Program, and can be - reasonably considered independent and separate works in themselves, then - this License, and its terms, do not apply to those sections when you - distribute them as separate works. But when you distribute the same - sections as part of a whole which is a work based on the Program, the - distribution of the whole must be on the terms of this License, whose - permissions for other licensees extend to the entire whole, and thus to - each and every part regardless of who wrote it. - - Thus, it is not the intent of this section to claim rights or contest - your rights to work written entirely by you; rather, the intent is to - exercise the right to control the distribution of derivative or - collective works based on the Program. - - In addition, mere aggregation of another work not based on the Program - with the Program (or with a work based on the Program) on a volume of a - storage or distribution medium does not bring the other work under the - scope of this License. - -3. 
You may copy and distribute the Program (or a work based on it, under - Section 2) in object code or executable form under the terms of Sections - 1 and 2 above provided that you also do one of the following: - - * a) Accompany it with the complete corresponding machine-readable source - code, which must be distributed under the terms of Sections 1 and 2 - above on a medium customarily used for software interchange; or, - - * b) Accompany it with a written offer, valid for at least three years, - to give any third party, for a charge no more than your cost of - physically performing source distribution, a complete machine- - readable copy of the corresponding source code, to be distributed - under the terms of Sections 1 and 2 above on a medium customarily - used for software interchange; or, - - * c) Accompany it with the information you received as to the offer to - distribute corresponding source code. (This alternative is allowed - only for noncommercial distribution and only if you received the - program in object code or executable form with such an offer, in - accord with Subsection b above.) - - The source code for a work means the preferred form of the work for - making modifications to it. For an executable work, complete source code - means all the source code for all modules it contains, plus any - associated interface definition files, plus the scripts used to control - compilation and installation of the executable. However, as a special - exception, the source code distributed need not include anything that is - normally distributed (in either source or binary form) with the major - components (compiler, kernel, and so on) of the operating system on which - the executable runs, unless that component itself accompanies the - executable. - - If distribution of executable or object code is made by offering access - to copy from a designated place, then offering equivalent access to copy - the source code from the same place counts as distribution of the source - code, even though third parties are not compelled to copy the source - along with the object code. - -4. You may not copy, modify, sublicense, or distribute the Program except as - expressly provided under this License. Any attempt otherwise to copy, - modify, sublicense or distribute the Program is void, and will - automatically terminate your rights under this License. However, parties - who have received copies, or rights, from you under this License will not - have their licenses terminated so long as such parties remain in full - compliance. - -5. You are not required to accept this License, since you have not signed - it. However, nothing else grants you permission to modify or distribute - the Program or its derivative works. These actions are prohibited by law - if you do not accept this License. Therefore, by modifying or - distributing the Program (or any work based on the Program), you - indicate your acceptance of this License to do so, and all its terms and - conditions for copying, distributing or modifying the Program or works - based on it. - -6. Each time you redistribute the Program (or any work based on the - Program), the recipient automatically receives a license from the - original licensor to copy, distribute or modify the Program subject to - these terms and conditions. You may not impose any further restrictions - on the recipients' exercise of the rights granted herein. You are not - responsible for enforcing compliance by third parties to this License. - -7. 
If, as a consequence of a court judgment or allegation of patent - infringement or for any other reason (not limited to patent issues), - conditions are imposed on you (whether by court order, agreement or - otherwise) that contradict the conditions of this License, they do not - excuse you from the conditions of this License. If you cannot distribute - so as to satisfy simultaneously your obligations under this License and - any other pertinent obligations, then as a consequence you may not - distribute the Program at all. For example, if a patent license would - not permit royalty-free redistribution of the Program by all those who - receive copies directly or indirectly through you, then the only way you - could satisfy both it and this License would be to refrain entirely from - distribution of the Program. - - If any portion of this section is held invalid or unenforceable under any - particular circumstance, the balance of the section is intended to apply - and the section as a whole is intended to apply in other circumstances. - - It is not the purpose of this section to induce you to infringe any - patents or other property right claims or to contest validity of any - such claims; this section has the sole purpose of protecting the - integrity of the free software distribution system, which is implemented - by public license practices. Many people have made generous contributions - to the wide range of software distributed through that system in - reliance on consistent application of that system; it is up to the - author/donor to decide if he or she is willing to distribute software - through any other system and a licensee cannot impose that choice. - - This section is intended to make thoroughly clear what is believed to be - a consequence of the rest of this License. - -8. If the distribution and/or use of the Program is restricted in certain - countries either by patents or by copyrighted interfaces, the original - copyright holder who places the Program under this License may add an - explicit geographical distribution limitation excluding those countries, - so that distribution is permitted only in or among countries not thus - excluded. In such case, this License incorporates the limitation as if - written in the body of this License. - -9. The Free Software Foundation may publish revised and/or new versions of - the General Public License from time to time. Such new versions will be - similar in spirit to the present version, but may differ in detail to - address new problems or concerns. - - Each version is given a distinguishing version number. If the Program - specifies a version number of this License which applies to it and "any - later version", you have the option of following the terms and - conditions either of that version or of any later version published by - the Free Software Foundation. If the Program does not specify a version - number of this License, you may choose any version ever published by the - Free Software Foundation. - -10. If you wish to incorporate parts of the Program into other free programs - whose distribution conditions are different, write to the author to ask - for permission. For software which is copyrighted by the Free Software - Foundation, write to the Free Software Foundation; we sometimes make - exceptions for this. Our decision will be guided by the two goals of - preserving the free status of all derivatives of our free software and - of promoting the sharing and reuse of software generally. - - NO WARRANTY - -11. 
BECAUSE THE PROGRAM IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY - FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW. EXCEPT WHEN - OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES - PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER - EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED - WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE - ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH - YOU. SHOULD THE PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL - NECESSARY SERVICING, REPAIR OR CORRECTION. - -12. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING - WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR - REDISTRIBUTE THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR - DAMAGES, INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL - DAMAGES ARISING OUT OF THE USE OR INABILITY TO USE THE PROGRAM - (INCLUDING BUT NOT LIMITED TO LOSS OF DATA OR DATA BEING RENDERED - INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD PARTIES OR A FAILURE OF - THE PROGRAM TO OPERATE WITH ANY OTHER PROGRAMS), EVEN IF SUCH HOLDER OR - OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH DAMAGES. - -END OF TERMS AND CONDITIONS - -How to Apply These Terms to Your New Programs - -If you develop a new program, and you want it to be of the greatest -possible use to the public, the best way to achieve this is to make it free -software which everyone can redistribute and change under these terms. - -To do so, attach the following notices to the program. It is safest to -attach them to the start of each source file to most effectively convey the -exclusion of warranty; and each file should have at least the "copyright" -line and a pointer to where the full notice is found. - -one line to give the program's name and an idea of what it does. -Copyright (C) yyyy name of author - -This program is free software; you can redistribute it and/or modify it -under the terms of the GNU General Public License as published by the Free -Software Foundation; either version 2 of the License, or (at your option) -any later version. - -This program is distributed in the hope that it will be useful, but WITHOUT -ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or -FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for -more details. - -You should have received a copy of the GNU General Public License along with -this program; if not, write to the Free Software Foundation, Inc., 59 -Temple Place - Suite 330, Boston, MA 02111-1307, USA. - -Also add information on how to contact you by electronic and paper mail. - -If the program is interactive, make it output a short notice like this when -it starts in an interactive mode: - -Gnomovision version 69, Copyright (C) year name of author Gnomovision comes -with ABSOLUTELY NO WARRANTY; for details type 'show w'. This is free -software, and you are welcome to redistribute it under certain conditions; -type 'show c' for details. - -The hypothetical commands 'show w' and 'show c' should show the appropriate -parts of the General Public License. Of course, the commands you use may be -called something other than 'show w' and 'show c'; they could even be -mouse-clicks or menu items--whatever suits your program. - -You should also get your employer (if you work as a programmer) or your -school, if any, to sign a "copyright disclaimer" for the program, if -necessary. 
Here is a sample; alter the names: - -Yoyodyne, Inc., hereby disclaims all copyright interest in the program -'Gnomovision' (which makes passes at compilers) written by James Hacker. - -signature of Ty Coon, 1 April 1989 -Ty Coon, President of Vice - -This General Public License does not permit incorporating your program into -proprietary programs. If your program is a subroutine library, you may -consider it more useful to permit linking proprietary applications with the -library. If this is what you want to do, use the GNU Library General Public -License instead of this License. diff --git a/lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_82575.c b/lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_82575.c index b8c9a13f..d558af20 100644 --- a/lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_82575.c +++ b/lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_82575.c @@ -17,7 +17,7 @@ 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. The full GNU General Public License is included in this distribution in - the file called "COPYING". + the file called "LICENSE.GPL". Contact Information: e1000-devel Mailing List diff --git a/lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_82575.h b/lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_82575.h index 1aec75ab..185ccdf1 100644 --- a/lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_82575.h +++ b/lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_82575.h @@ -17,7 +17,7 @@ 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. The full GNU General Public License is included in this distribution in - the file called "COPYING". + the file called "LICENSE.GPL". Contact Information: e1000-devel Mailing List diff --git a/lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_api.c b/lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_api.c index 6095d3b4..220c9a40 100644 --- a/lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_api.c +++ b/lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_api.c @@ -17,7 +17,7 @@ 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. The full GNU General Public License is included in this distribution in - the file called "COPYING". + the file called "LICENSE.GPL". Contact Information: e1000-devel Mailing List diff --git a/lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_api.h b/lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_api.h index b21294ec..55c8a5f4 100644 --- a/lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_api.h +++ b/lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_api.h @@ -17,7 +17,7 @@ 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. The full GNU General Public License is included in this distribution in - the file called "COPYING". + the file called "LICENSE.GPL". Contact Information: e1000-devel Mailing List diff --git a/lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_defines.h b/lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_defines.h index 63b228c5..d42c7998 100644 --- a/lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_defines.h +++ b/lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_defines.h @@ -17,7 +17,7 @@ 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. The full GNU General Public License is included in this distribution in - the file called "COPYING". + the file called "LICENSE.GPL". 
Contact Information: e1000-devel Mailing List diff --git a/lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_hw.h b/lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_hw.h index 347cef71..35886e93 100644 --- a/lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_hw.h +++ b/lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_hw.h @@ -17,7 +17,7 @@ 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. The full GNU General Public License is included in this distribution in - the file called "COPYING". + the file called "LICENSE.GPL". Contact Information: e1000-devel Mailing List diff --git a/lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_i210.c b/lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_i210.c index 1e9f3e6e..7e4c20a9 100644 --- a/lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_i210.c +++ b/lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_i210.c @@ -17,7 +17,7 @@ 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. The full GNU General Public License is included in this distribution in - the file called "COPYING". + the file called "LICENSE.GPL". Contact Information: e1000-devel Mailing List diff --git a/lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_i210.h b/lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_i210.h index 57b2eb56..b8fa70d0 100644 --- a/lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_i210.h +++ b/lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_i210.h @@ -17,7 +17,7 @@ 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. The full GNU General Public License is included in this distribution in - the file called "COPYING". + the file called "LICENSE.GPL". Contact Information: e1000-devel Mailing List diff --git a/lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_mac.c b/lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_mac.c index 4ee59ba9..74319def 100644 --- a/lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_mac.c +++ b/lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_mac.c @@ -17,7 +17,7 @@ 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. The full GNU General Public License is included in this distribution in - the file called "COPYING". + the file called "LICENSE.GPL". Contact Information: e1000-devel Mailing List diff --git a/lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_mac.h b/lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_mac.h index 6a1b0f52..3bcdd88c 100644 --- a/lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_mac.h +++ b/lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_mac.h @@ -17,7 +17,7 @@ 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. The full GNU General Public License is included in this distribution in - the file called "COPYING". + the file called "LICENSE.GPL". Contact Information: e1000-devel Mailing List diff --git a/lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_manage.c b/lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_manage.c index a1700398..51dfae5d 100644 --- a/lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_manage.c +++ b/lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_manage.c @@ -17,7 +17,7 @@ 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. The full GNU General Public License is included in this distribution in - the file called "COPYING". + the file called "LICENSE.GPL". Contact Information: e1000-devel Mailing List diff --git a/lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_manage.h b/lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_manage.h index c94b2185..0627f271 100644 --- a/lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_manage.h +++ b/lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_manage.h @@ -17,7 +17,7 @@ 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. 
The full GNU General Public License is included in this distribution in - the file called "COPYING". + the file called "LICENSE.GPL". Contact Information: e1000-devel Mailing List diff --git a/lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_mbx.c b/lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_mbx.c index 3ef0d98b..bd64429f 100644 --- a/lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_mbx.c +++ b/lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_mbx.c @@ -17,7 +17,7 @@ 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. The full GNU General Public License is included in this distribution in - the file called "COPYING". + the file called "LICENSE.GPL". Contact Information: e1000-devel Mailing List diff --git a/lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_mbx.h b/lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_mbx.h index bbf838c8..64685d9d 100644 --- a/lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_mbx.h +++ b/lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_mbx.h @@ -17,7 +17,7 @@ 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. The full GNU General Public License is included in this distribution in - the file called "COPYING". + the file called "LICENSE.GPL". Contact Information: e1000-devel Mailing List diff --git a/lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_nvm.c b/lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_nvm.c index 6188d007..1ce59154 100644 --- a/lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_nvm.c +++ b/lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_nvm.c @@ -17,7 +17,7 @@ 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. The full GNU General Public License is included in this distribution in - the file called "COPYING". + the file called "LICENSE.GPL". Contact Information: e1000-devel Mailing List diff --git a/lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_nvm.h b/lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_nvm.h index fe62785a..17bc53c3 100644 --- a/lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_nvm.h +++ b/lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_nvm.h @@ -17,7 +17,7 @@ 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. The full GNU General Public License is included in this distribution in - the file called "COPYING". + the file called "LICENSE.GPL". Contact Information: e1000-devel Mailing List diff --git a/lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_osdep.h b/lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_osdep.h index d1cf98e2..c1ab60c4 100644 --- a/lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_osdep.h +++ b/lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_osdep.h @@ -17,7 +17,7 @@ 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. The full GNU General Public License is included in this distribution in - the file called "COPYING". + the file called "LICENSE.GPL". Contact Information: e1000-devel Mailing List diff --git a/lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_phy.c b/lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_phy.c index 140a2a47..d8a77c45 100644 --- a/lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_phy.c +++ b/lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_phy.c @@ -17,7 +17,7 @@ 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. The full GNU General Public License is included in this distribution in - the file called "COPYING". + the file called "LICENSE.GPL". 
Contact Information: e1000-devel Mailing List diff --git a/lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_phy.h b/lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_phy.h index 5387c5e7..db24fb0b 100644 --- a/lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_phy.h +++ b/lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_phy.h @@ -17,7 +17,7 @@ 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. The full GNU General Public License is included in this distribution in - the file called "COPYING". + the file called "LICENSE.GPL". Contact Information: e1000-devel Mailing List diff --git a/lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_regs.h b/lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_regs.h index 0e083c54..830ec991 100644 --- a/lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_regs.h +++ b/lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_regs.h @@ -17,7 +17,7 @@ 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. The full GNU General Public License is included in this distribution in - the file called "COPYING". + the file called "LICENSE.GPL". Contact Information: e1000-devel Mailing List diff --git a/lib/librte_eal/linuxapp/kni/ethtool/igb/igb.h b/lib/librte_eal/linuxapp/kni/ethtool/igb/igb.h index e5554ca3..d077b49e 100644 --- a/lib/librte_eal/linuxapp/kni/ethtool/igb/igb.h +++ b/lib/librte_eal/linuxapp/kni/ethtool/igb/igb.h @@ -17,7 +17,7 @@ 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. The full GNU General Public License is included in this distribution in - the file called "COPYING". + the file called "LICENSE.GPL". Contact Information: e1000-devel Mailing List diff --git a/lib/librte_eal/linuxapp/kni/ethtool/igb/igb_debugfs.c b/lib/librte_eal/linuxapp/kni/ethtool/igb/igb_debugfs.c deleted file mode 100644 index c07f9f53..00000000 --- a/lib/librte_eal/linuxapp/kni/ethtool/igb/igb_debugfs.c +++ /dev/null @@ -1,28 +0,0 @@ -/******************************************************************************* - - Intel(R) Gigabit Ethernet Linux driver - Copyright(c) 2007-2013 Intel Corporation. - - This program is free software; you can redistribute it and/or modify it - under the terms and conditions of the GNU General Public License, - version 2, as published by the Free Software Foundation. - - This program is distributed in the hope it will be useful, but WITHOUT - ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - more details. - - You should have received a copy of the GNU General Public License along with - this program; if not, write to the Free Software Foundation, Inc., - 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. - - The full GNU General Public License is included in this distribution in - the file called "COPYING". - - Contact Information: - e1000-devel Mailing List - Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497 - -*******************************************************************************/ - -#include "igb.h" diff --git a/lib/librte_eal/linuxapp/kni/ethtool/igb/igb_ethtool.c b/lib/librte_eal/linuxapp/kni/ethtool/igb/igb_ethtool.c index af7e68a5..d7a987d5 100644 --- a/lib/librte_eal/linuxapp/kni/ethtool/igb/igb_ethtool.c +++ b/lib/librte_eal/linuxapp/kni/ethtool/igb/igb_ethtool.c @@ -17,7 +17,7 @@ 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. The full GNU General Public License is included in this distribution in - the file called "COPYING". + the file called "LICENSE.GPL". 
Contact Information: e1000-devel Mailing List diff --git a/lib/librte_eal/linuxapp/kni/ethtool/igb/igb_hwmon.c b/lib/librte_eal/linuxapp/kni/ethtool/igb/igb_hwmon.c deleted file mode 100644 index 07a1ae07..00000000 --- a/lib/librte_eal/linuxapp/kni/ethtool/igb/igb_hwmon.c +++ /dev/null @@ -1,260 +0,0 @@ -/******************************************************************************* - - Intel(R) Gigabit Ethernet Linux driver - Copyright(c) 2007-2013 Intel Corporation. - - This program is free software; you can redistribute it and/or modify it - under the terms and conditions of the GNU General Public License, - version 2, as published by the Free Software Foundation. - - This program is distributed in the hope it will be useful, but WITHOUT - ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - more details. - - You should have received a copy of the GNU General Public License along with - this program; if not, write to the Free Software Foundation, Inc., - 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. - - The full GNU General Public License is included in this distribution in - the file called "COPYING". - - Contact Information: - e1000-devel Mailing List - Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497 - -*******************************************************************************/ - -#include "igb.h" -#include "e1000_82575.h" -#include "e1000_hw.h" -#ifdef IGB_HWMON -#include -#include -#include -#include -#include -#include -#include -#include - -#ifdef HAVE_I2C_SUPPORT -static struct i2c_board_info i350_sensor_info = { - I2C_BOARD_INFO("i350bb", (0Xf8 >> 1)), -}; -#endif /* HAVE_I2C_SUPPORT */ - -/* hwmon callback functions */ -static ssize_t igb_hwmon_show_location(struct device *dev, - struct device_attribute *attr, - char *buf) -{ - struct hwmon_attr *igb_attr = container_of(attr, struct hwmon_attr, - dev_attr); - return sprintf(buf, "loc%u\n", - igb_attr->sensor->location); -} - -static ssize_t igb_hwmon_show_temp(struct device *dev, - struct device_attribute *attr, - char *buf) -{ - struct hwmon_attr *igb_attr = container_of(attr, struct hwmon_attr, - dev_attr); - unsigned int value; - - /* reset the temp field */ - igb_attr->hw->mac.ops.get_thermal_sensor_data(igb_attr->hw); - - value = igb_attr->sensor->temp; - - /* display millidegree */ - value *= 1000; - - return sprintf(buf, "%u\n", value); -} - -static ssize_t igb_hwmon_show_cautionthresh(struct device *dev, - struct device_attribute *attr, - char *buf) -{ - struct hwmon_attr *igb_attr = container_of(attr, struct hwmon_attr, - dev_attr); - unsigned int value = igb_attr->sensor->caution_thresh; - - /* display millidegree */ - value *= 1000; - - return sprintf(buf, "%u\n", value); -} - -static ssize_t igb_hwmon_show_maxopthresh(struct device *dev, - struct device_attribute *attr, - char *buf) -{ - struct hwmon_attr *igb_attr = container_of(attr, struct hwmon_attr, - dev_attr); - unsigned int value = igb_attr->sensor->max_op_thresh; - - /* display millidegree */ - value *= 1000; - - return sprintf(buf, "%u\n", value); -} - -/* igb_add_hwmon_attr - Create hwmon attr table for a hwmon sysfs file. 
- * @ adapter: pointer to the adapter structure - * @ offset: offset in the eeprom sensor data table - * @ type: type of sensor data to display - * - * For each file we want in hwmon's sysfs interface we need a device_attribute - * This is included in our hwmon_attr struct that contains the references to - * the data structures we need to get the data to display. - */ -static int igb_add_hwmon_attr(struct igb_adapter *adapter, - unsigned int offset, int type) { - int rc; - unsigned int n_attr; - struct hwmon_attr *igb_attr; - - n_attr = adapter->igb_hwmon_buff.n_hwmon; - igb_attr = &adapter->igb_hwmon_buff.hwmon_list[n_attr]; - - switch (type) { - case IGB_HWMON_TYPE_LOC: - igb_attr->dev_attr.show = igb_hwmon_show_location; - snprintf(igb_attr->name, sizeof(igb_attr->name), - "temp%u_label", offset); - break; - case IGB_HWMON_TYPE_TEMP: - igb_attr->dev_attr.show = igb_hwmon_show_temp; - snprintf(igb_attr->name, sizeof(igb_attr->name), - "temp%u_input", offset); - break; - case IGB_HWMON_TYPE_CAUTION: - igb_attr->dev_attr.show = igb_hwmon_show_cautionthresh; - snprintf(igb_attr->name, sizeof(igb_attr->name), - "temp%u_max", offset); - break; - case IGB_HWMON_TYPE_MAX: - igb_attr->dev_attr.show = igb_hwmon_show_maxopthresh; - snprintf(igb_attr->name, sizeof(igb_attr->name), - "temp%u_crit", offset); - break; - default: - rc = -EPERM; - return rc; - } - - /* These always the same regardless of type */ - igb_attr->sensor = - &adapter->hw.mac.thermal_sensor_data.sensor[offset]; - igb_attr->hw = &adapter->hw; - igb_attr->dev_attr.store = NULL; - igb_attr->dev_attr.attr.mode = S_IRUGO; - igb_attr->dev_attr.attr.name = igb_attr->name; - sysfs_attr_init(&igb_attr->dev_attr.attr); - rc = device_create_file(&adapter->pdev->dev, - &igb_attr->dev_attr); - if (rc == 0) - ++adapter->igb_hwmon_buff.n_hwmon; - - return rc; -} - -static void igb_sysfs_del_adapter(struct igb_adapter *adapter) -{ - int i; - - if (adapter == NULL) - return; - - for (i = 0; i < adapter->igb_hwmon_buff.n_hwmon; i++) { - device_remove_file(&adapter->pdev->dev, - &adapter->igb_hwmon_buff.hwmon_list[i].dev_attr); - } - - kfree(adapter->igb_hwmon_buff.hwmon_list); - - if (adapter->igb_hwmon_buff.device) - hwmon_device_unregister(adapter->igb_hwmon_buff.device); -} - -/* called from igb_main.c */ -void igb_sysfs_exit(struct igb_adapter *adapter) -{ - igb_sysfs_del_adapter(adapter); -} - -/* called from igb_main.c */ -int igb_sysfs_init(struct igb_adapter *adapter) -{ - struct hwmon_buff *igb_hwmon = &adapter->igb_hwmon_buff; - unsigned int i; - int n_attrs; - int rc = 0; -#ifdef HAVE_I2C_SUPPORT - struct i2c_client *client = NULL; -#endif /* HAVE_I2C_SUPPORT */ - - /* If this method isn't defined we don't support thermals */ - if (adapter->hw.mac.ops.init_thermal_sensor_thresh == NULL) - goto exit; - - /* Don't create thermal hwmon interface if no sensors present */ - rc = (adapter->hw.mac.ops.init_thermal_sensor_thresh(&adapter->hw)); - if (rc) - goto exit; -#ifdef HAVE_I2C_SUPPORT - /* init i2c_client */ - client = i2c_new_device(&adapter->i2c_adap, &i350_sensor_info); - if (client == NULL) { - dev_info(&adapter->pdev->dev, - "Failed to create new i2c device..\n"); - goto exit; - } - adapter->i2c_client = client; -#endif /* HAVE_I2C_SUPPORT */ - - /* Allocation space for max attributes - * max num sensors * values (loc, temp, max, caution) - */ - n_attrs = E1000_MAX_SENSORS * 4; - igb_hwmon->hwmon_list = kcalloc(n_attrs, sizeof(struct hwmon_attr), - GFP_KERNEL); - if (!igb_hwmon->hwmon_list) { - rc = -ENOMEM; - goto err; - } - - 
igb_hwmon->device = hwmon_device_register(&adapter->pdev->dev); - if (IS_ERR(igb_hwmon->device)) { - rc = PTR_ERR(igb_hwmon->device); - goto err; - } - - for (i = 0; i < E1000_MAX_SENSORS; i++) { - - /* Only create hwmon sysfs entries for sensors that have - * meaningful data. - */ - if (adapter->hw.mac.thermal_sensor_data.sensor[i].location == 0) - continue; - - /* Bail if any hwmon attr struct fails to initialize */ - rc = igb_add_hwmon_attr(adapter, i, IGB_HWMON_TYPE_CAUTION); - rc |= igb_add_hwmon_attr(adapter, i, IGB_HWMON_TYPE_LOC); - rc |= igb_add_hwmon_attr(adapter, i, IGB_HWMON_TYPE_TEMP); - rc |= igb_add_hwmon_attr(adapter, i, IGB_HWMON_TYPE_MAX); - if (rc) - goto err; - } - - goto exit; - -err: - igb_sysfs_del_adapter(adapter); -exit: - return rc; -} -#endif /* IGB_HWMON */ diff --git a/lib/librte_eal/linuxapp/kni/ethtool/igb/igb_main.c b/lib/librte_eal/linuxapp/kni/ethtool/igb/igb_main.c index f1dcc95b..f4dca5a3 100644 --- a/lib/librte_eal/linuxapp/kni/ethtool/igb/igb_main.c +++ b/lib/librte_eal/linuxapp/kni/ethtool/igb/igb_main.c @@ -17,7 +17,7 @@ 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. The full GNU General Public License is included in this distribution in - the file called "COPYING". + the file called "LICENSE.GPL". Contact Information: e1000-devel Mailing List @@ -1562,6 +1562,7 @@ static void igb_check_swap_media(struct igb_adapter *adapter) ctrl_ext = E1000_READ_REG(hw, E1000_CTRL_EXT); connsw = E1000_READ_REG(hw, E1000_CONNSW); link = igb_has_link(adapter); + (void) link; /* need to live swap if current media is copper and we have fiber/serdes * to go to. diff --git a/lib/librte_eal/linuxapp/kni/ethtool/igb/igb_param.c b/lib/librte_eal/linuxapp/kni/ethtool/igb/igb_param.c index f79ce7c1..c922ca2f 100644 --- a/lib/librte_eal/linuxapp/kni/ethtool/igb/igb_param.c +++ b/lib/librte_eal/linuxapp/kni/ethtool/igb/igb_param.c @@ -17,7 +17,7 @@ 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. The full GNU General Public License is included in this distribution in - the file called "COPYING". + the file called "LICENSE.GPL". Contact Information: e1000-devel Mailing List diff --git a/lib/librte_eal/linuxapp/kni/ethtool/igb/igb_procfs.c b/lib/librte_eal/linuxapp/kni/ethtool/igb/igb_procfs.c deleted file mode 100644 index 66236d29..00000000 --- a/lib/librte_eal/linuxapp/kni/ethtool/igb/igb_procfs.c +++ /dev/null @@ -1,363 +0,0 @@ -/******************************************************************************* - - Intel(R) Gigabit Ethernet Linux driver - Copyright(c) 2007-2013 Intel Corporation. - - This program is free software; you can redistribute it and/or modify it - under the terms and conditions of the GNU General Public License, - version 2, as published by the Free Software Foundation. - - This program is distributed in the hope it will be useful, but WITHOUT - ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - more details. - - You should have received a copy of the GNU General Public License along with - this program; if not, write to the Free Software Foundation, Inc., - 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. - - The full GNU General Public License is included in this distribution in - the file called "COPYING". - - Contact Information: - e1000-devel Mailing List - Intel Corporation, 5200 N.E. 
Elam Young Parkway, Hillsboro, OR 97124-6497 - -*******************************************************************************/ - -#include "igb.h" -#include "e1000_82575.h" -#include "e1000_hw.h" - -#ifdef IGB_PROCFS -#ifndef IGB_HWMON - -#include -#include -#include -#include -#include - -static struct proc_dir_entry *igb_top_dir = NULL; - - -bool igb_thermal_present(struct igb_adapter *adapter) -{ - s32 status; - struct e1000_hw *hw; - - if (adapter == NULL) - return false; - hw = &adapter->hw; - - /* - * Only set I2C bit-bang mode if an external thermal sensor is - * supported on this device. - */ - if (adapter->ets) { - status = e1000_set_i2c_bb(hw); - if (status != E1000_SUCCESS) - return false; - } - - status = hw->mac.ops.init_thermal_sensor_thresh(hw); - if (status != E1000_SUCCESS) - return false; - - return true; -} - - -static int igb_macburn(char *page, char **start, off_t off, int count, - int *eof, void *data) -{ - struct e1000_hw *hw; - struct igb_adapter *adapter = (struct igb_adapter *)data; - if (adapter == NULL) - return snprintf(page, count, "error: no adapter\n"); - - hw = &adapter->hw; - if (hw == NULL) - return snprintf(page, count, "error: no hw data\n"); - - return snprintf(page, count, "0x%02X%02X%02X%02X%02X%02X\n", - (unsigned int)hw->mac.perm_addr[0], - (unsigned int)hw->mac.perm_addr[1], - (unsigned int)hw->mac.perm_addr[2], - (unsigned int)hw->mac.perm_addr[3], - (unsigned int)hw->mac.perm_addr[4], - (unsigned int)hw->mac.perm_addr[5]); -} - -static int igb_macadmn(char *page, char **start, off_t off, - int count, int *eof, void *data) -{ - struct e1000_hw *hw; - struct igb_adapter *adapter = (struct igb_adapter *)data; - if (adapter == NULL) - return snprintf(page, count, "error: no adapter\n"); - - hw = &adapter->hw; - if (hw == NULL) - return snprintf(page, count, "error: no hw data\n"); - - return snprintf(page, count, "0x%02X%02X%02X%02X%02X%02X\n", - (unsigned int)hw->mac.addr[0], - (unsigned int)hw->mac.addr[1], - (unsigned int)hw->mac.addr[2], - (unsigned int)hw->mac.addr[3], - (unsigned int)hw->mac.addr[4], - (unsigned int)hw->mac.addr[5]); -} - -static int igb_numeports(char *page, char **start, off_t off, int count, - int *eof, void *data) -{ - struct e1000_hw *hw; - int ports; - struct igb_adapter *adapter = (struct igb_adapter *)data; - if (adapter == NULL) - return snprintf(page, count, "error: no adapter\n"); - - hw = &adapter->hw; - if (hw == NULL) - return snprintf(page, count, "error: no hw data\n"); - - ports = 4; - - return snprintf(page, count, "%d\n", ports); -} - -static int igb_porttype(char *page, char **start, off_t off, int count, - int *eof, void *data) -{ - struct igb_adapter *adapter = (struct igb_adapter *)data; - if (adapter == NULL) - return snprintf(page, count, "error: no adapter\n"); - - return snprintf(page, count, "%d\n", - test_bit(__IGB_DOWN, &adapter->state)); -} - -static int igb_therm_location(char *page, char **start, off_t off, - int count, int *eof, void *data) -{ - struct igb_therm_proc_data *therm_data = - (struct igb_therm_proc_data *)data; - - if (therm_data == NULL) - return snprintf(page, count, "error: no therm_data\n"); - - return snprintf(page, count, "%d\n", therm_data->sensor_data->location); -} - -static int igb_therm_maxopthresh(char *page, char **start, off_t off, - int count, int *eof, void *data) -{ - struct igb_therm_proc_data *therm_data = - (struct igb_therm_proc_data *)data; - - if (therm_data == NULL) - return snprintf(page, count, "error: no therm_data\n"); - - return snprintf(page, count, 
"%d\n", - therm_data->sensor_data->max_op_thresh); -} - -static int igb_therm_cautionthresh(char *page, char **start, off_t off, - int count, int *eof, void *data) -{ - struct igb_therm_proc_data *therm_data = - (struct igb_therm_proc_data *)data; - - if (therm_data == NULL) - return snprintf(page, count, "error: no therm_data\n"); - - return snprintf(page, count, "%d\n", - therm_data->sensor_data->caution_thresh); -} - -static int igb_therm_temp(char *page, char **start, off_t off, - int count, int *eof, void *data) -{ - s32 status; - struct igb_therm_proc_data *therm_data = - (struct igb_therm_proc_data *)data; - - if (therm_data == NULL) - return snprintf(page, count, "error: no therm_data\n"); - - status = e1000_get_thermal_sensor_data(therm_data->hw); - if (status != E1000_SUCCESS) - snprintf(page, count, "error: status %d returned\n", status); - - return snprintf(page, count, "%d\n", therm_data->sensor_data->temp); -} - -struct igb_proc_type{ - char name[32]; - int (*read)(char*, char**, off_t, int, int*, void*); -}; - -struct igb_proc_type igb_proc_entries[] = { - {"numeports", &igb_numeports}, - {"porttype", &igb_porttype}, - {"macburn", &igb_macburn}, - {"macadmn", &igb_macadmn}, - {"", NULL} -}; - -struct igb_proc_type igb_internal_entries[] = { - {"location", &igb_therm_location}, - {"temp", &igb_therm_temp}, - {"cautionthresh", &igb_therm_cautionthresh}, - {"maxopthresh", &igb_therm_maxopthresh}, - {"", NULL} -}; - -void igb_del_proc_entries(struct igb_adapter *adapter) -{ - int index, i; - char buf[16]; /* much larger than the sensor number will ever be */ - - if (igb_top_dir == NULL) - return; - - for (i = 0; i < E1000_MAX_SENSORS; i++) { - if (adapter->therm_dir[i] == NULL) - continue; - - for (index = 0; ; index++) { - if (igb_internal_entries[index].read == NULL) - break; - - remove_proc_entry(igb_internal_entries[index].name, - adapter->therm_dir[i]); - } - snprintf(buf, sizeof(buf), "sensor_%d", i); - remove_proc_entry(buf, adapter->info_dir); - } - - if (adapter->info_dir != NULL) { - for (index = 0; ; index++) { - if (igb_proc_entries[index].read == NULL) - break; - remove_proc_entry(igb_proc_entries[index].name, - adapter->info_dir); - } - remove_proc_entry("info", adapter->eth_dir); - } - - if (adapter->eth_dir != NULL) - remove_proc_entry(pci_name(adapter->pdev), igb_top_dir); -} - -/* called from igb_main.c */ -void igb_procfs_exit(struct igb_adapter *adapter) -{ - igb_del_proc_entries(adapter); -} - -int igb_procfs_topdir_init(void) -{ - igb_top_dir = proc_mkdir("driver/igb", NULL); - if (igb_top_dir == NULL) - return -ENOMEM; - - return 0; -} - -void igb_procfs_topdir_exit(void) -{ - remove_proc_entry("driver/igb", NULL); -} - -/* called from igb_main.c */ -int igb_procfs_init(struct igb_adapter *adapter) -{ - int rc = 0; - int i; - int index; - char buf[16]; /* much larger than the sensor number will ever be */ - - adapter->eth_dir = NULL; - adapter->info_dir = NULL; - for (i = 0; i < E1000_MAX_SENSORS; i++) - adapter->therm_dir[i] = NULL; - - if ( igb_top_dir == NULL ) { - rc = -ENOMEM; - goto fail; - } - - adapter->eth_dir = proc_mkdir(pci_name(adapter->pdev), igb_top_dir); - if (adapter->eth_dir == NULL) { - rc = -ENOMEM; - goto fail; - } - - adapter->info_dir = proc_mkdir("info", adapter->eth_dir); - if (adapter->info_dir == NULL) { - rc = -ENOMEM; - goto fail; - } - for (index = 0; ; index++) { - if (igb_proc_entries[index].read == NULL) { - break; - } - if (!(create_proc_read_entry(igb_proc_entries[index].name, - 0444, - adapter->info_dir, - 
igb_proc_entries[index].read, - adapter))) { - - rc = -ENOMEM; - goto fail; - } - } - if (igb_thermal_present(adapter) == false) - goto exit; - - for (i = 0; i < E1000_MAX_SENSORS; i++) { - - if (adapter->hw.mac.thermal_sensor_data.sensor[i].location== 0) - continue; - - snprintf(buf, sizeof(buf), "sensor_%d", i); - adapter->therm_dir[i] = proc_mkdir(buf, adapter->info_dir); - if (adapter->therm_dir[i] == NULL) { - rc = -ENOMEM; - goto fail; - } - for (index = 0; ; index++) { - if (igb_internal_entries[index].read == NULL) - break; - /* - * therm_data struct contains pointer the read func - * will be needing - */ - adapter->therm_data[i].hw = &adapter->hw; - adapter->therm_data[i].sensor_data = - &adapter->hw.mac.thermal_sensor_data.sensor[i]; - - if (!(create_proc_read_entry( - igb_internal_entries[index].name, - 0444, - adapter->therm_dir[i], - igb_internal_entries[index].read, - &adapter->therm_data[i]))) { - rc = -ENOMEM; - goto fail; - } - } - } - goto exit; - -fail: - igb_del_proc_entries(adapter); -exit: - return rc; -} - -#endif /* !IGB_HWMON */ -#endif /* IGB_PROCFS */ diff --git a/lib/librte_eal/linuxapp/kni/ethtool/igb/igb_ptp.c b/lib/librte_eal/linuxapp/kni/ethtool/igb/igb_ptp.c deleted file mode 100644 index 454b70ce..00000000 --- a/lib/librte_eal/linuxapp/kni/ethtool/igb/igb_ptp.c +++ /dev/null @@ -1,944 +0,0 @@ -/******************************************************************************* - - Intel(R) Gigabit Ethernet Linux driver - Copyright(c) 2007-2013 Intel Corporation. - - This program is free software; you can redistribute it and/or modify it - under the terms and conditions of the GNU General Public License, - version 2, as published by the Free Software Foundation. - - This program is distributed in the hope it will be useful, but WITHOUT - ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - more details. - - You should have received a copy of the GNU General Public License along with - this program; if not, write to the Free Software Foundation, Inc., - 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. - - The full GNU General Public License is included in this distribution in - the file called "COPYING". - - Contact Information: - e1000-devel Mailing List - Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497 - -*******************************************************************************/ - -/****************************************************************************** - Copyright(c) 2011 Richard Cochran for some of the - 82576 and 82580 code -******************************************************************************/ - -#include "igb.h" - -#include -#include -#include -#include - -#define INCVALUE_MASK 0x7fffffff -#define ISGN 0x80000000 - -/* - * The 82580 timesync updates the system timer every 8ns by 8ns, - * and this update value cannot be reprogrammed. - * - * Neither the 82576 nor the 82580 offer registers wide enough to hold - * nanoseconds time values for very long. For the 82580, SYSTIM always - * counts nanoseconds, but the upper 24 bits are not available. The - * frequency is adjusted by changing the 32 bit fractional nanoseconds - * register, TIMINCA. - * - * For the 82576, the SYSTIM register time unit is affect by the - * choice of the 24 bit TININCA:IV (incvalue) field. Five bits of this - * field are needed to provide the nominal 16 nanosecond period, - * leaving 19 bits for fractional nanoseconds. 
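The fixed-point layout described in the comment above is easy to sanity-check numerically. Below is a minimal user-space sketch; the constant name mirrors the driver's IGB_82576_TSYNC_SHIFT, but the program itself is illustrative only:

/* Concrete check of the 82576 layout: the low 19 bits of TIMINCA's
 * incvalue are fractional nanoseconds, so the nominal incvalue
 * 16 << 19 advances SYSTIM by exactly 16 ns per tick, and one
 * incvalue LSB is 2^-19 ns of period trim. */
#include <stdio.h>
#include <stdint.h>

#define TSYNC_SHIFT 19   /* fractional bits, as IGB_82576_TSYNC_SHIFT */

int main(void)
{
	uint64_t incvalue = 16ULL << TSYNC_SHIFT;   /* nominal increment */

	printf("advance per tick : %llu ns\n",
	       (unsigned long long)(incvalue >> TSYNC_SHIFT));   /* 16 */
	printf("trim granularity : %.3g ns per LSB\n",
	       1.0 / (double)(1UL << TSYNC_SHIFT));
	return 0;
}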
- * - * We scale the NIC clock cycle by a large factor so that relatively - * small clock corrections can be added or subtracted at each clock - * tick. The drawbacks of a large factor are a) that the clock - * register overflows more quickly (not such a big deal) and b) that - * the increment per tick has to fit into 24 bits. As a result we - * need to use a shift of 19 so we can fit a value of 16 into the - * TIMINCA register. - * - * - * SYSTIMH SYSTIML - * +--------------+ +---+---+------+ - * 82576 | 32 | | 8 | 5 | 19 | - * +--------------+ +---+---+------+ - * \________ 45 bits _______/ fract - * - * +----------+---+ +--------------+ - * 82580 | 24 | 8 | | 32 | - * +----------+---+ +--------------+ - * reserved \______ 40 bits _____/ - * - * - * The 45 bit 82576 SYSTIM overflows every - * 2^45 * 10^-9 / 3600 = 9.77 hours. - * - * The 40 bit 82580 SYSTIM overflows every - * 2^40 * 10^-9 / 60 = 18.3 minutes. - */ - -#define IGB_SYSTIM_OVERFLOW_PERIOD (HZ * 60 * 9) -#define IGB_PTP_TX_TIMEOUT (HZ * 15) -#define INCPERIOD_82576 (1 << E1000_TIMINCA_16NS_SHIFT) -#define INCVALUE_82576_MASK ((1 << E1000_TIMINCA_16NS_SHIFT) - 1) -#define INCVALUE_82576 (16 << IGB_82576_TSYNC_SHIFT) -#define IGB_NBITS_82580 40 - -/* - * SYSTIM read access for the 82576 - */ - -static cycle_t igb_ptp_read_82576(const struct cyclecounter *cc) -{ - struct igb_adapter *igb = container_of(cc, struct igb_adapter, cc); - struct e1000_hw *hw = &igb->hw; - u64 val; - u32 lo, hi; - - lo = E1000_READ_REG(hw, E1000_SYSTIML); - hi = E1000_READ_REG(hw, E1000_SYSTIMH); - - val = ((u64) hi) << 32; - val |= lo; - - return val; -} - -/* - * SYSTIM read access for the 82580 - */ - -static cycle_t igb_ptp_read_82580(const struct cyclecounter *cc) -{ - struct igb_adapter *igb = container_of(cc, struct igb_adapter, cc); - struct e1000_hw *hw = &igb->hw; - u64 val; - u32 lo, hi; - - /* The timestamp latches on lowest register read. For the 82580 - * the lowest register is SYSTIMR instead of SYSTIML. However we only - * need to provide nanosecond resolution, so we just ignore it. - */ - E1000_READ_REG(hw, E1000_SYSTIMR); - lo = E1000_READ_REG(hw, E1000_SYSTIML); - hi = E1000_READ_REG(hw, E1000_SYSTIMH); - - val = ((u64) hi) << 32; - val |= lo; - - return val; -} - -/* - * SYSTIM read access for I210/I211 - */ - -static void igb_ptp_read_i210(struct igb_adapter *adapter, struct timespec *ts) -{ - struct e1000_hw *hw = &adapter->hw; - u32 sec, nsec; - - /* The timestamp latches on lowest register read. For I210/I211, the - * lowest register is SYSTIMR. Since we only need to provide nanosecond - * resolution, we can ignore it. - */ - E1000_READ_REG(hw, E1000_SYSTIMR); - nsec = E1000_READ_REG(hw, E1000_SYSTIML); - sec = E1000_READ_REG(hw, E1000_SYSTIMH); - - ts->tv_sec = sec; - ts->tv_nsec = nsec; -} - -static void igb_ptp_write_i210(struct igb_adapter *adapter, - const struct timespec *ts) -{ - struct e1000_hw *hw = &adapter->hw; - - /* - * Writing the SYSTIMR register is not necessary as it only provides - * sub-nanosecond resolution. - */ - E1000_WRITE_REG(hw, E1000_SYSTIML, ts->tv_nsec); - E1000_WRITE_REG(hw, E1000_SYSTIMH, ts->tv_sec); -} - -/** - * igb_ptp_systim_to_hwtstamp - convert system time value to hw timestamp - * @adapter: board private structure - * @hwtstamps: timestamp structure to update - * @systim: unsigned 64bit system time value. - * - * We need to convert the system time value stored in the RX/TXSTMP registers - * into a hwtstamp which can be used by the upper level timestamping functions. 
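The overflow periods quoted in the comment block above follow directly from the usable SYSTIM widths, and the short program below reproduces them. This also explains the driver's choice of IGB_SYSTIM_OVERFLOW_PERIOD, nine minutes of jiffies (HZ * 60 * 9): it is comfortably under half the fastest wrap time, so the software timecounter is always refreshed before a wrap can be missed.

/* Reproduces the SYSTIM overflow arithmetic quoted above:
 * 2^45 ns of range for the 82576 and 2^40 ns for the 82580. */
#include <stdio.h>

int main(void)
{
	double ns82576 = (double)(1ULL << 45);   /* 45 usable SYSTIM bits */
	double ns82580 = (double)(1ULL << 40);   /* 40 usable SYSTIM bits */

	printf("82576 wraps after %.2f hours\n",   ns82576 / 1e9 / 3600.0); /* ~9.77  */
	printf("82580 wraps after %.2f minutes\n", ns82580 / 1e9 / 60.0);   /* ~18.33 */
	return 0;
}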
- * - * The 'tmreg_lock' spinlock is used to protect the consistency of the - * system time value. This is needed because reading the 64 bit time - * value involves reading two (or three) 32 bit registers. The first - * read latches the value. Ditto for writing. - * - * In addition, here have extended the system time with an overflow - * counter in software. - **/ -static void igb_ptp_systim_to_hwtstamp(struct igb_adapter *adapter, - struct skb_shared_hwtstamps *hwtstamps, - u64 systim) -{ - unsigned long flags; - u64 ns; - - switch (adapter->hw.mac.type) { - case e1000_82576: - case e1000_82580: - case e1000_i350: - case e1000_i354: - spin_lock_irqsave(&adapter->tmreg_lock, flags); - - ns = timecounter_cyc2time(&adapter->tc, systim); - - spin_unlock_irqrestore(&adapter->tmreg_lock, flags); - - memset(hwtstamps, 0, sizeof(*hwtstamps)); - hwtstamps->hwtstamp = ns_to_ktime(ns); - break; - case e1000_i210: - case e1000_i211: - memset(hwtstamps, 0, sizeof(*hwtstamps)); - /* Upper 32 bits contain s, lower 32 bits contain ns. */ - hwtstamps->hwtstamp = ktime_set(systim >> 32, - systim & 0xFFFFFFFF); - break; - default: - break; - } -} - -/* - * PTP clock operations - */ - -static int igb_ptp_adjfreq_82576(struct ptp_clock_info *ptp, s32 ppb) -{ - struct igb_adapter *igb = container_of(ptp, struct igb_adapter, - ptp_caps); - struct e1000_hw *hw = &igb->hw; - int neg_adj = 0; - u64 rate; - u32 incvalue; - - if (ppb < 0) { - neg_adj = 1; - ppb = -ppb; - } - rate = ppb; - rate <<= 14; - rate = div_u64(rate, 1953125); - - incvalue = 16 << IGB_82576_TSYNC_SHIFT; - - if (neg_adj) - incvalue -= rate; - else - incvalue += rate; - - E1000_WRITE_REG(hw, E1000_TIMINCA, INCPERIOD_82576 | (incvalue & INCVALUE_82576_MASK)); - - return 0; -} - -static int igb_ptp_adjfreq_82580(struct ptp_clock_info *ptp, s32 ppb) -{ - struct igb_adapter *igb = container_of(ptp, struct igb_adapter, - ptp_caps); - struct e1000_hw *hw = &igb->hw; - int neg_adj = 0; - u64 rate; - u32 inca; - - if (ppb < 0) { - neg_adj = 1; - ppb = -ppb; - } - rate = ppb; - rate <<= 26; - rate = div_u64(rate, 1953125); - - /* At 2.5G speeds, the TIMINCA register on I354 updates the clock 2.5x - * as quickly. Account for this by dividing the adjustment by 2.5. 
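The magic constant 1953125 in the adjfreq callbacks above is just 5^9, so dividing by it after a shift is an exact integer form of scaling by the nominal increment over 10^9: with the 82576's nominal incvalue of 16 << 19 = 2^23, incvalue * ppb / 10^9 equals (ppb << 14) / 1953125 because 10^9 = 2^9 * 5^9. The same trick shows up in the I354 2.5 Gb path, where multiplying by 2 and dividing by 5 implements the divide-by-2.5 in integer math. A quick stand-alone check of the identity:

/* Verifies (ppb << 14) / 1953125 == (16 << 19) * ppb / 1e9 exactly,
 * including truncation, since both are floor(ppb * 2^14 / 5^9). */
#include <stdio.h>
#include <stdint.h>

int main(void)
{
	uint64_t nominal = 16ULL << 19;   /* 2^23, nominal 82576 incvalue */

	for (uint64_t ppb = 1; ppb <= 1000000; ppb *= 10) {
		uint64_t direct = nominal * ppb / 1000000000ULL;
		uint64_t driver = (ppb << 14) / 1953125ULL;

		printf("ppb=%7llu  direct=%llu  driver=%llu\n",
		       (unsigned long long)ppb,
		       (unsigned long long)direct,
		       (unsigned long long)driver);   /* always equal */
	}
	return 0;
}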
- */ - if (hw->mac.type == e1000_i354) { - u32 status = E1000_READ_REG(hw, E1000_STATUS); - - if ((status & E1000_STATUS_2P5_SKU) && - !(status & E1000_STATUS_2P5_SKU_OVER)) { - rate <<= 1; - rate = div_u64(rate, 5); - } - } - - inca = rate & INCVALUE_MASK; - if (neg_adj) - inca |= ISGN; - - E1000_WRITE_REG(hw, E1000_TIMINCA, inca); - - return 0; -} - -static int igb_ptp_adjtime_82576(struct ptp_clock_info *ptp, s64 delta) -{ - struct igb_adapter *igb = container_of(ptp, struct igb_adapter, - ptp_caps); - unsigned long flags; - s64 now; - - spin_lock_irqsave(&igb->tmreg_lock, flags); - - now = timecounter_read(&igb->tc); - now += delta; - timecounter_init(&igb->tc, &igb->cc, now); - - spin_unlock_irqrestore(&igb->tmreg_lock, flags); - - return 0; -} - -static int igb_ptp_adjtime_i210(struct ptp_clock_info *ptp, s64 delta) -{ - struct igb_adapter *igb = container_of(ptp, struct igb_adapter, - ptp_caps); - unsigned long flags; - struct timespec now, then = ns_to_timespec(delta); - - spin_lock_irqsave(&igb->tmreg_lock, flags); - - igb_ptp_read_i210(igb, &now); - now = timespec_add(now, then); - igb_ptp_write_i210(igb, (const struct timespec *)&now); - - spin_unlock_irqrestore(&igb->tmreg_lock, flags); - - return 0; -} - -static int igb_ptp_gettime_82576(struct ptp_clock_info *ptp, - struct timespec *ts) -{ - struct igb_adapter *igb = container_of(ptp, struct igb_adapter, - ptp_caps); - unsigned long flags; - u64 ns; - u32 remainder; - - spin_lock_irqsave(&igb->tmreg_lock, flags); - - ns = timecounter_read(&igb->tc); - - spin_unlock_irqrestore(&igb->tmreg_lock, flags); - - ts->tv_sec = div_u64_rem(ns, 1000000000, &remainder); - ts->tv_nsec = remainder; - - return 0; -} - -static int igb_ptp_gettime_i210(struct ptp_clock_info *ptp, - struct timespec *ts) -{ - struct igb_adapter *igb = container_of(ptp, struct igb_adapter, - ptp_caps); - unsigned long flags; - - spin_lock_irqsave(&igb->tmreg_lock, flags); - - igb_ptp_read_i210(igb, ts); - - spin_unlock_irqrestore(&igb->tmreg_lock, flags); - - return 0; -} - -static int igb_ptp_settime_82576(struct ptp_clock_info *ptp, - const struct timespec *ts) -{ - struct igb_adapter *igb = container_of(ptp, struct igb_adapter, - ptp_caps); - unsigned long flags; - u64 ns; - - ns = ts->tv_sec * 1000000000ULL; - ns += ts->tv_nsec; - - spin_lock_irqsave(&igb->tmreg_lock, flags); - - timecounter_init(&igb->tc, &igb->cc, ns); - - spin_unlock_irqrestore(&igb->tmreg_lock, flags); - - return 0; -} - -static int igb_ptp_settime_i210(struct ptp_clock_info *ptp, - const struct timespec *ts) -{ - struct igb_adapter *igb = container_of(ptp, struct igb_adapter, - ptp_caps); - unsigned long flags; - - spin_lock_irqsave(&igb->tmreg_lock, flags); - - igb_ptp_write_i210(igb, ts); - - spin_unlock_irqrestore(&igb->tmreg_lock, flags); - - return 0; -} - -static int igb_ptp_enable(struct ptp_clock_info *ptp, - struct ptp_clock_request *rq, int on) -{ - return -EOPNOTSUPP; -} - -/** - * igb_ptp_tx_work - * @work: pointer to work struct - * - * This work function polls the TSYNCTXCTL valid bit to determine when a - * timestamp has been taken for the current stored skb. 
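igb_ptp_tx_work(), defined next, is a poll-with-deadline loop: check the hardware valid bit, reschedule while the deadline has not passed, and drop the pending skb once it has. A user-space analogue of the same control flow is sketched below; the hardware read is stubbed out, only the structure carries over, and none of the names are from the driver:

/* Poll/timeout flow analogous to igb_ptp_tx_work(): poll a
 * "timestamp valid" flag and abandon the wait after a deadline. */
#include <stdio.h>
#include <stdbool.h>
#include <time.h>

#define TX_TIMEOUT_SEC 15   /* mirrors IGB_PTP_TX_TIMEOUT (15 * HZ) */

static bool timestamp_valid(void)
{
	return false;   /* stub for the TSYNCTXCTL valid-bit read */
}

int main(void)
{
	struct timespec start, now;
	struct timespec nap = { 0, 100 * 1000 * 1000 };   /* 100 ms */

	clock_gettime(CLOCK_MONOTONIC, &start);
	for (;;) {
		if (timestamp_valid()) {
			puts("timestamp captured");
			break;
		}
		clock_gettime(CLOCK_MONOTONIC, &now);
		if (now.tv_sec - start.tv_sec >= TX_TIMEOUT_SEC) {
			puts("clearing Tx timestamp hang");   /* give-up path */
			break;
		}
		nanosleep(&nap, NULL);   /* the driver reschedules its work item */
	}
	return 0;
}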
- */ -void igb_ptp_tx_work(struct work_struct *work) -{ - struct igb_adapter *adapter = container_of(work, struct igb_adapter, - ptp_tx_work); - struct e1000_hw *hw = &adapter->hw; - u32 tsynctxctl; - - if (!adapter->ptp_tx_skb) - return; - - if (time_is_before_jiffies(adapter->ptp_tx_start + - IGB_PTP_TX_TIMEOUT)) { - dev_kfree_skb_any(adapter->ptp_tx_skb); - adapter->ptp_tx_skb = NULL; - adapter->tx_hwtstamp_timeouts++; - dev_warn(&adapter->pdev->dev, "clearing Tx timestamp hang"); - return; - } - - tsynctxctl = E1000_READ_REG(hw, E1000_TSYNCTXCTL); - if (tsynctxctl & E1000_TSYNCTXCTL_VALID) - igb_ptp_tx_hwtstamp(adapter); - else - /* reschedule to check later */ - schedule_work(&adapter->ptp_tx_work); -} - -static void igb_ptp_overflow_check(struct work_struct *work) -{ - struct igb_adapter *igb = - container_of(work, struct igb_adapter, ptp_overflow_work.work); - struct timespec ts; - - igb->ptp_caps.gettime(&igb->ptp_caps, &ts); - - pr_debug("igb overflow check at %ld.%09lu\n", ts.tv_sec, ts.tv_nsec); - - schedule_delayed_work(&igb->ptp_overflow_work, - IGB_SYSTIM_OVERFLOW_PERIOD); -} - -/** - * igb_ptp_rx_hang - detect error case when Rx timestamp registers latched - * @adapter: private network adapter structure - * - * This watchdog task is scheduled to detect error case where hardware has - * dropped an Rx packet that was timestamped when the ring is full. The - * particular error is rare but leaves the device in a state unable to timestamp - * any future packets. - */ -void igb_ptp_rx_hang(struct igb_adapter *adapter) -{ - struct e1000_hw *hw = &adapter->hw; - struct igb_ring *rx_ring; - u32 tsyncrxctl = E1000_READ_REG(hw, E1000_TSYNCRXCTL); - unsigned long rx_event; - int n; - - if (hw->mac.type != e1000_82576) - return; - - /* If we don't have a valid timestamp in the registers, just update the - * timeout counter and exit - */ - if (!(tsyncrxctl & E1000_TSYNCRXCTL_VALID)) { - adapter->last_rx_ptp_check = jiffies; - return; - } - - /* Determine the most recent watchdog or rx_timestamp event */ - rx_event = adapter->last_rx_ptp_check; - for (n = 0; n < adapter->num_rx_queues; n++) { - rx_ring = adapter->rx_ring[n]; - if (time_after(rx_ring->last_rx_timestamp, rx_event)) - rx_event = rx_ring->last_rx_timestamp; - } - - /* Only need to read the high RXSTMP register to clear the lock */ - if (time_is_before_jiffies(rx_event + 5 * HZ)) { - E1000_READ_REG(hw, E1000_RXSTMPH); - adapter->last_rx_ptp_check = jiffies; - adapter->rx_hwtstamp_cleared++; - dev_warn(&adapter->pdev->dev, "clearing Rx timestamp hang"); - } -} - -/** - * igb_ptp_tx_hwtstamp - utility function which checks for TX time stamp - * @adapter: Board private structure. - * - * If we were asked to do hardware stamping and such a time stamp is - * available, then it must have been for this skb here because we only - * allow only one such packet into the queue. 
- */ -void igb_ptp_tx_hwtstamp(struct igb_adapter *adapter) -{ - struct e1000_hw *hw = &adapter->hw; - struct skb_shared_hwtstamps shhwtstamps; - u64 regval; - - regval = E1000_READ_REG(hw, E1000_TXSTMPL); - regval |= (u64)E1000_READ_REG(hw, E1000_TXSTMPH) << 32; - - igb_ptp_systim_to_hwtstamp(adapter, &shhwtstamps, regval); - skb_tstamp_tx(adapter->ptp_tx_skb, &shhwtstamps); - dev_kfree_skb_any(adapter->ptp_tx_skb); - adapter->ptp_tx_skb = NULL; -} - -/** - * igb_ptp_rx_pktstamp - retrieve Rx per packet timestamp - * @q_vector: Pointer to interrupt specific structure - * @va: Pointer to address containing Rx buffer - * @skb: Buffer containing timestamp and packet - * - * This function is meant to retrieve a timestamp from the first buffer of an - * incoming frame. The value is stored in little endian format starting on - * byte 8. - */ -void igb_ptp_rx_pktstamp(struct igb_q_vector *q_vector, - unsigned char *va, - struct sk_buff *skb) -{ - __le64 *regval = (__le64 *)va; - - /* - * The timestamp is recorded in little endian format. - * DWORD: 0 1 2 3 - * Field: Reserved Reserved SYSTIML SYSTIMH - */ - igb_ptp_systim_to_hwtstamp(q_vector->adapter, skb_hwtstamps(skb), - le64_to_cpu(regval[1])); -} - -/** - * igb_ptp_rx_rgtstamp - retrieve Rx timestamp stored in register - * @q_vector: Pointer to interrupt specific structure - * @skb: Buffer containing timestamp and packet - * - * This function is meant to retrieve a timestamp from the internal registers - * of the adapter and store it in the skb. - */ -void igb_ptp_rx_rgtstamp(struct igb_q_vector *q_vector, - struct sk_buff *skb) -{ - struct igb_adapter *adapter = q_vector->adapter; - struct e1000_hw *hw = &adapter->hw; - u64 regval; - - /* - * If this bit is set, then the RX registers contain the time stamp. No - * other packet will be time stamped until we read these registers, so - * read the registers to make them available again. Because only one - * packet can be time stamped at a time, we know that the register - * values must belong to this one here and therefore we don't need to - * compare any of the additional attributes stored for it. - * - * If nothing went wrong, then it should have a shared tx_flags that we - * can turn into a skb_shared_hwtstamps. - */ - if (!(E1000_READ_REG(hw, E1000_TSYNCRXCTL) & E1000_TSYNCRXCTL_VALID)) - return; - - regval = E1000_READ_REG(hw, E1000_RXSTMPL); - regval |= (u64)E1000_READ_REG(hw, E1000_RXSTMPH) << 32; - - igb_ptp_systim_to_hwtstamp(adapter, skb_hwtstamps(skb), regval); -} - -/** - * igb_ptp_hwtstamp_ioctl - control hardware time stamping - * @netdev: - * @ifreq: - * @cmd: - * - * Outgoing time stamping can be enabled and disabled. Play nice and - * disable it when requested, although it shouldn't case any overhead - * when no packet needs it. At most one packet in the queue may be - * marked for time stamping, otherwise it would be impossible to tell - * for sure to which packet the hardware time stamp belongs. - * - * Incoming time stamping has to be configured via the hardware - * filters. Not all combinations are supported, in particular event - * type has to be specified. Matching the kind of event packet is - * not supported, with the exception of "all V2 events regardless of - * level 2 or 4". 
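igb_ptp_rx_pktstamp() above relies on the documented prefix layout, two reserved DWORDs followed by SYSTIML and SYSTIMH, in other words a little-endian u64 at byte offset 8 of the first buffer. A portable user-space decoder for that layout, with sample bytes invented for illustration:

/* Portable decode of the Rx packet timestamp prefix described above:
 * bytes 8..15 hold SYSTIML:SYSTIMH in little-endian order. */
#include <stdio.h>
#include <stdint.h>

static uint64_t le64_at(const unsigned char *p)
{
	uint64_t v = 0;

	for (int i = 7; i >= 0; i--)
		v = (v << 8) | p[i];   /* least significant byte first */
	return v;
}

int main(void)
{
	/* 16-byte timestamp prefix carrying an arbitrary example value */
	unsigned char buf[16] = { 0 };

	buf[8]  = 0x10;   /* SYSTIML low byte */
	buf[12] = 0x01;   /* SYSTIMH low byte */

	printf("raw SYSTIM = 0x%016llx\n",
	       (unsigned long long)le64_at(buf + 8));   /* 0x0000000100000010 */
	return 0;
}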
- * - **/ -int igb_ptp_hwtstamp_ioctl(struct net_device *netdev, - struct ifreq *ifr, int cmd) -{ - struct igb_adapter *adapter = netdev_priv(netdev); - struct e1000_hw *hw = &adapter->hw; - struct hwtstamp_config config; - u32 tsync_tx_ctl = E1000_TSYNCTXCTL_ENABLED; - u32 tsync_rx_ctl = E1000_TSYNCRXCTL_ENABLED; - u32 tsync_rx_cfg = 0; - bool is_l4 = false; - bool is_l2 = false; - u32 regval; - - if (copy_from_user(&config, ifr->ifr_data, sizeof(config))) - return -EFAULT; - - /* reserved for future extensions */ - if (config.flags) - return -EINVAL; - - switch (config.tx_type) { - case HWTSTAMP_TX_OFF: - tsync_tx_ctl = 0; - case HWTSTAMP_TX_ON: - break; - default: - return -ERANGE; - } - - switch (config.rx_filter) { - case HWTSTAMP_FILTER_NONE: - tsync_rx_ctl = 0; - break; - case HWTSTAMP_FILTER_PTP_V1_L4_SYNC: - tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_L4_V1; - tsync_rx_cfg = E1000_TSYNCRXCFG_PTP_V1_SYNC_MESSAGE; - is_l4 = true; - break; - case HWTSTAMP_FILTER_PTP_V1_L4_DELAY_REQ: - tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_L4_V1; - tsync_rx_cfg = E1000_TSYNCRXCFG_PTP_V1_DELAY_REQ_MESSAGE; - is_l4 = true; - break; - case HWTSTAMP_FILTER_PTP_V2_EVENT: - case HWTSTAMP_FILTER_PTP_V2_L2_EVENT: - case HWTSTAMP_FILTER_PTP_V2_L4_EVENT: - case HWTSTAMP_FILTER_PTP_V2_SYNC: - case HWTSTAMP_FILTER_PTP_V2_L2_SYNC: - case HWTSTAMP_FILTER_PTP_V2_L4_SYNC: - case HWTSTAMP_FILTER_PTP_V2_DELAY_REQ: - case HWTSTAMP_FILTER_PTP_V2_L2_DELAY_REQ: - case HWTSTAMP_FILTER_PTP_V2_L4_DELAY_REQ: - tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_EVENT_V2; - config.rx_filter = HWTSTAMP_FILTER_PTP_V2_EVENT; - is_l2 = true; - is_l4 = true; - break; - case HWTSTAMP_FILTER_PTP_V1_L4_EVENT: - case HWTSTAMP_FILTER_ALL: - /* - * 82576 cannot timestamp all packets, which it needs to do to - * support both V1 Sync and Delay_Req messages - */ - if (hw->mac.type != e1000_82576) { - tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_ALL; - config.rx_filter = HWTSTAMP_FILTER_ALL; - break; - } - /* fall through */ - default: - config.rx_filter = HWTSTAMP_FILTER_NONE; - return -ERANGE; - } - - if (hw->mac.type == e1000_82575) { - if (tsync_rx_ctl | tsync_tx_ctl) - return -EINVAL; - return 0; - } - - /* - * Per-packet timestamping only works if all packets are - * timestamped, so enable timestamping in all packets as - * long as one rx filter was configured. 
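The function being implemented here is the driver side of the standard SIOCSHWTSTAMP interface. For orientation, before the register programming that follows, this is roughly the user-space request that tools such as linuxptp's hwstamp_ctl issue; the interface name is a placeholder and the program needs root on a NIC with PTP support:

/* Minimal SIOCSHWTSTAMP request against a driver like the one above.
 * "eth0" is a placeholder interface name. */
#include <stdio.h>
#include <string.h>
#include <unistd.h>
#include <sys/ioctl.h>
#include <sys/socket.h>
#include <net/if.h>
#include <linux/net_tstamp.h>
#include <linux/sockios.h>

int main(void)
{
	struct hwtstamp_config cfg;
	struct ifreq ifr;
	int fd = socket(AF_INET, SOCK_DGRAM, 0);

	if (fd < 0) { perror("socket"); return 1; }

	memset(&cfg, 0, sizeof(cfg));
	cfg.tx_type   = HWTSTAMP_TX_ON;               /* exercises the tx_type switch */
	cfg.rx_filter = HWTSTAMP_FILTER_PTP_V2_EVENT; /* folded to FILTER_ALL on 82580+ */

	memset(&ifr, 0, sizeof(ifr));
	strncpy(ifr.ifr_name, "eth0", IFNAMSIZ - 1);
	ifr.ifr_data = (char *)&cfg;

	if (ioctl(fd, SIOCSHWTSTAMP, &ifr) < 0)
		perror("SIOCSHWTSTAMP");
	else
		printf("rx_filter granted: %d\n", cfg.rx_filter);
	close(fd);
	return 0;
}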
- */ - if ((hw->mac.type >= e1000_82580) && tsync_rx_ctl) { - tsync_rx_ctl = E1000_TSYNCRXCTL_ENABLED; - tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_ALL; - config.rx_filter = HWTSTAMP_FILTER_ALL; - is_l2 = true; - is_l4 = true; - - if ((hw->mac.type == e1000_i210) || - (hw->mac.type == e1000_i211)) { - regval = E1000_READ_REG(hw, E1000_RXPBS); - regval |= E1000_RXPBS_CFG_TS_EN; - E1000_WRITE_REG(hw, E1000_RXPBS, regval); - } - } - - /* enable/disable TX */ - regval = E1000_READ_REG(hw, E1000_TSYNCTXCTL); - regval &= ~E1000_TSYNCTXCTL_ENABLED; - regval |= tsync_tx_ctl; - E1000_WRITE_REG(hw, E1000_TSYNCTXCTL, regval); - - /* enable/disable RX */ - regval = E1000_READ_REG(hw, E1000_TSYNCRXCTL); - regval &= ~(E1000_TSYNCRXCTL_ENABLED | E1000_TSYNCRXCTL_TYPE_MASK); - regval |= tsync_rx_ctl; - E1000_WRITE_REG(hw, E1000_TSYNCRXCTL, regval); - - /* define which PTP packets are time stamped */ - E1000_WRITE_REG(hw, E1000_TSYNCRXCFG, tsync_rx_cfg); - - /* define ethertype filter for timestamped packets */ - if (is_l2) - E1000_WRITE_REG(hw, E1000_ETQF(3), - (E1000_ETQF_FILTER_ENABLE | /* enable filter */ - E1000_ETQF_1588 | /* enable timestamping */ - ETH_P_1588)); /* 1588 eth protocol type */ - else - E1000_WRITE_REG(hw, E1000_ETQF(3), 0); - - /* L4 Queue Filter[3]: filter by destination port and protocol */ - if (is_l4) { - u32 ftqf = (IPPROTO_UDP /* UDP */ - | E1000_FTQF_VF_BP /* VF not compared */ - | E1000_FTQF_1588_TIME_STAMP /* Enable Timestamping */ - | E1000_FTQF_MASK); /* mask all inputs */ - ftqf &= ~E1000_FTQF_MASK_PROTO_BP; /* enable protocol check */ - - E1000_WRITE_REG(hw, E1000_IMIR(3), htons(PTP_EV_PORT)); - E1000_WRITE_REG(hw, E1000_IMIREXT(3), - (E1000_IMIREXT_SIZE_BP | E1000_IMIREXT_CTRL_BP)); - if (hw->mac.type == e1000_82576) { - /* enable source port check */ - E1000_WRITE_REG(hw, E1000_SPQF(3), htons(PTP_EV_PORT)); - ftqf &= ~E1000_FTQF_MASK_SOURCE_PORT_BP; - } - E1000_WRITE_REG(hw, E1000_FTQF(3), ftqf); - } else { - E1000_WRITE_REG(hw, E1000_FTQF(3), E1000_FTQF_MASK); - } - E1000_WRITE_FLUSH(hw); - - /* clear TX/RX time stamp registers, just to be sure */ - regval = E1000_READ_REG(hw, E1000_TXSTMPL); - regval = E1000_READ_REG(hw, E1000_TXSTMPH); - regval = E1000_READ_REG(hw, E1000_RXSTMPL); - regval = E1000_READ_REG(hw, E1000_RXSTMPH); - - return copy_to_user(ifr->ifr_data, &config, sizeof(config)) ? - -EFAULT : 0; -} - -void igb_ptp_init(struct igb_adapter *adapter) -{ - struct e1000_hw *hw = &adapter->hw; - struct net_device *netdev = adapter->netdev; - - switch (hw->mac.type) { - case e1000_82576: - snprintf(adapter->ptp_caps.name, 16, "%pm", netdev->dev_addr); - adapter->ptp_caps.owner = THIS_MODULE; - adapter->ptp_caps.max_adj = 999999881; - adapter->ptp_caps.n_ext_ts = 0; - adapter->ptp_caps.pps = 0; - adapter->ptp_caps.adjfreq = igb_ptp_adjfreq_82576; - adapter->ptp_caps.adjtime = igb_ptp_adjtime_82576; - adapter->ptp_caps.gettime = igb_ptp_gettime_82576; - adapter->ptp_caps.settime = igb_ptp_settime_82576; - adapter->ptp_caps.enable = igb_ptp_enable; - adapter->cc.read = igb_ptp_read_82576; - adapter->cc.mask = CLOCKSOURCE_MASK(64); - adapter->cc.mult = 1; - adapter->cc.shift = IGB_82576_TSYNC_SHIFT; - /* Dial the nominal frequency. 
*/ - E1000_WRITE_REG(hw, E1000_TIMINCA, INCPERIOD_82576 | - INCVALUE_82576); - break; - case e1000_82580: - case e1000_i350: - case e1000_i354: - snprintf(adapter->ptp_caps.name, 16, "%pm", netdev->dev_addr); - adapter->ptp_caps.owner = THIS_MODULE; - adapter->ptp_caps.max_adj = 62499999; - adapter->ptp_caps.n_ext_ts = 0; - adapter->ptp_caps.pps = 0; - adapter->ptp_caps.adjfreq = igb_ptp_adjfreq_82580; - adapter->ptp_caps.adjtime = igb_ptp_adjtime_82576; - adapter->ptp_caps.gettime = igb_ptp_gettime_82576; - adapter->ptp_caps.settime = igb_ptp_settime_82576; - adapter->ptp_caps.enable = igb_ptp_enable; - adapter->cc.read = igb_ptp_read_82580; - adapter->cc.mask = CLOCKSOURCE_MASK(IGB_NBITS_82580); - adapter->cc.mult = 1; - adapter->cc.shift = 0; - /* Enable the timer functions by clearing bit 31. */ - E1000_WRITE_REG(hw, E1000_TSAUXC, 0x0); - break; - case e1000_i210: - case e1000_i211: - snprintf(adapter->ptp_caps.name, 16, "%pm", netdev->dev_addr); - adapter->ptp_caps.owner = THIS_MODULE; - adapter->ptp_caps.max_adj = 62499999; - adapter->ptp_caps.n_ext_ts = 0; - adapter->ptp_caps.pps = 0; - adapter->ptp_caps.adjfreq = igb_ptp_adjfreq_82580; - adapter->ptp_caps.adjtime = igb_ptp_adjtime_i210; - adapter->ptp_caps.gettime = igb_ptp_gettime_i210; - adapter->ptp_caps.settime = igb_ptp_settime_i210; - adapter->ptp_caps.enable = igb_ptp_enable; - /* Enable the timer functions by clearing bit 31. */ - E1000_WRITE_REG(hw, E1000_TSAUXC, 0x0); - break; - default: - adapter->ptp_clock = NULL; - return; - } - - E1000_WRITE_FLUSH(hw); - - spin_lock_init(&adapter->tmreg_lock); - INIT_WORK(&adapter->ptp_tx_work, igb_ptp_tx_work); - - /* Initialize the clock and overflow work for devices that need it. */ - if ((hw->mac.type == e1000_i210) || (hw->mac.type == e1000_i211)) { - struct timespec ts = ktime_to_timespec(ktime_get_real()); - - igb_ptp_settime_i210(&adapter->ptp_caps, &ts); - } else { - timecounter_init(&adapter->tc, &adapter->cc, - ktime_to_ns(ktime_get_real())); - - INIT_DELAYED_WORK(&adapter->ptp_overflow_work, - igb_ptp_overflow_check); - - schedule_delayed_work(&adapter->ptp_overflow_work, - IGB_SYSTIM_OVERFLOW_PERIOD); - } - - /* Initialize the time sync interrupts for devices that support it. */ - if (hw->mac.type >= e1000_82580) { - E1000_WRITE_REG(hw, E1000_TSIM, E1000_TSIM_TXTS); - E1000_WRITE_REG(hw, E1000_IMS, E1000_IMS_TS); - } - - adapter->ptp_clock = ptp_clock_register(&adapter->ptp_caps, - &adapter->pdev->dev); - if (IS_ERR(adapter->ptp_clock)) { - adapter->ptp_clock = NULL; - dev_err(&adapter->pdev->dev, "ptp_clock_register failed\n"); - } else { - dev_info(&adapter->pdev->dev, "added PHC on %s\n", - adapter->netdev->name); - adapter->flags |= IGB_FLAG_PTP; - } -} - -/** - * igb_ptp_stop - Disable PTP device and stop the overflow check. - * @adapter: Board private structure. - * - * This function stops the PTP support and cancels the delayed work. - **/ -void igb_ptp_stop(struct igb_adapter *adapter) -{ - switch (adapter->hw.mac.type) { - case e1000_82576: - case e1000_82580: - case e1000_i350: - case e1000_i354: - cancel_delayed_work_sync(&adapter->ptp_overflow_work); - break; - case e1000_i210: - case e1000_i211: - /* No delayed work to cancel. 
*/ - break; - default: - return; - } - - cancel_work_sync(&adapter->ptp_tx_work); - if (adapter->ptp_tx_skb) { - dev_kfree_skb_any(adapter->ptp_tx_skb); - adapter->ptp_tx_skb = NULL; - } - - if (adapter->ptp_clock) { - ptp_clock_unregister(adapter->ptp_clock); - dev_info(&adapter->pdev->dev, "removed PHC on %s\n", - adapter->netdev->name); - adapter->flags &= ~IGB_FLAG_PTP; - } -} - -/** - * igb_ptp_reset - Re-enable the adapter for PTP following a reset. - * @adapter: Board private structure. - * - * This function handles the reset work required to re-enable the PTP device. - **/ -void igb_ptp_reset(struct igb_adapter *adapter) -{ - struct e1000_hw *hw = &adapter->hw; - - if (!(adapter->flags & IGB_FLAG_PTP)) - return; - - switch (adapter->hw.mac.type) { - case e1000_82576: - /* Dial the nominal frequency. */ - E1000_WRITE_REG(hw, E1000_TIMINCA, INCPERIOD_82576 | - INCVALUE_82576); - break; - case e1000_82580: - case e1000_i350: - case e1000_i354: - case e1000_i210: - case e1000_i211: - /* Enable the timer functions and interrupts. */ - E1000_WRITE_REG(hw, E1000_TSAUXC, 0x0); - E1000_WRITE_REG(hw, E1000_TSIM, E1000_TSIM_TXTS); - E1000_WRITE_REG(hw, E1000_IMS, E1000_IMS_TS); - break; - default: - /* No work to do. */ - return; - } - - /* Re-initialize the timer. */ - if ((hw->mac.type == e1000_i210) || (hw->mac.type == e1000_i211)) { - struct timespec ts = ktime_to_timespec(ktime_get_real()); - - igb_ptp_settime_i210(&adapter->ptp_caps, &ts); - } else { - timecounter_init(&adapter->tc, &adapter->cc, - ktime_to_ns(ktime_get_real())); - } -} diff --git a/lib/librte_eal/linuxapp/kni/ethtool/igb/igb_regtest.h b/lib/librte_eal/linuxapp/kni/ethtool/igb/igb_regtest.h index 18da64a3..9d49b45e 100644 --- a/lib/librte_eal/linuxapp/kni/ethtool/igb/igb_regtest.h +++ b/lib/librte_eal/linuxapp/kni/ethtool/igb/igb_regtest.h @@ -17,7 +17,7 @@ 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. The full GNU General Public License is included in this distribution in - the file called "COPYING". + the file called "LICENSE.GPL". Contact Information: e1000-devel Mailing List diff --git a/lib/librte_eal/linuxapp/kni/ethtool/igb/igb_vmdq.c b/lib/librte_eal/linuxapp/kni/ethtool/igb/igb_vmdq.c index 015c8952..205da562 100644 --- a/lib/librte_eal/linuxapp/kni/ethtool/igb/igb_vmdq.c +++ b/lib/librte_eal/linuxapp/kni/ethtool/igb/igb_vmdq.c @@ -17,7 +17,7 @@ 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. The full GNU General Public License is included in this distribution in - the file called "COPYING". + the file called "LICENSE.GPL". Contact Information: e1000-devel Mailing List diff --git a/lib/librte_eal/linuxapp/kni/ethtool/igb/igb_vmdq.h b/lib/librte_eal/linuxapp/kni/ethtool/igb/igb_vmdq.h index e51e7c4e..c6d4c568 100644 --- a/lib/librte_eal/linuxapp/kni/ethtool/igb/igb_vmdq.h +++ b/lib/librte_eal/linuxapp/kni/ethtool/igb/igb_vmdq.h @@ -17,7 +17,7 @@ 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. The full GNU General Public License is included in this distribution in - the file called "COPYING". + the file called "LICENSE.GPL". Contact Information: e1000-devel Mailing List diff --git a/lib/librte_eal/linuxapp/kni/ethtool/igb/kcompat.c b/lib/librte_eal/linuxapp/kni/ethtool/igb/kcompat.c deleted file mode 100644 index bde3a83c..00000000 --- a/lib/librte_eal/linuxapp/kni/ethtool/igb/kcompat.c +++ /dev/null @@ -1,1482 +0,0 @@ -/******************************************************************************* - - Intel(R) Gigabit Ethernet Linux driver - Copyright(c) 2007-2013 Intel Corporation. 
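kcompat.c, whose removal begins here, is a compatibility layer: each section is gated on LINUX_VERSION_CODE and backfills an API that older kernels lack, so the rest of the driver can call the modern name unconditionally. A representative sketch of that pattern, modeled on the _kc_kzalloc() fallback that appears later in this same file:

/* kcompat pattern: on kernels older than 2.6.14, provide kzalloc()
 * in terms of APIs that did exist, and map the modern name onto it. */
#include <linux/version.h>
#include <linux/slab.h>
#include <linux/string.h>

#if (LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 14))
static inline void *_kc_kzalloc(size_t size, int flags)
{
	void *ret = kmalloc(size, flags);   /* kzalloc() predecessor */

	if (ret)
		memset(ret, 0, size);
	return ret;
}
#define kzalloc(size, flags) _kc_kzalloc(size, flags)
#endif /* < 2.6.14 */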
- - This program is free software; you can redistribute it and/or modify it - under the terms and conditions of the GNU General Public License, - version 2, as published by the Free Software Foundation. - - This program is distributed in the hope it will be useful, but WITHOUT - ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - more details. - - You should have received a copy of the GNU General Public License along with - this program; if not, write to the Free Software Foundation, Inc., - 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. - - The full GNU General Public License is included in this distribution in - the file called "COPYING". - - Contact Information: - e1000-devel Mailing List - Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497 - -*******************************************************************************/ - -#include "igb.h" -#include "kcompat.h" - -/*****************************************************************************/ -#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,4,8) ) -/* From lib/vsprintf.c */ -#include - -static int skip_atoi(const char **s) -{ - int i=0; - - while (isdigit(**s)) - i = i*10 + *((*s)++) - '0'; - return i; -} - -#define _kc_ZEROPAD 1 /* pad with zero */ -#define _kc_SIGN 2 /* unsigned/signed long */ -#define _kc_PLUS 4 /* show plus */ -#define _kc_SPACE 8 /* space if plus */ -#define _kc_LEFT 16 /* left justified */ -#define _kc_SPECIAL 32 /* 0x */ -#define _kc_LARGE 64 /* use 'ABCDEF' instead of 'abcdef' */ - -static char * number(char * buf, char * end, long long num, int base, int size, int precision, int type) -{ - char c,sign,tmp[66]; - const char *digits; - const char small_digits[] = "0123456789abcdefghijklmnopqrstuvwxyz"; - const char large_digits[] = "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ"; - int i; - - digits = (type & _kc_LARGE) ? large_digits : small_digits; - if (type & _kc_LEFT) - type &= ~_kc_ZEROPAD; - if (base < 2 || base > 36) - return 0; - c = (type & _kc_ZEROPAD) ? 
'0' : ' '; - sign = 0; - if (type & _kc_SIGN) { - if (num < 0) { - sign = '-'; - num = -num; - size--; - } else if (type & _kc_PLUS) { - sign = '+'; - size--; - } else if (type & _kc_SPACE) { - sign = ' '; - size--; - } - } - if (type & _kc_SPECIAL) { - if (base == 16) - size -= 2; - else if (base == 8) - size--; - } - i = 0; - if (num == 0) - tmp[i++]='0'; - else while (num != 0) - tmp[i++] = digits[do_div(num,base)]; - if (i > precision) - precision = i; - size -= precision; - if (!(type&(_kc_ZEROPAD+_kc_LEFT))) { - while(size-->0) { - if (buf <= end) - *buf = ' '; - ++buf; - } - } - if (sign) { - if (buf <= end) - *buf = sign; - ++buf; - } - if (type & _kc_SPECIAL) { - if (base==8) { - if (buf <= end) - *buf = '0'; - ++buf; - } else if (base==16) { - if (buf <= end) - *buf = '0'; - ++buf; - if (buf <= end) - *buf = digits[33]; - ++buf; - } - } - if (!(type & _kc_LEFT)) { - while (size-- > 0) { - if (buf <= end) - *buf = c; - ++buf; - } - } - while (i < precision--) { - if (buf <= end) - *buf = '0'; - ++buf; - } - while (i-- > 0) { - if (buf <= end) - *buf = tmp[i]; - ++buf; - } - while (size-- > 0) { - if (buf <= end) - *buf = ' '; - ++buf; - } - return buf; -} - -int _kc_vsnprintf(char *buf, size_t size, const char *fmt, va_list args) -{ - int len; - unsigned long long num; - int i, base; - char *str, *end, c; - const char *s; - - int flags; /* flags to number() */ - - int field_width; /* width of output field */ - int precision; /* min. # of digits for integers; max - number of chars for from string */ - int qualifier; /* 'h', 'l', or 'L' for integer fields */ - /* 'z' support added 23/7/1999 S.H. */ - /* 'z' changed to 'Z' --davidm 1/25/99 */ - - str = buf; - end = buf + size - 1; - - if (end < buf - 1) { - end = ((void *) -1); - size = end - buf + 1; - } - - for (; *fmt ; ++fmt) { - if (*fmt != '%') { - if (str <= end) - *str = *fmt; - ++str; - continue; - } - - /* process flags */ - flags = 0; - repeat: - ++fmt; /* this also skips first '%' */ - switch (*fmt) { - case '-': flags |= _kc_LEFT; goto repeat; - case '+': flags |= _kc_PLUS; goto repeat; - case ' ': flags |= _kc_SPACE; goto repeat; - case '#': flags |= _kc_SPECIAL; goto repeat; - case '0': flags |= _kc_ZEROPAD; goto repeat; - } - - /* get field width */ - field_width = -1; - if (isdigit(*fmt)) - field_width = skip_atoi(&fmt); - else if (*fmt == '*') { - ++fmt; - /* it's the next argument */ - field_width = va_arg(args, int); - if (field_width < 0) { - field_width = -field_width; - flags |= _kc_LEFT; - } - } - - /* get the precision */ - precision = -1; - if (*fmt == '.') { - ++fmt; - if (isdigit(*fmt)) - precision = skip_atoi(&fmt); - else if (*fmt == '*') { - ++fmt; - /* it's the next argument */ - precision = va_arg(args, int); - } - if (precision < 0) - precision = 0; - } - - /* get the conversion qualifier */ - qualifier = -1; - if (*fmt == 'h' || *fmt == 'l' || *fmt == 'L' || *fmt =='Z') { - qualifier = *fmt; - ++fmt; - } - - /* default base */ - base = 10; - - switch (*fmt) { - case 'c': - if (!(flags & _kc_LEFT)) { - while (--field_width > 0) { - if (str <= end) - *str = ' '; - ++str; - } - } - c = (unsigned char) va_arg(args, int); - if (str <= end) - *str = c; - ++str; - while (--field_width > 0) { - if (str <= end) - *str = ' '; - ++str; - } - continue; - - case 's': - s = va_arg(args, char *); - if (!s) - s = ""; - - len = strnlen(s, precision); - - if (!(flags & _kc_LEFT)) { - while (len < field_width--) { - if (str <= end) - *str = ' '; - ++str; - } - } - for (i = 0; i < len; ++i) { - if (str <= end) - *str = 
*s; - ++str; ++s; - } - while (len < field_width--) { - if (str <= end) - *str = ' '; - ++str; - } - continue; - - case 'p': - if (field_width == -1) { - field_width = 2*sizeof(void *); - flags |= _kc_ZEROPAD; - } - str = number(str, end, - (unsigned long) va_arg(args, void *), - 16, field_width, precision, flags); - continue; - - - case 'n': - /* FIXME: - * What does C99 say about the overflow case here? */ - if (qualifier == 'l') { - long * ip = va_arg(args, long *); - *ip = (str - buf); - } else if (qualifier == 'Z') { - size_t * ip = va_arg(args, size_t *); - *ip = (str - buf); - } else { - int * ip = va_arg(args, int *); - *ip = (str - buf); - } - continue; - - case '%': - if (str <= end) - *str = '%'; - ++str; - continue; - - /* integer number formats - set up the flags and "break" */ - case 'o': - base = 8; - break; - - case 'X': - flags |= _kc_LARGE; - case 'x': - base = 16; - break; - - case 'd': - case 'i': - flags |= _kc_SIGN; - case 'u': - break; - - default: - if (str <= end) - *str = '%'; - ++str; - if (*fmt) { - if (str <= end) - *str = *fmt; - ++str; - } else { - --fmt; - } - continue; - } - if (qualifier == 'L') - num = va_arg(args, long long); - else if (qualifier == 'l') { - num = va_arg(args, unsigned long); - if (flags & _kc_SIGN) - num = (signed long) num; - } else if (qualifier == 'Z') { - num = va_arg(args, size_t); - } else if (qualifier == 'h') { - num = (unsigned short) va_arg(args, int); - if (flags & _kc_SIGN) - num = (signed short) num; - } else { - num = va_arg(args, unsigned int); - if (flags & _kc_SIGN) - num = (signed int) num; - } - str = number(str, end, num, base, - field_width, precision, flags); - } - if (str <= end) - *str = '\0'; - else if (size > 0) - /* don't write out a null byte if the buf size is zero */ - *end = '\0'; - /* the trailing null byte doesn't count towards the total - * ++str; - */ - return str-buf; -} - -int _kc_snprintf(char * buf, size_t size, const char *fmt, ...) 
-{ - va_list args; - int i; - - va_start(args, fmt); - i = _kc_vsnprintf(buf,size,fmt,args); - va_end(args); - return i; -} -#endif /* < 2.4.8 */ - -/*****************************************************************************/ -#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,4,13) ) - -/**************************************/ -/* PCI DMA MAPPING */ - -#if defined(CONFIG_HIGHMEM) - -#ifndef PCI_DRAM_OFFSET -#define PCI_DRAM_OFFSET 0 -#endif - -u64 -_kc_pci_map_page(struct pci_dev *dev, struct page *page, unsigned long offset, - size_t size, int direction) -{ - return (((u64) (page - mem_map) << PAGE_SHIFT) + offset + - PCI_DRAM_OFFSET); -} - -#else /* CONFIG_HIGHMEM */ - -u64 -_kc_pci_map_page(struct pci_dev *dev, struct page *page, unsigned long offset, - size_t size, int direction) -{ - return pci_map_single(dev, (void *)page_address(page) + offset, size, - direction); -} - -#endif /* CONFIG_HIGHMEM */ - -void -_kc_pci_unmap_page(struct pci_dev *dev, u64 dma_addr, size_t size, - int direction) -{ - return pci_unmap_single(dev, dma_addr, size, direction); -} - -#endif /* 2.4.13 => 2.4.3 */ - -/*****************************************************************************/ -#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,4,3) ) - -/**************************************/ -/* PCI DRIVER API */ - -int -_kc_pci_set_dma_mask(struct pci_dev *dev, dma_addr_t mask) -{ - if (!pci_dma_supported(dev, mask)) - return -EIO; - dev->dma_mask = mask; - return 0; -} - -int -_kc_pci_request_regions(struct pci_dev *dev, char *res_name) -{ - int i; - - for (i = 0; i < 6; i++) { - if (pci_resource_len(dev, i) == 0) - continue; - - if (pci_resource_flags(dev, i) & IORESOURCE_IO) { - if (!request_region(pci_resource_start(dev, i), pci_resource_len(dev, i), res_name)) { - pci_release_regions(dev); - return -EBUSY; - } - } else if (pci_resource_flags(dev, i) & IORESOURCE_MEM) { - if (!request_mem_region(pci_resource_start(dev, i), pci_resource_len(dev, i), res_name)) { - pci_release_regions(dev); - return -EBUSY; - } - } - } - return 0; -} - -void -_kc_pci_release_regions(struct pci_dev *dev) -{ - int i; - - for (i = 0; i < 6; i++) { - if (pci_resource_len(dev, i) == 0) - continue; - - if (pci_resource_flags(dev, i) & IORESOURCE_IO) - release_region(pci_resource_start(dev, i), pci_resource_len(dev, i)); - - else if (pci_resource_flags(dev, i) & IORESOURCE_MEM) - release_mem_region(pci_resource_start(dev, i), pci_resource_len(dev, i)); - } -} - -/**************************************/ -/* NETWORK DRIVER API */ - -struct net_device * -_kc_alloc_etherdev(int sizeof_priv) -{ - struct net_device *dev; - int alloc_size; - - alloc_size = sizeof(*dev) + sizeof_priv + IFNAMSIZ + 31; - dev = kzalloc(alloc_size, GFP_KERNEL); - if (!dev) - return NULL; - - if (sizeof_priv) - dev->priv = (void *) (((unsigned long)(dev + 1) + 31) & ~31); - dev->name[0] = '\0'; - ether_setup(dev); - - return dev; -} - -int -_kc_is_valid_ether_addr(u8 *addr) -{ - const char zaddr[6] = { 0, }; - - return !(addr[0] & 1) && memcmp(addr, zaddr, 6); -} - -#endif /* 2.4.3 => 2.4.0 */ - -/*****************************************************************************/ -#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,4,6) ) - -int -_kc_pci_set_power_state(struct pci_dev *dev, int state) -{ - return 0; -} - -int -_kc_pci_enable_wake(struct pci_dev *pdev, u32 state, int enable) -{ - return 0; -} - -#endif /* 2.4.6 => 2.4.3 */ - -/*****************************************************************************/ -#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,0) ) -void 
_kc_skb_fill_page_desc(struct sk_buff *skb, int i, struct page *page, - int off, int size) -{ - skb_frag_t *frag = &skb_shinfo(skb)->frags[i]; - frag->page = page; - frag->page_offset = off; - frag->size = size; - skb_shinfo(skb)->nr_frags = i + 1; -} - -/* - * Original Copyright: - * find_next_bit.c: fallback find next bit implementation - * - * Copyright (C) 2004 Red Hat, Inc. All Rights Reserved. - * Written by David Howells (dhowells@redhat.com) - */ - -/** - * find_next_bit - find the next set bit in a memory region - * @addr: The address to base the search on - * @offset: The bitnumber to start searching at - * @size: The maximum size to search - */ -unsigned long find_next_bit(const unsigned long *addr, unsigned long size, - unsigned long offset) -{ - const unsigned long *p = addr + BITOP_WORD(offset); - unsigned long result = offset & ~(BITS_PER_LONG-1); - unsigned long tmp; - - if (offset >= size) - return size; - size -= result; - offset %= BITS_PER_LONG; - if (offset) { - tmp = *(p++); - tmp &= (~0UL << offset); - if (size < BITS_PER_LONG) - goto found_first; - if (tmp) - goto found_middle; - size -= BITS_PER_LONG; - result += BITS_PER_LONG; - } - while (size & ~(BITS_PER_LONG-1)) { - if ((tmp = *(p++))) - goto found_middle; - result += BITS_PER_LONG; - size -= BITS_PER_LONG; - } - if (!size) - return result; - tmp = *p; - -found_first: - tmp &= (~0UL >> (BITS_PER_LONG - size)); - if (tmp == 0UL) /* Are any bits set? */ - return result + size; /* Nope. */ -found_middle: - return result + ffs(tmp); -} - -size_t _kc_strlcpy(char *dest, const char *src, size_t size) -{ - size_t ret = strlen(src); - - if (size) { - size_t len = (ret >= size) ? size - 1 : ret; - memcpy(dest, src, len); - dest[len] = '\0'; - } - return ret; -} - -#ifndef do_div -#if BITS_PER_LONG == 32 -uint32_t __attribute__((weak)) _kc__div64_32(uint64_t *n, uint32_t base) -{ - uint64_t rem = *n; - uint64_t b = base; - uint64_t res, d = 1; - uint32_t high = rem >> 32; - - /* Reduce the thing a bit first */ - res = 0; - if (high >= base) { - high /= base; - res = (uint64_t) high << 32; - rem -= (uint64_t) (high*base) << 32; - } - - while ((int64_t)b > 0 && b < rem) { - b = b+b; - d = d+d; - } - - do { - if (rem >= b) { - rem -= b; - res += d; - } - b >>= 1; - d >>= 1; - } while (d); - - *n = res; - return rem; -} -#endif /* BITS_PER_LONG == 32 */ -#endif /* do_div */ -#endif /* 2.6.0 => 2.4.6 */ - -/*****************************************************************************/ -#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,4) ) -int _kc_scnprintf(char * buf, size_t size, const char *fmt, ...) -{ - va_list args; - int i; - - va_start(args, fmt); - i = vsnprintf(buf, size, fmt, args); - va_end(args); - return (i >= size) ? 
(size - 1) : i;
-}
-#endif /* < 2.6.4 */
-
-/*****************************************************************************/
-#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,10) )
-DECLARE_BITMAP(_kcompat_node_online_map, MAX_NUMNODES) = {1};
-#endif /* < 2.6.10 */
-
-/*****************************************************************************/
-#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,13) )
-char *_kc_kstrdup(const char *s, unsigned int gfp)
-{
-	size_t len;
-	char *buf;
-
-	if (!s)
-		return NULL;
-
-	len = strlen(s) + 1;
-	buf = kmalloc(len, gfp);
-	if (buf)
-		memcpy(buf, s, len);
-	return buf;
-}
-#endif /* < 2.6.13 */
-
-/*****************************************************************************/
-#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,14) )
-void *_kc_kzalloc(size_t size, int flags)
-{
-	void *ret = kmalloc(size, flags);
-	if (ret)
-		memset(ret, 0, size);
-	return ret;
-}
-#endif /* <= 2.6.13 */
-
-/*****************************************************************************/
-#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,19) )
-int _kc_skb_pad(struct sk_buff *skb, int pad)
-{
-	int ntail;
-
-	/* If the skbuff is non linear tailroom is always zero.. */
-	if (!skb_cloned(skb) && skb_tailroom(skb) >= pad) {
-		memset(skb->data+skb->len, 0, pad);
-		return 0;
-	}
-
-	ntail = skb->data_len + pad - (skb->end - skb->tail);
-	if (likely(skb_cloned(skb) || ntail > 0)) {
-		if (pskb_expand_head(skb, 0, ntail, GFP_ATOMIC))
-			goto free_skb;
-	}
-
-#ifdef MAX_SKB_FRAGS
-	if (skb_is_nonlinear(skb) &&
-	    !__pskb_pull_tail(skb, skb->data_len))
-		goto free_skb;
-
-#endif
-	memset(skb->data + skb->len, 0, pad);
-	return 0;
-
-free_skb:
-	kfree_skb(skb);
-	return -ENOMEM;
-}
-
-#if (!(RHEL_RELEASE_CODE && RHEL_RELEASE_CODE >= RHEL_RELEASE_VERSION(5,4)))
-int _kc_pci_save_state(struct pci_dev *pdev)
-{
-	struct net_device *netdev = pci_get_drvdata(pdev);
-	struct adapter_struct *adapter = netdev_priv(netdev);
-	int size = PCI_CONFIG_SPACE_LEN, i;
-	u16 pcie_cap_offset, pcie_link_status;
-
-#if ( LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,0) )
-	/* no ->dev for 2.4 kernels */
-	WARN_ON(pdev->dev.driver_data == NULL);
-#endif
-	pcie_cap_offset = pci_find_capability(pdev, PCI_CAP_ID_EXP);
-	if (pcie_cap_offset) {
-		if (!pci_read_config_word(pdev,
-					  pcie_cap_offset + PCIE_LINK_STATUS,
-					  &pcie_link_status))
-			size = PCIE_CONFIG_SPACE_LEN;
-	}
-	pci_config_space_ich8lan();
-#ifdef HAVE_PCI_ERS
-	if (adapter->config_space == NULL)
-#else
-	WARN_ON(adapter->config_space != NULL);
-#endif
-		adapter->config_space = kmalloc(size, GFP_KERNEL);
-	if (!adapter->config_space) {
-		printk(KERN_ERR "Out of memory in pci_save_state\n");
-		return -ENOMEM;
-	}
-	for (i = 0; i < (size / 4); i++)
-		pci_read_config_dword(pdev, i * 4, &adapter->config_space[i]);
-	return 0;
-}
-
-void _kc_pci_restore_state(struct pci_dev *pdev)
-{
-	struct net_device *netdev = pci_get_drvdata(pdev);
-	struct adapter_struct *adapter = netdev_priv(netdev);
-	int size = PCI_CONFIG_SPACE_LEN, i;
-	u16 pcie_cap_offset;
-	u16 pcie_link_status;
-
-	if (adapter->config_space != NULL) {
-		pcie_cap_offset = pci_find_capability(pdev, PCI_CAP_ID_EXP);
-		if (pcie_cap_offset &&
-		    !pci_read_config_word(pdev,
-					  pcie_cap_offset + PCIE_LINK_STATUS,
-					  &pcie_link_status))
-			size = PCIE_CONFIG_SPACE_LEN;
-
-		pci_config_space_ich8lan();
-		for (i = 0; i < (size / 4); i++)
-			pci_write_config_dword(pdev, i * 4, adapter->config_space[i]);
-#ifndef HAVE_PCI_ERS
-		kfree(adapter->config_space);
-		adapter->config_space = NULL;
-#endif
-	}
-}
-#endif /* 
!(RHEL_RELEASE_CODE >= RHEL 5.4) */ - -#ifdef HAVE_PCI_ERS -void _kc_free_netdev(struct net_device *netdev) -{ - struct adapter_struct *adapter = netdev_priv(netdev); - - if (adapter->config_space != NULL) - kfree(adapter->config_space); -#ifdef CONFIG_SYSFS - if (netdev->reg_state == NETREG_UNINITIALIZED) { - kfree((char *)netdev - netdev->padded); - } else { - BUG_ON(netdev->reg_state != NETREG_UNREGISTERED); - netdev->reg_state = NETREG_RELEASED; - class_device_put(&netdev->class_dev); - } -#else - kfree((char *)netdev - netdev->padded); -#endif -} -#endif - -void *_kc_kmemdup(const void *src, size_t len, unsigned gfp) -{ - void *p; - - p = kzalloc(len, gfp); - if (p) - memcpy(p, src, len); - return p; -} -#endif /* <= 2.6.19 */ - -/*****************************************************************************/ -#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,21) ) -struct pci_dev *_kc_netdev_to_pdev(struct net_device *netdev) -{ - return ((struct adapter_struct *)netdev_priv(netdev))->pdev; -} -#endif /* < 2.6.21 */ - -/*****************************************************************************/ -#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,22) ) -/* hexdump code taken from lib/hexdump.c */ -static void _kc_hex_dump_to_buffer(const void *buf, size_t len, int rowsize, - int groupsize, unsigned char *linebuf, - size_t linebuflen, bool ascii) -{ - const u8 *ptr = buf; - u8 ch; - int j, lx = 0; - int ascii_column; - - if (rowsize != 16 && rowsize != 32) - rowsize = 16; - - if (!len) - goto nil; - if (len > rowsize) /* limit to one line at a time */ - len = rowsize; - if ((len % groupsize) != 0) /* no mixed size output */ - groupsize = 1; - - switch (groupsize) { - case 8: { - const u64 *ptr8 = buf; - int ngroups = len / groupsize; - - for (j = 0; j < ngroups; j++) - lx += scnprintf((char *)(linebuf + lx), linebuflen - lx, - "%s%16.16llx", j ? " " : "", - (unsigned long long)*(ptr8 + j)); - ascii_column = 17 * ngroups + 2; - break; - } - - case 4: { - const u32 *ptr4 = buf; - int ngroups = len / groupsize; - - for (j = 0; j < ngroups; j++) - lx += scnprintf((char *)(linebuf + lx), linebuflen - lx, - "%s%8.8x", j ? " " : "", *(ptr4 + j)); - ascii_column = 9 * ngroups + 2; - break; - } - - case 2: { - const u16 *ptr2 = buf; - int ngroups = len / groupsize; - - for (j = 0; j < ngroups; j++) - lx += scnprintf((char *)(linebuf + lx), linebuflen - lx, - "%s%4.4x", j ? " " : "", *(ptr2 + j)); - ascii_column = 5 * ngroups + 2; - break; - } - - default: - for (j = 0; (j < len) && (lx + 3) <= linebuflen; j++) { - ch = ptr[j]; - linebuf[lx++] = hex_asc(ch >> 4); - linebuf[lx++] = hex_asc(ch & 0x0f); - linebuf[lx++] = ' '; - } - if (j) - lx--; - - ascii_column = 3 * rowsize + 2; - break; - } - if (!ascii) - goto nil; - - while (lx < (linebuflen - 1) && lx < (ascii_column - 1)) - linebuf[lx++] = ' '; - for (j = 0; (j < len) && (lx + 2) < linebuflen; j++) - linebuf[lx++] = (isascii(ptr[j]) && isprint(ptr[j])) ? 
ptr[j] - : '.'; -nil: - linebuf[lx++] = '\0'; -} - -void _kc_print_hex_dump(const char *level, - const char *prefix_str, int prefix_type, - int rowsize, int groupsize, - const void *buf, size_t len, bool ascii) -{ - const u8 *ptr = buf; - int i, linelen, remaining = len; - unsigned char linebuf[200]; - - if (rowsize != 16 && rowsize != 32) - rowsize = 16; - - for (i = 0; i < len; i += rowsize) { - linelen = min(remaining, rowsize); - remaining -= rowsize; - _kc_hex_dump_to_buffer(ptr + i, linelen, rowsize, groupsize, - linebuf, sizeof(linebuf), ascii); - - switch (prefix_type) { - case DUMP_PREFIX_ADDRESS: - printk("%s%s%*p: %s\n", level, prefix_str, - (int)(2 * sizeof(void *)), ptr + i, linebuf); - break; - case DUMP_PREFIX_OFFSET: - printk("%s%s%.8x: %s\n", level, prefix_str, i, linebuf); - break; - default: - printk("%s%s%s\n", level, prefix_str, linebuf); - break; - } - } -} - -#ifdef HAVE_I2C_SUPPORT -struct i2c_client * -_kc_i2c_new_device(struct i2c_adapter *adap, struct i2c_board_info const *info) -{ - struct i2c_client *client; - int status; - - client = kzalloc(sizeof *client, GFP_KERNEL); - if (!client) - return NULL; - - client->adapter = adap; - - client->dev.platform_data = info->platform_data; - - client->flags = info->flags; - client->addr = info->addr; - - strlcpy(client->name, info->type, sizeof(client->name)); - - /* Check for address business */ - status = i2c_check_addr(adap, client->addr); - if (status) - goto out_err; - - client->dev.parent = &client->adapter->dev; - client->dev.bus = &i2c_bus_type; - - status = i2c_attach_client(client); - if (status) - goto out_err; - - dev_dbg(&adap->dev, "client [%s] registered with bus id %s\n", - client->name, dev_name(&client->dev)); - - return client; - -out_err: - dev_err(&adap->dev, "Failed to register i2c client %s at 0x%02x " - "(%d)\n", client->name, client->addr, status); - kfree(client); - return NULL; -} -#endif /* HAVE_I2C_SUPPORT */ -#endif /* < 2.6.22 */ - -/*****************************************************************************/ -#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,24) ) -#ifdef NAPI -struct net_device *napi_to_poll_dev(const struct napi_struct *napi) -{ - struct adapter_q_vector *q_vector = container_of(napi, - struct adapter_q_vector, - napi); - return &q_vector->poll_dev; -} - -int __kc_adapter_clean(struct net_device *netdev, int *budget) -{ - int work_done; - int work_to_do = min(*budget, netdev->quota); - /* kcompat.h netif_napi_add puts napi struct in "fake netdev->priv" */ - struct napi_struct *napi = netdev->priv; - work_done = napi->poll(napi, work_to_do); - *budget -= work_done; - netdev->quota -= work_done; - return (work_done >= work_to_do) ? 
1 : 0; -} -#endif /* NAPI */ -#endif /* <= 2.6.24 */ - -/*****************************************************************************/ -#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,26) ) -void _kc_pci_disable_link_state(struct pci_dev *pdev, int state) -{ - struct pci_dev *parent = pdev->bus->self; - u16 link_state; - int pos; - - if (!parent) - return; - - pos = pci_find_capability(parent, PCI_CAP_ID_EXP); - if (pos) { - pci_read_config_word(parent, pos + PCI_EXP_LNKCTL, &link_state); - link_state &= ~state; - pci_write_config_word(parent, pos + PCI_EXP_LNKCTL, link_state); - } -} -#endif /* < 2.6.26 */ - -/*****************************************************************************/ -#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,27) ) -#ifdef HAVE_TX_MQ -void _kc_netif_tx_stop_all_queues(struct net_device *netdev) -{ - struct adapter_struct *adapter = netdev_priv(netdev); - int i; - - netif_stop_queue(netdev); - if (netif_is_multiqueue(netdev)) - for (i = 0; i < adapter->num_tx_queues; i++) - netif_stop_subqueue(netdev, i); -} -void _kc_netif_tx_wake_all_queues(struct net_device *netdev) -{ - struct adapter_struct *adapter = netdev_priv(netdev); - int i; - - netif_wake_queue(netdev); - if (netif_is_multiqueue(netdev)) - for (i = 0; i < adapter->num_tx_queues; i++) - netif_wake_subqueue(netdev, i); -} -void _kc_netif_tx_start_all_queues(struct net_device *netdev) -{ - struct adapter_struct *adapter = netdev_priv(netdev); - int i; - - netif_start_queue(netdev); - if (netif_is_multiqueue(netdev)) - for (i = 0; i < adapter->num_tx_queues; i++) - netif_start_subqueue(netdev, i); -} -#endif /* HAVE_TX_MQ */ - -#ifndef __WARN_printf -void __kc_warn_slowpath(const char *file, int line, const char *fmt, ...) -{ - va_list args; - - printk(KERN_WARNING "------------[ cut here ]------------\n"); - printk(KERN_WARNING "WARNING: at %s:%d %s()\n", file, line); - va_start(args, fmt); - vprintk(fmt, args); - va_end(args); - - dump_stack(); -} -#endif /* __WARN_printf */ -#endif /* < 2.6.27 */ - -/*****************************************************************************/ -#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,28) ) - -int -_kc_pci_prepare_to_sleep(struct pci_dev *dev) -{ - pci_power_t target_state; - int error; - - target_state = pci_choose_state(dev, PMSG_SUSPEND); - - pci_enable_wake(dev, target_state, true); - - error = pci_set_power_state(dev, target_state); - - if (error) - pci_enable_wake(dev, target_state, false); - - return error; -} - -int -_kc_pci_wake_from_d3(struct pci_dev *dev, bool enable) -{ - int err; - - err = pci_enable_wake(dev, PCI_D3cold, enable); - if (err) - goto out; - - err = pci_enable_wake(dev, PCI_D3hot, enable); - -out: - return err; -} -#endif /* < 2.6.28 */ - -/*****************************************************************************/ -#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,29) ) -static void __kc_pci_set_master(struct pci_dev *pdev, bool enable) -{ - u16 old_cmd, cmd; - - pci_read_config_word(pdev, PCI_COMMAND, &old_cmd); - if (enable) - cmd = old_cmd | PCI_COMMAND_MASTER; - else - cmd = old_cmd & ~PCI_COMMAND_MASTER; - if (cmd != old_cmd) { - dev_dbg(pci_dev_to_dev(pdev), "%s bus mastering\n", - enable ? 
"enabling" : "disabling"); - pci_write_config_word(pdev, PCI_COMMAND, cmd); - } -#if ( LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,7) ) - pdev->is_busmaster = enable; -#endif -} - -void _kc_pci_clear_master(struct pci_dev *dev) -{ - __kc_pci_set_master(dev, false); -} -#endif /* < 2.6.29 */ - -#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,34) ) -#if (RHEL_RELEASE_CODE < RHEL_RELEASE_VERSION(6,0)) -int _kc_pci_num_vf(struct pci_dev *dev) -{ - int num_vf = 0; -#ifdef CONFIG_PCI_IOV - struct pci_dev *vfdev; - - /* loop through all ethernet devices starting at PF dev */ - vfdev = pci_get_class(PCI_CLASS_NETWORK_ETHERNET << 8, NULL); - while (vfdev) { - if (vfdev->is_virtfn && vfdev->physfn == dev) - num_vf++; - - vfdev = pci_get_class(PCI_CLASS_NETWORK_ETHERNET << 8, vfdev); - } - -#endif - return num_vf; -} -#endif /* RHEL_RELEASE_CODE */ -#endif /* < 2.6.34 */ - -#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,35) ) -#ifdef HAVE_TX_MQ -#if (!(RHEL_RELEASE_CODE && RHEL_RELEASE_CODE >= RHEL_RELEASE_VERSION(6,0))) -#ifndef CONFIG_NETDEVICES_MULTIQUEUE -void _kc_netif_set_real_num_tx_queues(struct net_device *dev, unsigned int txq) -{ - unsigned int real_num = dev->real_num_tx_queues; - struct Qdisc *qdisc; - int i; - - if (unlikely(txq > dev->num_tx_queues)) - ; - else if (txq > real_num) - dev->real_num_tx_queues = txq; - else if ( txq < real_num) { - dev->real_num_tx_queues = txq; - for (i = txq; i < dev->num_tx_queues; i++) { - qdisc = netdev_get_tx_queue(dev, i)->qdisc; - if (qdisc) { - spin_lock_bh(qdisc_lock(qdisc)); - qdisc_reset(qdisc); - spin_unlock_bh(qdisc_lock(qdisc)); - } - } - } -} -#endif /* CONFIG_NETDEVICES_MULTIQUEUE */ -#endif /* !(RHEL_RELEASE_CODE >= RHEL_RELEASE_VERSION(6,0)) */ -#endif /* HAVE_TX_MQ */ - -ssize_t _kc_simple_write_to_buffer(void *to, size_t available, loff_t *ppos, - const void __user *from, size_t count) -{ - loff_t pos = *ppos; - size_t res; - - if (pos < 0) - return -EINVAL; - if (pos >= available || !count) - return 0; - if (count > available - pos) - count = available - pos; - res = copy_from_user(to + pos, from, count); - if (res == count) - return -EFAULT; - count -= res; - *ppos = pos + count; - return count; -} - -#endif /* < 2.6.35 */ - -/*****************************************************************************/ -#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,36) ) -static const u32 _kc_flags_dup_features = - (ETH_FLAG_LRO | ETH_FLAG_NTUPLE | ETH_FLAG_RXHASH); - -u32 _kc_ethtool_op_get_flags(struct net_device *dev) -{ - return dev->features & _kc_flags_dup_features; -} - -int _kc_ethtool_op_set_flags(struct net_device *dev, u32 data, u32 supported) -{ - if (data & ~supported) - return -EINVAL; - - dev->features = ((dev->features & ~_kc_flags_dup_features) | - (data & _kc_flags_dup_features)); - return 0; -} -#endif /* < 2.6.36 */ - -/******************************************************************************/ -#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,39) ) -#if (!(RHEL_RELEASE_CODE && RHEL_RELEASE_CODE > RHEL_RELEASE_VERSION(6,0))) - - - -#endif /* !(RHEL_RELEASE_CODE > RHEL_RELEASE_VERSION(6,0)) */ -#endif /* < 2.6.39 */ - -/******************************************************************************/ -#if ( LINUX_VERSION_CODE < KERNEL_VERSION(3,4,0) ) -void _kc_skb_add_rx_frag(struct sk_buff *skb, int i, struct page *page, - int off, int size, unsigned int truesize) -{ - skb_fill_page_desc(skb, i, page, off, size); - skb->len += size; - skb->data_len += size; - skb->truesize += truesize; -} - -int _kc_simple_open(struct inode *inode, 
struct file *file)
-{
-	if (inode->i_private)
-		file->private_data = inode->i_private;
-
-	return 0;
-}
-
-#endif /* < 3.4.0 */
-
-/******************************************************************************/
-#if ( LINUX_VERSION_CODE < KERNEL_VERSION(3,7,0) )
-#if !(SLE_VERSION_CODE && SLE_VERSION_CODE >= SLE_VERSION(11,3,0)) && \
-    !(RHEL_RELEASE_CODE && RHEL_RELEASE_CODE >= RHEL_RELEASE_VERSION(6,5))
-static inline int __kc_pcie_cap_version(struct pci_dev *dev)
-{
-	int pos;
-	u16 reg16;
-
-	pos = pci_find_capability(dev, PCI_CAP_ID_EXP);
-	if (!pos)
-		return 0;
-	pci_read_config_word(dev, pos + PCI_EXP_FLAGS, &reg16);
-	return reg16 & PCI_EXP_FLAGS_VERS;
-}
-
-static inline bool __kc_pcie_cap_has_devctl(const struct pci_dev __always_unused *dev)
-{
-	return true;
-}
-
-static inline bool __kc_pcie_cap_has_lnkctl(struct pci_dev *dev)
-{
-	int type = pci_pcie_type(dev);
-
-	return __kc_pcie_cap_version(dev) > 1 ||
-	       type == PCI_EXP_TYPE_ROOT_PORT ||
-	       type == PCI_EXP_TYPE_ENDPOINT ||
-	       type == PCI_EXP_TYPE_LEG_END;
-}
-
-static inline bool __kc_pcie_cap_has_sltctl(struct pci_dev *dev)
-{
-	int type = pci_pcie_type(dev);
-	int pos;
-	u16 pcie_flags_reg;
-
-	pos = pci_find_capability(dev, PCI_CAP_ID_EXP);
-	if (!pos)
-		return 0;
-	pci_read_config_word(dev, pos + PCI_EXP_FLAGS, &pcie_flags_reg);
-
-	return __kc_pcie_cap_version(dev) > 1 ||
-	       type == PCI_EXP_TYPE_ROOT_PORT ||
-	       (type == PCI_EXP_TYPE_DOWNSTREAM &&
-		pcie_flags_reg & PCI_EXP_FLAGS_SLOT);
-}
-
-static inline bool __kc_pcie_cap_has_rtctl(struct pci_dev *dev)
-{
-	int type = pci_pcie_type(dev);
-
-	return __kc_pcie_cap_version(dev) > 1 ||
-	       type == PCI_EXP_TYPE_ROOT_PORT ||
-	       type == PCI_EXP_TYPE_RC_EC;
-}
-
-static bool __kc_pcie_capability_reg_implemented(struct pci_dev *dev, int pos)
-{
-	if (!pci_is_pcie(dev))
-		return false;
-
-	switch (pos) {
-	case PCI_EXP_FLAGS_TYPE:
-		return true;
-	case PCI_EXP_DEVCAP:
-	case PCI_EXP_DEVCTL:
-	case PCI_EXP_DEVSTA:
-		return __kc_pcie_cap_has_devctl(dev);
-	case PCI_EXP_LNKCAP:
-	case PCI_EXP_LNKCTL:
-	case PCI_EXP_LNKSTA:
-		return __kc_pcie_cap_has_lnkctl(dev);
-	case PCI_EXP_SLTCAP:
-	case PCI_EXP_SLTCTL:
-	case PCI_EXP_SLTSTA:
-		return __kc_pcie_cap_has_sltctl(dev);
-	case PCI_EXP_RTCTL:
-	case PCI_EXP_RTCAP:
-	case PCI_EXP_RTSTA:
-		return __kc_pcie_cap_has_rtctl(dev);
-	case PCI_EXP_DEVCAP2:
-	case PCI_EXP_DEVCTL2:
-	case PCI_EXP_LNKCAP2:
-	case PCI_EXP_LNKCTL2:
-	case PCI_EXP_LNKSTA2:
-		return __kc_pcie_cap_version(dev) > 1;
-	default:
-		return false;
-	}
-}
-
-/*
- * Note that these accessor functions are only for the "PCI Express
- * Capability" (see PCIe spec r3.0, sec 7.8). They do not apply to the
- * other "PCI Express Extended Capabilities" (AER, VC, ACS, MFVC, etc.)
- */
-int __kc_pcie_capability_read_word(struct pci_dev *dev, int pos, u16 *val)
-{
-	int ret;
-
-	*val = 0;
-	if (pos & 1)
-		return -EINVAL;
-
-	if (__kc_pcie_capability_reg_implemented(dev, pos)) {
-		ret = pci_read_config_word(dev, pci_pcie_cap(dev) + pos, val);
-		/*
-		 * Reset *val to 0 if pci_read_config_word() fails, it may
-		 * have been written as 0xFFFF if hardware error happens
-		 * during pci_read_config_word().
-		 */
-		if (ret)
-			*val = 0;
-		return ret;
-	}
-
-	/*
-	 * For Functions that do not implement the Slot Capabilities,
-	 * Slot Status, and Slot Control registers, these spaces must
-	 * be hardwired to 0b, with the exception of the Presence Detect
-	 * State bit in the Slot Status register of Downstream Ports,
-	 * which must be hardwired to 1b. 
(PCIe Base Spec 3.0, sec 7.8) - */ - if (pci_is_pcie(dev) && pos == PCI_EXP_SLTSTA && - pci_pcie_type(dev) == PCI_EXP_TYPE_DOWNSTREAM) { - *val = PCI_EXP_SLTSTA_PDS; - } - - return 0; -} - -int __kc_pcie_capability_write_word(struct pci_dev *dev, int pos, u16 val) -{ - if (pos & 1) - return -EINVAL; - - if (!__kc_pcie_capability_reg_implemented(dev, pos)) - return 0; - - return pci_write_config_word(dev, pci_pcie_cap(dev) + pos, val); -} - -int __kc_pcie_capability_clear_and_set_word(struct pci_dev *dev, int pos, - u16 clear, u16 set) -{ - int ret; - u16 val; - - ret = __kc_pcie_capability_read_word(dev, pos, &val); - if (!ret) { - val &= ~clear; - val |= set; - ret = __kc_pcie_capability_write_word(dev, pos, val); - } - - return ret; -} -#endif /* !(SLE_VERSION_CODE && SLE_VERSION_CODE >= SLE_VERSION(11,3,0)) && \ - !(RHEL_RELEASE_CODE && RHEL_RELEASE_CODE >= RHEL_RELEASE_VERSION(6,5)) */ -#endif /* < 3.7.0 */ - -/******************************************************************************/ -#if ( LINUX_VERSION_CODE < KERNEL_VERSION(3,9,0) ) -#endif /* 3.9.0 */ - -/*****************************************************************************/ -#if ( LINUX_VERSION_CODE < KERNEL_VERSION(3,10,0) ) -#ifdef CONFIG_PCI_IOV -int __kc_pci_vfs_assigned(struct pci_dev *dev) -{ - unsigned int vfs_assigned = 0; -#ifdef HAVE_PCI_DEV_FLAGS_ASSIGNED - int pos; - struct pci_dev *vfdev; - unsigned short dev_id; - - /* only search if we are a PF */ - if (!dev->is_physfn) - return 0; - - /* find SR-IOV capability */ - pos = pci_find_ext_capability(dev, PCI_EXT_CAP_ID_SRIOV); - if (!pos) - return 0; - - /* - * determine the device ID for the VFs, the vendor ID will be the - * same as the PF so there is no need to check for that one - */ - pci_read_config_word(dev, pos + PCI_SRIOV_VF_DID, &dev_id); - - /* loop through all the VFs to see if we own any that are assigned */ - vfdev = pci_get_device(dev->vendor, dev_id, NULL); - while (vfdev) { - /* - * It is considered assigned if it is a virtual function with - * our dev as the physical function and the assigned bit is set - */ - if (vfdev->is_virtfn && (vfdev->physfn == dev) && - (vfdev->dev_flags & PCI_DEV_FLAGS_ASSIGNED)) - vfs_assigned++; - - vfdev = pci_get_device(dev->vendor, dev_id, vfdev); - } - -#endif /* HAVE_PCI_DEV_FLAGS_ASSIGNED */ - return vfs_assigned; -} - -#endif /* CONFIG_PCI_IOV */ -#endif /* 3.10.0 */ diff --git a/lib/librte_eal/linuxapp/kni/ethtool/igb/kcompat.h b/lib/librte_eal/linuxapp/kni/ethtool/igb/kcompat.h index de3b8dc9..84826b26 100644 --- a/lib/librte_eal/linuxapp/kni/ethtool/igb/kcompat.h +++ b/lib/librte_eal/linuxapp/kni/ethtool/igb/kcompat.h @@ -17,7 +17,7 @@ 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. The full GNU General Public License is included in this distribution in - the file called "COPYING". + the file called "LICENSE.GPL". 
 Contact Information:
 e1000-devel Mailing List 
@@ -3891,7 +3891,7 @@ skb_set_hash(struct sk_buff *skb, __u32 hash, __always_unused int type)
 #if (( LINUX_VERSION_CODE >= KERNEL_VERSION(3,19,0) ) \
 	|| ( RHEL_RELEASE_CODE >= RHEL_RELEASE_VERSION(7,2) ))
 #define HAVE_NDO_DFLT_BRIDGE_ADD_MASK
-#if (!( RHEL_RELEASE_CODE >= RHEL_RELEASE_VERSION(7,2) ))
+#if ( RHEL_RELEASE_CODE != RHEL_RELEASE_VERSION(7,2) )
 #define HAVE_NDO_FDB_ADD_VID
 #endif /* !RHEL 7.2 */
 #endif /* >= 3.19.0 */
@@ -3901,12 +3901,13 @@ skb_set_hash(struct sk_buff *skb, __u32 hash, __always_unused int type)
 /* vlan_tx_xx functions got renamed to skb_vlan */
 #define vlan_tx_tag_get skb_vlan_tag_get
 #define vlan_tx_tag_present skb_vlan_tag_present
-#if (!( RHEL_RELEASE_CODE >= RHEL_RELEASE_VERSION(7,2) ))
+#if ( RHEL_RELEASE_CODE != RHEL_RELEASE_VERSION(7,2) )
 #define HAVE_NDO_BRIDGE_SET_DEL_LINK_FLAGS
 #endif /* !RHEL 7.2 */
 #endif /* 4.0.0 */
 
-#if ( LINUX_VERSION_CODE >= KERNEL_VERSION(4,1,0) )
+#if (( LINUX_VERSION_CODE >= KERNEL_VERSION(4,1,0) ) \
+	|| ( RHEL_RELEASE_CODE >= RHEL_RELEASE_VERSION(7,3) ))
 /* ndo_bridge_getlink adds new nlflags parameter */
 #define HAVE_NDO_BRIDGE_GETLINK_NLFLAGS
 #endif /* >= 4.1.0 */
@@ -3916,6 +3917,18 @@ skb_set_hash(struct sk_buff *skb, __u32 hash, __always_unused int type)
 #define HAVE_NDO_BRIDGE_GETLINK_FILTER_MASK_VLAN_FILL
 #endif /* >= 4.2.0 */
 
+/*
+ * vlan_tx_tag_* macros renamed to skb_vlan_tag_* (Linux commit: df8a39defad4)
+ * Older kernels that backported this commit also need the renamed functions.
+ * This fix is specific to Red Hat/CentOS kernels.
+ */
+#if (defined(RHEL_RELEASE_CODE) && \
+	(RHEL_RELEASE_CODE >= RHEL_RELEASE_VERSION(6, 8)) && \
+	(LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 34)))
+#define vlan_tx_tag_get skb_vlan_tag_get
+#define vlan_tx_tag_present skb_vlan_tag_present
+#endif
+
 #if ( LINUX_VERSION_CODE >= KERNEL_VERSION(4,9,0) )
 #define HAVE_VF_VLAN_PROTO
 #endif /* >= 4.9.0 */
diff --git a/lib/librte_eal/linuxapp/kni/ethtool/igb/kcompat_ethtool.c b/lib/librte_eal/linuxapp/kni/ethtool/igb/kcompat_ethtool.c
deleted file mode 100644
index e1a89388..00000000
--- a/lib/librte_eal/linuxapp/kni/ethtool/igb/kcompat_ethtool.c
+++ /dev/null
@@ -1,1171 +0,0 @@
-/*******************************************************************************
-
-  Intel(R) Gigabit Ethernet Linux driver
-  Copyright(c) 2007-2013 Intel Corporation.
-
-  This program is free software; you can redistribute it and/or modify it
-  under the terms and conditions of the GNU General Public License,
-  version 2, as published by the Free Software Foundation.
-
-  This program is distributed in the hope it will be useful, but WITHOUT
-  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
-  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
-  more details.
-
-  You should have received a copy of the GNU General Public License along with
-  this program; if not, write to the Free Software Foundation, Inc.,
-  51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
-
-  The full GNU General Public License is included in this distribution in
-  the file called "COPYING".
-
-  Contact Information:
-  e1000-devel Mailing List 
-  Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
-
-*******************************************************************************/
-
-/*
- * net/core/ethtool.c - Ethtool ioctl handler
- * Copyright (c) 2003 Matthew Wilcox 
- *
- * This file is where we call all the ethtool_ops commands to get
- * the information ethtool needs. 
We fall back to calling do_ioctl() - * for drivers which haven't been converted to ethtool_ops yet. - * - * It's GPL, stupid. - * - * Modification by sfeldma@pobox.com to work as backward compat - * solution for pre-ethtool_ops kernels. - * - copied struct ethtool_ops from ethtool.h - * - defined SET_ETHTOOL_OPS - * - put in some #ifndef NETIF_F_xxx wrappers - * - changes refs to dev->ethtool_ops to ethtool_ops - * - changed dev_ethtool to ethtool_ioctl - * - remove EXPORT_SYMBOL()s - * - added _kc_ prefix in built-in ethtool_op_xxx ops. - */ - -#include -#include -#include -#include -#include -#include -#include - -#include "kcompat.h" - -#undef SUPPORTED_10000baseT_Full -#define SUPPORTED_10000baseT_Full (1 << 12) -#undef ADVERTISED_10000baseT_Full -#define ADVERTISED_10000baseT_Full (1 << 12) -#undef SPEED_10000 -#define SPEED_10000 10000 - -#undef ethtool_ops -#define ethtool_ops _kc_ethtool_ops - -struct _kc_ethtool_ops { - int (*get_settings)(struct net_device *, struct ethtool_cmd *); - int (*set_settings)(struct net_device *, struct ethtool_cmd *); - void (*get_drvinfo)(struct net_device *, struct ethtool_drvinfo *); - int (*get_regs_len)(struct net_device *); - void (*get_regs)(struct net_device *, struct ethtool_regs *, void *); - void (*get_wol)(struct net_device *, struct ethtool_wolinfo *); - int (*set_wol)(struct net_device *, struct ethtool_wolinfo *); - u32 (*get_msglevel)(struct net_device *); - void (*set_msglevel)(struct net_device *, u32); - int (*nway_reset)(struct net_device *); - u32 (*get_link)(struct net_device *); - int (*get_eeprom_len)(struct net_device *); - int (*get_eeprom)(struct net_device *, struct ethtool_eeprom *, u8 *); - int (*set_eeprom)(struct net_device *, struct ethtool_eeprom *, u8 *); - int (*get_coalesce)(struct net_device *, struct ethtool_coalesce *); - int (*set_coalesce)(struct net_device *, struct ethtool_coalesce *); - void (*get_ringparam)(struct net_device *, struct ethtool_ringparam *); - int (*set_ringparam)(struct net_device *, struct ethtool_ringparam *); - void (*get_pauseparam)(struct net_device *, - struct ethtool_pauseparam*); - int (*set_pauseparam)(struct net_device *, - struct ethtool_pauseparam*); - u32 (*get_rx_csum)(struct net_device *); - int (*set_rx_csum)(struct net_device *, u32); - u32 (*get_tx_csum)(struct net_device *); - int (*set_tx_csum)(struct net_device *, u32); - u32 (*get_sg)(struct net_device *); - int (*set_sg)(struct net_device *, u32); - u32 (*get_tso)(struct net_device *); - int (*set_tso)(struct net_device *, u32); - int (*self_test_count)(struct net_device *); - void (*self_test)(struct net_device *, struct ethtool_test *, u64 *); - void (*get_strings)(struct net_device *, u32 stringset, u8 *); - int (*phys_id)(struct net_device *, u32); - int (*get_stats_count)(struct net_device *); - void (*get_ethtool_stats)(struct net_device *, struct ethtool_stats *, - u64 *); -} *ethtool_ops = NULL; - -#undef SET_ETHTOOL_OPS -#define SET_ETHTOOL_OPS(netdev, ops) (ethtool_ops = (ops)) - -/* - * Some useful ethtool_ops methods that are device independent. If we find that - * all drivers want to do the same thing here, we can turn these into dev_() - * function calls. - */ - -#undef ethtool_op_get_link -#define ethtool_op_get_link _kc_ethtool_op_get_link -u32 _kc_ethtool_op_get_link(struct net_device *dev) -{ - return netif_carrier_ok(dev) ? 
1 : 0;
-}
-
-#undef ethtool_op_get_tx_csum
-#define ethtool_op_get_tx_csum _kc_ethtool_op_get_tx_csum
-u32 _kc_ethtool_op_get_tx_csum(struct net_device *dev)
-{
-#ifdef NETIF_F_IP_CSUM
-	return (dev->features & NETIF_F_IP_CSUM) != 0;
-#else
-	return 0;
-#endif
-}
-
-#undef ethtool_op_set_tx_csum
-#define ethtool_op_set_tx_csum _kc_ethtool_op_set_tx_csum
-int _kc_ethtool_op_set_tx_csum(struct net_device *dev, u32 data)
-{
-#ifdef NETIF_F_IP_CSUM
-	if (data)
-#ifdef NETIF_F_IPV6_CSUM
-		dev->features |= (NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM);
-	else
-		dev->features &= ~(NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM);
-#else
-		dev->features |= NETIF_F_IP_CSUM;
-	else
-		dev->features &= ~NETIF_F_IP_CSUM;
-#endif
-#endif
-
-	return 0;
-}
-
-#undef ethtool_op_get_sg
-#define ethtool_op_get_sg _kc_ethtool_op_get_sg
-u32 _kc_ethtool_op_get_sg(struct net_device *dev)
-{
-#ifdef NETIF_F_SG
-	return (dev->features & NETIF_F_SG) != 0;
-#else
-	return 0;
-#endif
-}
-
-#undef ethtool_op_set_sg
-#define ethtool_op_set_sg _kc_ethtool_op_set_sg
-int _kc_ethtool_op_set_sg(struct net_device *dev, u32 data)
-{
-#ifdef NETIF_F_SG
-	if (data)
-		dev->features |= NETIF_F_SG;
-	else
-		dev->features &= ~NETIF_F_SG;
-#endif
-
-	return 0;
-}
-
-#undef ethtool_op_get_tso
-#define ethtool_op_get_tso _kc_ethtool_op_get_tso
-u32 _kc_ethtool_op_get_tso(struct net_device *dev)
-{
-#ifdef NETIF_F_TSO
-	return (dev->features & NETIF_F_TSO) != 0;
-#else
-	return 0;
-#endif
-}
-
-#undef ethtool_op_set_tso
-#define ethtool_op_set_tso _kc_ethtool_op_set_tso
-int _kc_ethtool_op_set_tso(struct net_device *dev, u32 data)
-{
-#ifdef NETIF_F_TSO
-	if (data)
-		dev->features |= NETIF_F_TSO;
-	else
-		dev->features &= ~NETIF_F_TSO;
-#endif
-
-	return 0;
-}
-
-/* Handlers for each ethtool command */
-
-static int ethtool_get_settings(struct net_device *dev, void *useraddr)
-{
-	struct ethtool_cmd cmd = { ETHTOOL_GSET };
-	int err;
-
-	if (!ethtool_ops->get_settings)
-		return -EOPNOTSUPP;
-
-	err = ethtool_ops->get_settings(dev, &cmd);
-	if (err < 0)
-		return err;
-
-	if (copy_to_user(useraddr, &cmd, sizeof(cmd)))
-		return -EFAULT;
-	return 0;
-}
-
-static int ethtool_set_settings(struct net_device *dev, void *useraddr)
-{
-	struct ethtool_cmd cmd;
-
-	if (!ethtool_ops->set_settings)
-		return -EOPNOTSUPP;
-
-	if (copy_from_user(&cmd, useraddr, sizeof(cmd)))
-		return -EFAULT;
-
-	return ethtool_ops->set_settings(dev, &cmd);
-}
-
-static int ethtool_get_drvinfo(struct net_device *dev, void *useraddr)
-{
-	struct ethtool_drvinfo info;
-	struct ethtool_ops *ops = ethtool_ops;
-
-	if (!ops->get_drvinfo)
-		return -EOPNOTSUPP;
-
-	memset(&info, 0, sizeof(info));
-	info.cmd = ETHTOOL_GDRVINFO;
-	ops->get_drvinfo(dev, &info);
-
-	if (ops->self_test_count)
-		info.testinfo_len = ops->self_test_count(dev);
-	if (ops->get_stats_count)
-		info.n_stats = ops->get_stats_count(dev);
-	if (ops->get_regs_len)
-		info.regdump_len = ops->get_regs_len(dev);
-	if (ops->get_eeprom_len)
-		info.eedump_len = ops->get_eeprom_len(dev);
-
-	if (copy_to_user(useraddr, &info, sizeof(info)))
-		return -EFAULT;
-	return 0;
-}
-
-static int ethtool_get_regs(struct net_device *dev, char *useraddr)
-{
-	struct ethtool_regs regs;
-	struct ethtool_ops *ops = ethtool_ops;
-	void *regbuf;
-	int reglen, ret;
-
-	if (!ops->get_regs || !ops->get_regs_len)
-		return -EOPNOTSUPP;
-
-	if (copy_from_user(&regs, useraddr, sizeof(regs)))
-		return -EFAULT;
-
-	reglen = ops->get_regs_len(dev);
-	if (regs.len > reglen)
-		regs.len = reglen;
-
-	regbuf = kmalloc(reglen, GFP_USER);
-	if (!regbuf)
-		return -ENOMEM;
-
-	ops->get_regs(dev, &regs, regbuf);
-
-	ret = -EFAULT;
-	if (copy_to_user(useraddr, &regs, sizeof(regs)))
-		goto out;
-	useraddr += offsetof(struct ethtool_regs, data);
-	if (copy_to_user(useraddr, regbuf, reglen))
-		goto out;
-	ret = 0;
-
-out:
-	kfree(regbuf);
-	return ret;
-}
-
-static int ethtool_get_wol(struct net_device *dev, char *useraddr)
-{
-	struct ethtool_wolinfo wol = { ETHTOOL_GWOL };
-
-	if (!ethtool_ops->get_wol)
-		return -EOPNOTSUPP;
-
-	ethtool_ops->get_wol(dev, &wol);
-
-	if (copy_to_user(useraddr, &wol, sizeof(wol)))
-		return -EFAULT;
-	return 0;
-}
-
-static int ethtool_set_wol(struct net_device *dev, char *useraddr)
-{
-	struct ethtool_wolinfo wol;
-
-	if (!ethtool_ops->set_wol)
-		return -EOPNOTSUPP;
-
-	if (copy_from_user(&wol, useraddr, sizeof(wol)))
-		return -EFAULT;
-
-	return ethtool_ops->set_wol(dev, &wol);
-}
-
-static int ethtool_get_msglevel(struct net_device *dev, char *useraddr)
-{
-	struct ethtool_value edata = { ETHTOOL_GMSGLVL };
-
-	if (!ethtool_ops->get_msglevel)
-		return -EOPNOTSUPP;
-
-	edata.data = ethtool_ops->get_msglevel(dev);
-
-	if (copy_to_user(useraddr, &edata, sizeof(edata)))
-		return -EFAULT;
-	return 0;
-}
-
-static int ethtool_set_msglevel(struct net_device *dev, char *useraddr)
-{
-	struct ethtool_value edata;
-
-	if (!ethtool_ops->set_msglevel)
-		return -EOPNOTSUPP;
-
-	if (copy_from_user(&edata, useraddr, sizeof(edata)))
-		return -EFAULT;
-
-	ethtool_ops->set_msglevel(dev, edata.data);
-	return 0;
-}
-
-static int ethtool_nway_reset(struct net_device *dev)
-{
-	if (!ethtool_ops->nway_reset)
-		return -EOPNOTSUPP;
-
-	return ethtool_ops->nway_reset(dev);
-}
-
-static int ethtool_get_link(struct net_device *dev, void *useraddr)
-{
-	struct ethtool_value edata = { ETHTOOL_GLINK };
-
-	if (!ethtool_ops->get_link)
-		return -EOPNOTSUPP;
-
-	edata.data = ethtool_ops->get_link(dev);
-
-	if (copy_to_user(useraddr, &edata, sizeof(edata)))
-		return -EFAULT;
-	return 0;
-}
-
-static int ethtool_get_eeprom(struct net_device *dev, void *useraddr)
-{
-	struct ethtool_eeprom eeprom;
-	struct ethtool_ops *ops = ethtool_ops;
-	u8 *data;
-	int ret;
-
-	if (!ops->get_eeprom || !ops->get_eeprom_len)
-		return -EOPNOTSUPP;
-
-	if (copy_from_user(&eeprom, useraddr, sizeof(eeprom)))
-		return -EFAULT;
-
-	/* Check for wrap and zero */
-	if (eeprom.offset + eeprom.len <= eeprom.offset)
-		return -EINVAL;
-
-	/* Check for exceeding total eeprom len */
-	if (eeprom.offset + eeprom.len > ops->get_eeprom_len(dev))
-		return -EINVAL;
-
-	data = kmalloc(eeprom.len, GFP_USER);
-	if (!data)
-		return -ENOMEM;
-
-	ret = -EFAULT;
-	if (copy_from_user(data, useraddr + sizeof(eeprom), eeprom.len))
-		goto out;
-
-	ret = ops->get_eeprom(dev, &eeprom, data);
-	if (ret)
-		goto out;
-
-	ret = -EFAULT;
-	if (copy_to_user(useraddr, &eeprom, sizeof(eeprom)))
-		goto out;
-	if (copy_to_user(useraddr + sizeof(eeprom), data, eeprom.len))
-		goto out;
-	ret = 0;
-
-out:
-	kfree(data);
-	return ret;
-}
-
-static int ethtool_set_eeprom(struct net_device *dev, void *useraddr)
-{
-	struct ethtool_eeprom eeprom;
-	struct ethtool_ops *ops = ethtool_ops;
-	u8 *data;
-	int ret;
-
-	if (!ops->set_eeprom || !ops->get_eeprom_len)
-		return -EOPNOTSUPP;
-
-	if (copy_from_user(&eeprom, useraddr, sizeof(eeprom)))
-		return -EFAULT;
-
-	/* Check for wrap and zero */
-	if (eeprom.offset + eeprom.len <= eeprom.offset)
-		return -EINVAL;
-
-	/* Check for exceeding total eeprom len */
-	if (eeprom.offset + eeprom.len > ops->get_eeprom_len(dev))
-		return -EINVAL;
-
-	data = kmalloc(eeprom.len, GFP_USER);
-	if (!data)
-		return -ENOMEM;
-
-	ret = -EFAULT;
-	if (copy_from_user(data, useraddr + sizeof(eeprom), eeprom.len))
-		goto out;
-
-	ret = ops->set_eeprom(dev, &eeprom, data);
-	if (ret)
-		goto out;
-
-	if (copy_to_user(useraddr + sizeof(eeprom), data, eeprom.len))
-		ret = -EFAULT;
-
-out:
-	kfree(data);
-	return ret;
-}
-
-static int ethtool_get_coalesce(struct net_device *dev, void *useraddr)
-{
-	struct ethtool_coalesce coalesce = { ETHTOOL_GCOALESCE };
-
-	if (!ethtool_ops->get_coalesce)
-		return -EOPNOTSUPP;
-
-	ethtool_ops->get_coalesce(dev, &coalesce);
-
-	if (copy_to_user(useraddr, &coalesce, sizeof(coalesce)))
-		return -EFAULT;
-	return 0;
-}
-
-static int ethtool_set_coalesce(struct net_device *dev, void *useraddr)
-{
-	struct ethtool_coalesce coalesce;
-
-	if (!ethtool_ops->set_coalesce)
-		return -EOPNOTSUPP;
-
-	if (copy_from_user(&coalesce, useraddr, sizeof(coalesce)))
-		return -EFAULT;
-
-	return ethtool_ops->set_coalesce(dev, &coalesce);
-}
-
-static int ethtool_get_ringparam(struct net_device *dev, void *useraddr)
-{
-	struct ethtool_ringparam ringparam = { ETHTOOL_GRINGPARAM };
-
-	if (!ethtool_ops->get_ringparam)
-		return -EOPNOTSUPP;
-
-	ethtool_ops->get_ringparam(dev, &ringparam);
-
-	if (copy_to_user(useraddr, &ringparam, sizeof(ringparam)))
-		return -EFAULT;
-	return 0;
-}
-
-static int ethtool_set_ringparam(struct net_device *dev, void *useraddr)
-{
-	struct ethtool_ringparam ringparam;
-
-	if (!ethtool_ops->set_ringparam)
-		return -EOPNOTSUPP;
-
-	if (copy_from_user(&ringparam, useraddr, sizeof(ringparam)))
-		return -EFAULT;
-
-	return ethtool_ops->set_ringparam(dev, &ringparam);
-}
-
-static int ethtool_get_pauseparam(struct net_device *dev, void *useraddr)
-{
-	struct ethtool_pauseparam pauseparam = { ETHTOOL_GPAUSEPARAM };
-
-	if (!ethtool_ops->get_pauseparam)
-		return -EOPNOTSUPP;
-
-	ethtool_ops->get_pauseparam(dev, &pauseparam);
-
-	if (copy_to_user(useraddr, &pauseparam, sizeof(pauseparam)))
-		return -EFAULT;
-	return 0;
-}
-
-static int ethtool_set_pauseparam(struct net_device *dev, void *useraddr)
-{
-	struct ethtool_pauseparam pauseparam;
-
-	if (!ethtool_ops->set_pauseparam)
-		return -EOPNOTSUPP;
-
-	if (copy_from_user(&pauseparam, useraddr, sizeof(pauseparam)))
-		return -EFAULT;
-
-	return ethtool_ops->set_pauseparam(dev, &pauseparam);
-}
-
-static int ethtool_get_rx_csum(struct net_device *dev, char *useraddr)
-{
-	struct ethtool_value edata = { ETHTOOL_GRXCSUM };
-
-	if (!ethtool_ops->get_rx_csum)
-		return -EOPNOTSUPP;
-
-	edata.data = ethtool_ops->get_rx_csum(dev);
-
-	if (copy_to_user(useraddr, &edata, sizeof(edata)))
-		return -EFAULT;
-	return 0;
-}
-
-static int ethtool_set_rx_csum(struct net_device *dev, char *useraddr)
-{
-	struct ethtool_value edata;
-
-	if (!ethtool_ops->set_rx_csum)
-		return -EOPNOTSUPP;
-
-	if (copy_from_user(&edata, useraddr, sizeof(edata)))
-		return -EFAULT;
-
-	ethtool_ops->set_rx_csum(dev, edata.data);
-	return 0;
-}
-
-static int ethtool_get_tx_csum(struct net_device *dev, char *useraddr)
-{
-	struct ethtool_value edata = { ETHTOOL_GTXCSUM };
-
-	if (!ethtool_ops->get_tx_csum)
-		return -EOPNOTSUPP;
-
-	edata.data = ethtool_ops->get_tx_csum(dev);
-
-	if (copy_to_user(useraddr, &edata, sizeof(edata)))
-		return -EFAULT;
-	return 0;
-}
-
-static int ethtool_set_tx_csum(struct net_device *dev, char *useraddr)
-{
-	struct ethtool_value edata;
-
-	if (!ethtool_ops->set_tx_csum)
-		return -EOPNOTSUPP;
-
-	if (copy_from_user(&edata, useraddr, sizeof(edata)))
-		return -EFAULT;
-
-	return ethtool_ops->set_tx_csum(dev, edata.data);
-}
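/*
 * A minimal usage sketch (editorial aside, not part of the deleted file):
 * a pre-ethtool_ops driver fills the compat struct ethtool_ops (remapped
 * to struct _kc_ethtool_ops above) and registers it through the compat
 * SET_ETHTOOL_OPS() macro, after which ethtool_ioctl() below dispatches
 * the ETHTOOL_* commands to it. The "example" driver name and the
 * get_drvinfo body are hypothetical.
 */
static void example_get_drvinfo(struct net_device *dev,
				struct ethtool_drvinfo *info)
{
	/* report driver identity for the ETHTOOL_GDRVINFO handler above */
	strncpy(info->driver, "example", sizeof(info->driver) - 1);
	strncpy(info->version, "1.0", sizeof(info->version) - 1);
}

static struct ethtool_ops example_ethtool_ops = {
	.get_drvinfo = example_get_drvinfo,
	.get_link    = ethtool_op_get_link, /* resolves to _kc_ethtool_op_get_link */
	.get_sg      = ethtool_op_get_sg,   /* resolves to _kc_ethtool_op_get_sg */
	.set_sg      = ethtool_op_set_sg,
};

/* In the driver's probe path; this assigns the file-scope ethtool_ops
 * pointer that every handler above consults:
 *	SET_ETHTOOL_OPS(netdev, &example_ethtool_ops);
 */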
- -static int ethtool_get_sg(struct net_device *dev, char *useraddr) -{ - struct ethtool_value edata = { ETHTOOL_GSG }; - - if (!ethtool_ops->get_sg) - return -EOPNOTSUPP; - - edata.data = ethtool_ops->get_sg(dev); - - if (copy_to_user(useraddr, &edata, sizeof(edata))) - return -EFAULT; - return 0; -} - -static int ethtool_set_sg(struct net_device *dev, char *useraddr) -{ - struct ethtool_value edata; - - if (!ethtool_ops->set_sg) - return -EOPNOTSUPP; - - if (copy_from_user(&edata, useraddr, sizeof(edata))) - return -EFAULT; - - return ethtool_ops->set_sg(dev, edata.data); -} - -static int ethtool_get_tso(struct net_device *dev, char *useraddr) -{ - struct ethtool_value edata = { ETHTOOL_GTSO }; - - if (!ethtool_ops->get_tso) - return -EOPNOTSUPP; - - edata.data = ethtool_ops->get_tso(dev); - - if (copy_to_user(useraddr, &edata, sizeof(edata))) - return -EFAULT; - return 0; -} - -static int ethtool_set_tso(struct net_device *dev, char *useraddr) -{ - struct ethtool_value edata; - - if (!ethtool_ops->set_tso) - return -EOPNOTSUPP; - - if (copy_from_user(&edata, useraddr, sizeof(edata))) - return -EFAULT; - - return ethtool_ops->set_tso(dev, edata.data); -} - -static int ethtool_self_test(struct net_device *dev, char *useraddr) -{ - struct ethtool_test test; - struct ethtool_ops *ops = ethtool_ops; - u64 *data; - int ret; - - if (!ops->self_test || !ops->self_test_count) - return -EOPNOTSUPP; - - if (copy_from_user(&test, useraddr, sizeof(test))) - return -EFAULT; - - test.len = ops->self_test_count(dev); - data = kmalloc(test.len * sizeof(u64), GFP_USER); - if (!data) - return -ENOMEM; - - ops->self_test(dev, &test, data); - - ret = -EFAULT; - if (copy_to_user(useraddr, &test, sizeof(test))) - goto out; - useraddr += sizeof(test); - if (copy_to_user(useraddr, data, test.len * sizeof(u64))) - goto out; - ret = 0; - -out: - kfree(data); - return ret; -} - -static int ethtool_get_strings(struct net_device *dev, void *useraddr) -{ - struct ethtool_gstrings gstrings; - struct ethtool_ops *ops = ethtool_ops; - u8 *data; - int ret; - - if (!ops->get_strings) - return -EOPNOTSUPP; - - if (copy_from_user(&gstrings, useraddr, sizeof(gstrings))) - return -EFAULT; - - switch (gstrings.string_set) { - case ETH_SS_TEST: - if (!ops->self_test_count) - return -EOPNOTSUPP; - gstrings.len = ops->self_test_count(dev); - break; - case ETH_SS_STATS: - if (!ops->get_stats_count) - return -EOPNOTSUPP; - gstrings.len = ops->get_stats_count(dev); - break; - default: - return -EINVAL; - } - - data = kmalloc(gstrings.len * ETH_GSTRING_LEN, GFP_USER); - if (!data) - return -ENOMEM; - - ops->get_strings(dev, gstrings.string_set, data); - - ret = -EFAULT; - if (copy_to_user(useraddr, &gstrings, sizeof(gstrings))) - goto out; - useraddr += sizeof(gstrings); - if (copy_to_user(useraddr, data, gstrings.len * ETH_GSTRING_LEN)) - goto out; - ret = 0; - -out: - kfree(data); - return ret; -} - -static int ethtool_phys_id(struct net_device *dev, void *useraddr) -{ - struct ethtool_value id; - - if (!ethtool_ops->phys_id) - return -EOPNOTSUPP; - - if (copy_from_user(&id, useraddr, sizeof(id))) - return -EFAULT; - - return ethtool_ops->phys_id(dev, id.data); -} - -static int ethtool_get_stats(struct net_device *dev, void *useraddr) -{ - struct ethtool_stats stats; - struct ethtool_ops *ops = ethtool_ops; - u64 *data; - int ret; - - if (!ops->get_ethtool_stats || !ops->get_stats_count) - return -EOPNOTSUPP; - - if (copy_from_user(&stats, useraddr, sizeof(stats))) - return -EFAULT; - - stats.n_stats = ops->get_stats_count(dev); - 
data = kmalloc(stats.n_stats * sizeof(u64), GFP_USER);
-	if (!data)
-		return -ENOMEM;
-
-	ops->get_ethtool_stats(dev, &stats, data);
-
-	ret = -EFAULT;
-	if (copy_to_user(useraddr, &stats, sizeof(stats)))
-		goto out;
-	useraddr += sizeof(stats);
-	if (copy_to_user(useraddr, data, stats.n_stats * sizeof(u64)))
-		goto out;
-	ret = 0;
-
-out:
-	kfree(data);
-	return ret;
-}
-
-/* The main entry point in this file. Called from net/core/dev.c */
-
-#define ETHTOOL_OPS_COMPAT
-int ethtool_ioctl(struct ifreq *ifr)
-{
-	struct net_device *dev = __dev_get_by_name(ifr->ifr_name);
-	void *useraddr = (void *) ifr->ifr_data;
-	u32 ethcmd;
-
-	/*
-	 * XXX: This can be pushed down into the ethtool_* handlers that
-	 * need it. Keep existing behavior for the moment.
-	 */
-	if (!capable(CAP_NET_ADMIN))
-		return -EPERM;
-
-	if (!dev || !netif_device_present(dev))
-		return -ENODEV;
-
-	if (copy_from_user(&ethcmd, useraddr, sizeof (ethcmd)))
-		return -EFAULT;
-
-	switch (ethcmd) {
-	case ETHTOOL_GSET:
-		return ethtool_get_settings(dev, useraddr);
-	case ETHTOOL_SSET:
-		return ethtool_set_settings(dev, useraddr);
-	case ETHTOOL_GDRVINFO:
-		return ethtool_get_drvinfo(dev, useraddr);
-	case ETHTOOL_GREGS:
-		return ethtool_get_regs(dev, useraddr);
-	case ETHTOOL_GWOL:
-		return ethtool_get_wol(dev, useraddr);
-	case ETHTOOL_SWOL:
-		return ethtool_set_wol(dev, useraddr);
-	case ETHTOOL_GMSGLVL:
-		return ethtool_get_msglevel(dev, useraddr);
-	case ETHTOOL_SMSGLVL:
-		return ethtool_set_msglevel(dev, useraddr);
-	case ETHTOOL_NWAY_RST:
-		return ethtool_nway_reset(dev);
-	case ETHTOOL_GLINK:
-		return ethtool_get_link(dev, useraddr);
-	case ETHTOOL_GEEPROM:
-		return ethtool_get_eeprom(dev, useraddr);
-	case ETHTOOL_SEEPROM:
-		return ethtool_set_eeprom(dev, useraddr);
-	case ETHTOOL_GCOALESCE:
-		return ethtool_get_coalesce(dev, useraddr);
-	case ETHTOOL_SCOALESCE:
-		return ethtool_set_coalesce(dev, useraddr);
-	case ETHTOOL_GRINGPARAM:
-		return ethtool_get_ringparam(dev, useraddr);
-	case ETHTOOL_SRINGPARAM:
-		return ethtool_set_ringparam(dev, useraddr);
-	case ETHTOOL_GPAUSEPARAM:
-		return ethtool_get_pauseparam(dev, useraddr);
-	case ETHTOOL_SPAUSEPARAM:
-		return ethtool_set_pauseparam(dev, useraddr);
-	case ETHTOOL_GRXCSUM:
-		return ethtool_get_rx_csum(dev, useraddr);
-	case ETHTOOL_SRXCSUM:
-		return ethtool_set_rx_csum(dev, useraddr);
-	case ETHTOOL_GTXCSUM:
-		return ethtool_get_tx_csum(dev, useraddr);
-	case ETHTOOL_STXCSUM:
-		return ethtool_set_tx_csum(dev, useraddr);
-	case ETHTOOL_GSG:
-		return ethtool_get_sg(dev, useraddr);
-	case ETHTOOL_SSG:
-		return ethtool_set_sg(dev, useraddr);
-	case ETHTOOL_GTSO:
-		return ethtool_get_tso(dev, useraddr);
-	case ETHTOOL_STSO:
-		return ethtool_set_tso(dev, useraddr);
-	case ETHTOOL_TEST:
-		return ethtool_self_test(dev, useraddr);
-	case ETHTOOL_GSTRINGS:
-		return ethtool_get_strings(dev, useraddr);
-	case ETHTOOL_PHYS_ID:
-		return ethtool_phys_id(dev, useraddr);
-	case ETHTOOL_GSTATS:
-		return ethtool_get_stats(dev, useraddr);
-	default:
-		return -EOPNOTSUPP;
-	}
-
-	return -EOPNOTSUPP;
-}
-
-#define mii_if_info _kc_mii_if_info
-struct _kc_mii_if_info {
-	int phy_id;
-	int advertising;
-	int phy_id_mask;
-	int reg_num_mask;
-
-	unsigned int full_duplex : 1;	/* is full duplex? */
-	unsigned int force_media : 1;	/* is autoneg. disabled? 
*/ - - struct net_device *dev; - int (*mdio_read) (struct net_device *dev, int phy_id, int location); - void (*mdio_write) (struct net_device *dev, int phy_id, int location, int val); -}; - -struct ethtool_cmd; -struct mii_ioctl_data; - -#undef mii_link_ok -#define mii_link_ok _kc_mii_link_ok -#undef mii_nway_restart -#define mii_nway_restart _kc_mii_nway_restart -#undef mii_ethtool_gset -#define mii_ethtool_gset _kc_mii_ethtool_gset -#undef mii_ethtool_sset -#define mii_ethtool_sset _kc_mii_ethtool_sset -#undef mii_check_link -#define mii_check_link _kc_mii_check_link -extern int _kc_mii_link_ok (struct mii_if_info *mii); -extern int _kc_mii_nway_restart (struct mii_if_info *mii); -extern int _kc_mii_ethtool_gset(struct mii_if_info *mii, - struct ethtool_cmd *ecmd); -extern int _kc_mii_ethtool_sset(struct mii_if_info *mii, - struct ethtool_cmd *ecmd); -extern void _kc_mii_check_link (struct mii_if_info *mii); -#if ( LINUX_VERSION_CODE > KERNEL_VERSION(2,4,6) ) -#undef generic_mii_ioctl -#define generic_mii_ioctl _kc_generic_mii_ioctl -extern int _kc_generic_mii_ioctl(struct mii_if_info *mii_if, - struct mii_ioctl_data *mii_data, int cmd, - unsigned int *duplex_changed); -#endif /* > 2.4.6 */ - - -struct _kc_pci_dev_ext { - struct pci_dev *dev; - void *pci_drvdata; - struct pci_driver *driver; -}; - -struct _kc_net_dev_ext { - struct net_device *dev; - unsigned int carrier; -}; - - -/**************************************/ -/* mii support */ - -int _kc_mii_ethtool_gset(struct mii_if_info *mii, struct ethtool_cmd *ecmd) -{ - struct net_device *dev = mii->dev; - u32 advert, bmcr, lpa, nego; - - ecmd->supported = - (SUPPORTED_10baseT_Half | SUPPORTED_10baseT_Full | - SUPPORTED_100baseT_Half | SUPPORTED_100baseT_Full | - SUPPORTED_Autoneg | SUPPORTED_TP | SUPPORTED_MII); - - /* only supports twisted-pair */ - ecmd->port = PORT_MII; - - /* only supports internal transceiver */ - ecmd->transceiver = XCVR_INTERNAL; - - /* this isn't fully supported at higher layers */ - ecmd->phy_address = mii->phy_id; - - ecmd->advertising = ADVERTISED_TP | ADVERTISED_MII; - advert = mii->mdio_read(dev, mii->phy_id, MII_ADVERTISE); - if (advert & ADVERTISE_10HALF) - ecmd->advertising |= ADVERTISED_10baseT_Half; - if (advert & ADVERTISE_10FULL) - ecmd->advertising |= ADVERTISED_10baseT_Full; - if (advert & ADVERTISE_100HALF) - ecmd->advertising |= ADVERTISED_100baseT_Half; - if (advert & ADVERTISE_100FULL) - ecmd->advertising |= ADVERTISED_100baseT_Full; - - bmcr = mii->mdio_read(dev, mii->phy_id, MII_BMCR); - lpa = mii->mdio_read(dev, mii->phy_id, MII_LPA); - if (bmcr & BMCR_ANENABLE) { - ecmd->advertising |= ADVERTISED_Autoneg; - ecmd->autoneg = AUTONEG_ENABLE; - - nego = mii_nway_result(advert & lpa); - if (nego == LPA_100FULL || nego == LPA_100HALF) - ecmd->speed = SPEED_100; - else - ecmd->speed = SPEED_10; - if (nego == LPA_100FULL || nego == LPA_10FULL) { - ecmd->duplex = DUPLEX_FULL; - mii->full_duplex = 1; - } else { - ecmd->duplex = DUPLEX_HALF; - mii->full_duplex = 0; - } - } else { - ecmd->autoneg = AUTONEG_DISABLE; - - ecmd->speed = (bmcr & BMCR_SPEED100) ? SPEED_100 : SPEED_10; - ecmd->duplex = (bmcr & BMCR_FULLDPLX) ? 
DUPLEX_FULL : DUPLEX_HALF;
-	}
-
-	/* ignore maxtxpkt, maxrxpkt for now */
-
-	return 0;
-}
-
-int _kc_mii_ethtool_sset(struct mii_if_info *mii, struct ethtool_cmd *ecmd)
-{
-	struct net_device *dev = mii->dev;
-
-	if (ecmd->speed != SPEED_10 && ecmd->speed != SPEED_100)
-		return -EINVAL;
-	if (ecmd->duplex != DUPLEX_HALF && ecmd->duplex != DUPLEX_FULL)
-		return -EINVAL;
-	if (ecmd->port != PORT_MII)
-		return -EINVAL;
-	if (ecmd->transceiver != XCVR_INTERNAL)
-		return -EINVAL;
-	if (ecmd->phy_address != mii->phy_id)
-		return -EINVAL;
-	if (ecmd->autoneg != AUTONEG_DISABLE && ecmd->autoneg != AUTONEG_ENABLE)
-		return -EINVAL;
-
-	/* ignore supported, maxtxpkt, maxrxpkt */
-
-	if (ecmd->autoneg == AUTONEG_ENABLE) {
-		u32 bmcr, advert, tmp;
-
-		if ((ecmd->advertising & (ADVERTISED_10baseT_Half |
-					  ADVERTISED_10baseT_Full |
-					  ADVERTISED_100baseT_Half |
-					  ADVERTISED_100baseT_Full)) == 0)
-			return -EINVAL;
-
-		/* advertise only what has been requested */
-		advert = mii->mdio_read(dev, mii->phy_id, MII_ADVERTISE);
-		tmp = advert & ~(ADVERTISE_ALL | ADVERTISE_100BASE4);
-		if (ecmd->advertising & ADVERTISED_10baseT_Half)
-			tmp |= ADVERTISE_10HALF;
-		if (ecmd->advertising & ADVERTISED_10baseT_Full)
-			tmp |= ADVERTISE_10FULL;
-		if (ecmd->advertising & ADVERTISED_100baseT_Half)
-			tmp |= ADVERTISE_100HALF;
-		if (ecmd->advertising & ADVERTISED_100baseT_Full)
-			tmp |= ADVERTISE_100FULL;
-		if (advert != tmp) {
-			mii->mdio_write(dev, mii->phy_id, MII_ADVERTISE, tmp);
-			mii->advertising = tmp;
-		}
-
-		/* turn on autonegotiation, and force a renegotiate */
-		bmcr = mii->mdio_read(dev, mii->phy_id, MII_BMCR);
-		bmcr |= (BMCR_ANENABLE | BMCR_ANRESTART);
-		mii->mdio_write(dev, mii->phy_id, MII_BMCR, bmcr);
-
-		mii->force_media = 0;
-	} else {
-		u32 bmcr, tmp;
-
-		/* turn off auto negotiation, set speed and duplexity */
-		bmcr = mii->mdio_read(dev, mii->phy_id, MII_BMCR);
-		tmp = bmcr & ~(BMCR_ANENABLE | BMCR_SPEED100 | BMCR_FULLDPLX);
-		if (ecmd->speed == SPEED_100)
-			tmp |= BMCR_SPEED100;
-		if (ecmd->duplex == DUPLEX_FULL) {
-			tmp |= BMCR_FULLDPLX;
-			mii->full_duplex = 1;
-		} else
-			mii->full_duplex = 0;
-		if (bmcr != tmp)
-			mii->mdio_write(dev, mii->phy_id, MII_BMCR, tmp);
-
-		mii->force_media = 1;
-	}
-	return 0;
-}
-
-int _kc_mii_link_ok (struct mii_if_info *mii)
-{
-	/* first, a dummy read, needed to latch some MII phys */
-	mii->mdio_read(mii->dev, mii->phy_id, MII_BMSR);
-	if (mii->mdio_read(mii->dev, mii->phy_id, MII_BMSR) & BMSR_LSTATUS)
-		return 1;
-	return 0;
-}
-
-int _kc_mii_nway_restart (struct mii_if_info *mii)
-{
-	int bmcr;
-	int r = -EINVAL;
-
-	/* if autoneg is off, it's an error */
-	bmcr = mii->mdio_read(mii->dev, mii->phy_id, MII_BMCR);
-
-	if (bmcr & BMCR_ANENABLE) {
-		bmcr |= BMCR_ANRESTART;
-		mii->mdio_write(mii->dev, mii->phy_id, MII_BMCR, bmcr);
-		r = 0;
-	}
-
-	return r;
-}
-
-void _kc_mii_check_link (struct mii_if_info *mii)
-{
-	int cur_link = mii_link_ok(mii);
-	int prev_link = netif_carrier_ok(mii->dev);
-
-	if (cur_link && !prev_link)
-		netif_carrier_on(mii->dev);
-	else if (prev_link && !cur_link)
-		netif_carrier_off(mii->dev);
-}
-
-#if ( LINUX_VERSION_CODE > KERNEL_VERSION(2,4,6) )
-int _kc_generic_mii_ioctl(struct mii_if_info *mii_if,
-			  struct mii_ioctl_data *mii_data, int cmd,
-			  unsigned int *duplex_chg_out)
-{
-	int rc = 0;
-	unsigned int duplex_changed = 0;
-
-	if (duplex_chg_out)
-		*duplex_chg_out = 0;
-
-	mii_data->phy_id &= mii_if->phy_id_mask;
-	mii_data->reg_num &= mii_if->reg_num_mask;
-
-	switch(cmd) {
-	case SIOCDEVPRIVATE:	/* binary compat, remove in 2.5 */
-	case SIOCGMIIPHY:
-		mii_data->phy_id = mii_if->phy_id;
-		/* fall through */
-
-	
case SIOCDEVPRIVATE + 1:/* binary compat, remove in 2.5 */ - case SIOCGMIIREG: - mii_data->val_out = - mii_if->mdio_read(mii_if->dev, mii_data->phy_id, - mii_data->reg_num); - break; - - case SIOCDEVPRIVATE + 2:/* binary compat, remove in 2.5 */ - case SIOCSMIIREG: { - u16 val = mii_data->val_in; - - if (!capable(CAP_NET_ADMIN)) - return -EPERM; - - if (mii_data->phy_id == mii_if->phy_id) { - switch(mii_data->reg_num) { - case MII_BMCR: { - unsigned int new_duplex = 0; - if (val & (BMCR_RESET|BMCR_ANENABLE)) - mii_if->force_media = 0; - else - mii_if->force_media = 1; - if (mii_if->force_media && - (val & BMCR_FULLDPLX)) - new_duplex = 1; - if (mii_if->full_duplex != new_duplex) { - duplex_changed = 1; - mii_if->full_duplex = new_duplex; - } - break; - } - case MII_ADVERTISE: - mii_if->advertising = val; - break; - default: - /* do nothing */ - break; - } - } - - mii_if->mdio_write(mii_if->dev, mii_data->phy_id, - mii_data->reg_num, val); - break; - } - - default: - rc = -EOPNOTSUPP; - break; - } - - if ((rc == 0) && (duplex_chg_out) && (duplex_changed)) - *duplex_chg_out = 1; - - return rc; -} -#endif /* > 2.4.6 */ diff --git a/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/COPYING b/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/COPYING deleted file mode 100644 index 5f297e5b..00000000 --- a/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/COPYING +++ /dev/null @@ -1,339 +0,0 @@ - -"This software program is licensed subject to the GNU General Public License -(GPL). Version 2, June 1991, available at -" - -GNU General Public License - -Version 2, June 1991 - -Copyright (C) 1989, 1991 Free Software Foundation, Inc. -59 Temple Place - Suite 330, Boston, MA 02111-1307, USA - -Everyone is permitted to copy and distribute verbatim copies of this license -document, but changing it is not allowed. - -Preamble - -The licenses for most software are designed to take away your freedom to -share and change it. By contrast, the GNU General Public License is intended -to guarantee your freedom to share and change free software--to make sure -the software is free for all its users. This General Public License applies -to most of the Free Software Foundation's software and to any other program -whose authors commit to using it. (Some other Free Software Foundation -software is covered by the GNU Library General Public License instead.) You -can apply it to your programs, too. - -When we speak of free software, we are referring to freedom, not price. Our -General Public Licenses are designed to make sure that you have the freedom -to distribute copies of free software (and charge for this service if you -wish), that you receive source code or can get it if you want it, that you -can change the software or use pieces of it in new free programs; and that -you know you can do these things. - -To protect your rights, we need to make restrictions that forbid anyone to -deny you these rights or to ask you to surrender the rights. These -restrictions translate to certain responsibilities for you if you distribute -copies of the software, or if you modify it. - -For example, if you distribute copies of such a program, whether gratis or -for a fee, you must give the recipients all the rights that you have. You -must make sure that they, too, receive or can get the source code. And you -must show them these terms so they know their rights. - -We protect your rights with two steps: (1) copyright the software, and (2) -offer you this license which gives you legal permission to copy, distribute -and/or modify the software. 
- -Also, for each author's protection and ours, we want to make certain that -everyone understands that there is no warranty for this free software. If -the software is modified by someone else and passed on, we want its -recipients to know that what they have is not the original, so that any -problems introduced by others will not reflect on the original authors' -reputations. - -Finally, any free program is threatened constantly by software patents. We -wish to avoid the danger that redistributors of a free program will -individually obtain patent licenses, in effect making the program -proprietary. To prevent this, we have made it clear that any patent must be -licensed for everyone's free use or not licensed at all. - -The precise terms and conditions for copying, distribution and modification -follow. - -TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION - -0. This License applies to any program or other work which contains a notice - placed by the copyright holder saying it may be distributed under the - terms of this General Public License. The "Program", below, refers to any - such program or work, and a "work based on the Program" means either the - Program or any derivative work under copyright law: that is to say, a - work containing the Program or a portion of it, either verbatim or with - modifications and/or translated into another language. (Hereinafter, - translation is included without limitation in the term "modification".) - Each licensee is addressed as "you". - - Activities other than copying, distribution and modification are not - covered by this License; they are outside its scope. The act of running - the Program is not restricted, and the output from the Program is covered - only if its contents constitute a work based on the Program (independent - of having been made by running the Program). Whether that is true depends - on what the Program does. - -1. You may copy and distribute verbatim copies of the Program's source code - as you receive it, in any medium, provided that you conspicuously and - appropriately publish on each copy an appropriate copyright notice and - disclaimer of warranty; keep intact all the notices that refer to this - License and to the absence of any warranty; and give any other recipients - of the Program a copy of this License along with the Program. - - You may charge a fee for the physical act of transferring a copy, and you - may at your option offer warranty protection in exchange for a fee. - -2. You may modify your copy or copies of the Program or any portion of it, - thus forming a work based on the Program, and copy and distribute such - modifications or work under the terms of Section 1 above, provided that - you also meet all of these conditions: - - * a) You must cause the modified files to carry prominent notices stating - that you changed the files and the date of any change. - - * b) You must cause any work that you distribute or publish, that in - whole or in part contains or is derived from the Program or any part - thereof, to be licensed as a whole at no charge to all third parties - under the terms of this License. 
- - * c) If the modified program normally reads commands interactively when - run, you must cause it, when started running for such interactive - use in the most ordinary way, to print or display an announcement - including an appropriate copyright notice and a notice that there is - no warranty (or else, saying that you provide a warranty) and that - users may redistribute the program under these conditions, and - telling the user how to view a copy of this License. (Exception: if - the Program itself is interactive but does not normally print such - an announcement, your work based on the Program is not required to - print an announcement.) - - These requirements apply to the modified work as a whole. If identifiable - sections of that work are not derived from the Program, and can be - reasonably considered independent and separate works in themselves, then - this License, and its terms, do not apply to those sections when you - distribute them as separate works. But when you distribute the same - sections as part of a whole which is a work based on the Program, the - distribution of the whole must be on the terms of this License, whose - permissions for other licensees extend to the entire whole, and thus to - each and every part regardless of who wrote it. - - Thus, it is not the intent of this section to claim rights or contest - your rights to work written entirely by you; rather, the intent is to - exercise the right to control the distribution of derivative or - collective works based on the Program. - - In addition, mere aggregation of another work not based on the Program - with the Program (or with a work based on the Program) on a volume of a - storage or distribution medium does not bring the other work under the - scope of this License. - -3. You may copy and distribute the Program (or a work based on it, under - Section 2) in object code or executable form under the terms of Sections - 1 and 2 above provided that you also do one of the following: - - * a) Accompany it with the complete corresponding machine-readable source - code, which must be distributed under the terms of Sections 1 and 2 - above on a medium customarily used for software interchange; or, - - * b) Accompany it with a written offer, valid for at least three years, - to give any third party, for a charge no more than your cost of - physically performing source distribution, a complete machine- - readable copy of the corresponding source code, to be distributed - under the terms of Sections 1 and 2 above on a medium customarily - used for software interchange; or, - - * c) Accompany it with the information you received as to the offer to - distribute corresponding source code. (This alternative is allowed - only for noncommercial distribution and only if you received the - program in object code or executable form with such an offer, in - accord with Subsection b above.) - - The source code for a work means the preferred form of the work for - making modifications to it. For an executable work, complete source code - means all the source code for all modules it contains, plus any - associated interface definition files, plus the scripts used to control - compilation and installation of the executable. 
However, as a special - exception, the source code distributed need not include anything that is - normally distributed (in either source or binary form) with the major - components (compiler, kernel, and so on) of the operating system on which - the executable runs, unless that component itself accompanies the - executable. - - If distribution of executable or object code is made by offering access - to copy from a designated place, then offering equivalent access to copy - the source code from the same place counts as distribution of the source - code, even though third parties are not compelled to copy the source - along with the object code. - -4. You may not copy, modify, sublicense, or distribute the Program except as - expressly provided under this License. Any attempt otherwise to copy, - modify, sublicense or distribute the Program is void, and will - automatically terminate your rights under this License. However, parties - who have received copies, or rights, from you under this License will not - have their licenses terminated so long as such parties remain in full - compliance. - -5. You are not required to accept this License, since you have not signed - it. However, nothing else grants you permission to modify or distribute - the Program or its derivative works. These actions are prohibited by law - if you do not accept this License. Therefore, by modifying or - distributing the Program (or any work based on the Program), you - indicate your acceptance of this License to do so, and all its terms and - conditions for copying, distributing or modifying the Program or works - based on it. - -6. Each time you redistribute the Program (or any work based on the - Program), the recipient automatically receives a license from the - original licensor to copy, distribute or modify the Program subject to - these terms and conditions. You may not impose any further restrictions - on the recipients' exercise of the rights granted herein. You are not - responsible for enforcing compliance by third parties to this License. - -7. If, as a consequence of a court judgment or allegation of patent - infringement or for any other reason (not limited to patent issues), - conditions are imposed on you (whether by court order, agreement or - otherwise) that contradict the conditions of this License, they do not - excuse you from the conditions of this License. If you cannot distribute - so as to satisfy simultaneously your obligations under this License and - any other pertinent obligations, then as a consequence you may not - distribute the Program at all. For example, if a patent license would - not permit royalty-free redistribution of the Program by all those who - receive copies directly or indirectly through you, then the only way you - could satisfy both it and this License would be to refrain entirely from - distribution of the Program. - - If any portion of this section is held invalid or unenforceable under any - particular circumstance, the balance of the section is intended to apply - and the section as a whole is intended to apply in other circumstances. - - It is not the purpose of this section to induce you to infringe any - patents or other property right claims or to contest validity of any - such claims; this section has the sole purpose of protecting the - integrity of the free software distribution system, which is implemented - by public license practices. 
Many people have made generous contributions - to the wide range of software distributed through that system in - reliance on consistent application of that system; it is up to the - author/donor to decide if he or she is willing to distribute software - through any other system and a licensee cannot impose that choice. - - This section is intended to make thoroughly clear what is believed to be - a consequence of the rest of this License. - -8. If the distribution and/or use of the Program is restricted in certain - countries either by patents or by copyrighted interfaces, the original - copyright holder who places the Program under this License may add an - explicit geographical distribution limitation excluding those countries, - so that distribution is permitted only in or among countries not thus - excluded. In such case, this License incorporates the limitation as if - written in the body of this License. - -9. The Free Software Foundation may publish revised and/or new versions of - the General Public License from time to time. Such new versions will be - similar in spirit to the present version, but may differ in detail to - address new problems or concerns. - - Each version is given a distinguishing version number. If the Program - specifies a version number of this License which applies to it and "any - later version", you have the option of following the terms and - conditions either of that version or of any later version published by - the Free Software Foundation. If the Program does not specify a version - number of this License, you may choose any version ever published by the - Free Software Foundation. - -10. If you wish to incorporate parts of the Program into other free programs - whose distribution conditions are different, write to the author to ask - for permission. For software which is copyrighted by the Free Software - Foundation, write to the Free Software Foundation; we sometimes make - exceptions for this. Our decision will be guided by the two goals of - preserving the free status of all derivatives of our free software and - of promoting the sharing and reuse of software generally. - - NO WARRANTY - -11. BECAUSE THE PROGRAM IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY - FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW. EXCEPT WHEN - OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES - PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER - EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED - WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE - ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH - YOU. SHOULD THE PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL - NECESSARY SERVICING, REPAIR OR CORRECTION. - -12. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING - WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR - REDISTRIBUTE THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR - DAMAGES, INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL - DAMAGES ARISING OUT OF THE USE OR INABILITY TO USE THE PROGRAM - (INCLUDING BUT NOT LIMITED TO LOSS OF DATA OR DATA BEING RENDERED - INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD PARTIES OR A FAILURE OF - THE PROGRAM TO OPERATE WITH ANY OTHER PROGRAMS), EVEN IF SUCH HOLDER OR - OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH DAMAGES. 
- -END OF TERMS AND CONDITIONS - -How to Apply These Terms to Your New Programs - -If you develop a new program, and you want it to be of the greatest -possible use to the public, the best way to achieve this is to make it free -software which everyone can redistribute and change under these terms. - -To do so, attach the following notices to the program. It is safest to -attach them to the start of each source file to most effectively convey the -exclusion of warranty; and each file should have at least the "copyright" -line and a pointer to where the full notice is found. - -one line to give the program's name and an idea of what it does. -Copyright (C) yyyy name of author - -This program is free software; you can redistribute it and/or modify it -under the terms of the GNU General Public License as published by the Free -Software Foundation; either version 2 of the License, or (at your option) -any later version. - -This program is distributed in the hope that it will be useful, but WITHOUT -ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or -FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for -more details. - -You should have received a copy of the GNU General Public License along with -this program; if not, write to the Free Software Foundation, Inc., 59 -Temple Place - Suite 330, Boston, MA 02111-1307, USA. - -Also add information on how to contact you by electronic and paper mail. - -If the program is interactive, make it output a short notice like this when -it starts in an interactive mode: - -Gnomovision version 69, Copyright (C) year name of author Gnomovision comes -with ABSOLUTELY NO WARRANTY; for details type 'show w'. This is free -software, and you are welcome to redistribute it under certain conditions; -type 'show c' for details. - -The hypothetical commands 'show w' and 'show c' should show the appropriate -parts of the General Public License. Of course, the commands you use may be -called something other than 'show w' and 'show c'; they could even be -mouse-clicks or menu items--whatever suits your program. - -You should also get your employer (if you work as a programmer) or your -school, if any, to sign a "copyright disclaimer" for the program, if -necessary. Here is a sample; alter the names: - -Yoyodyne, Inc., hereby disclaims all copyright interest in the program -'Gnomovision' (which makes passes at compilers) written by James Hacker. - -signature of Ty Coon, 1 April 1989 -Ty Coon, President of Vice - -This General Public License does not permit incorporating your program into -proprietary programs. If your program is a subroutine library, you may -consider it more useful to permit linking proprietary applications with the -library. If this is what you want to do, use the GNU Library General Public -License instead of this License. diff --git a/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe.h b/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe.h index 222c2c71..59415469 100644 --- a/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe.h +++ b/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe.h @@ -17,7 +17,7 @@ 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. The full GNU General Public License is included in this distribution in - the file called "COPYING". + the file called "LICENSE.GPL". 
Contact Information: e1000-devel Mailing List diff --git a/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_82598.c b/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_82598.c index 24015844..e17b7f18 100644 --- a/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_82598.c +++ b/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_82598.c @@ -17,7 +17,7 @@ 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. The full GNU General Public License is included in this distribution in - the file called "COPYING". + the file called "LICENSE.GPL". Contact Information: e1000-devel Mailing List diff --git a/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_82598.h b/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_82598.h index c6abb020..00a584f4 100644 --- a/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_82598.h +++ b/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_82598.h @@ -17,7 +17,7 @@ 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. The full GNU General Public License is included in this distribution in - the file called "COPYING". + the file called "LICENSE.GPL". Contact Information: e1000-devel Mailing List diff --git a/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_82599.c b/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_82599.c index c6f4130d..30de47eb 100644 --- a/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_82599.c +++ b/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_82599.c @@ -17,7 +17,7 @@ 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. The full GNU General Public License is included in this distribution in - the file called "COPYING". + the file called "LICENSE.GPL". Contact Information: e1000-devel Mailing List diff --git a/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_82599.h b/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_82599.h index 02be92ab..41024400 100644 --- a/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_82599.h +++ b/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_82599.h @@ -17,7 +17,7 @@ 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. The full GNU General Public License is included in this distribution in - the file called "COPYING". + the file called "LICENSE.GPL". Contact Information: e1000-devel Mailing List diff --git a/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_api.c b/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_api.c index ef7ce629..f00fe796 100644 --- a/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_api.c +++ b/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_api.c @@ -17,7 +17,7 @@ 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. The full GNU General Public License is included in this distribution in - the file called "COPYING". + the file called "LICENSE.GPL". Contact Information: e1000-devel Mailing List diff --git a/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_api.h b/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_api.h index a6ab30d2..98b74000 100644 --- a/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_api.h +++ b/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_api.h @@ -17,7 +17,7 @@ 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. The full GNU General Public License is included in this distribution in - the file called "COPYING". + the file called "LICENSE.GPL". 
Contact Information: e1000-devel Mailing List diff --git a/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_common.c b/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_common.c index 93659ca0..88b33fa0 100644 --- a/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_common.c +++ b/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_common.c @@ -17,7 +17,7 @@ 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. The full GNU General Public License is included in this distribution in - the file called "COPYING". + the file called "LICENSE.GPL". Contact Information: e1000-devel Mailing List diff --git a/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_common.h b/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_common.h index 9bd6f534..6ae5926f 100644 --- a/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_common.h +++ b/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_common.h @@ -17,7 +17,7 @@ 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. The full GNU General Public License is included in this distribution in - the file called "COPYING". + the file called "LICENSE.GPL". Contact Information: e1000-devel Mailing List diff --git a/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_dcb.h b/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_dcb.h index a6690451..5e6f9ac9 100644 --- a/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_dcb.h +++ b/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_dcb.h @@ -17,7 +17,7 @@ 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. The full GNU General Public License is included in this distribution in - the file called "COPYING". + the file called "LICENSE.GPL". Contact Information: e1000-devel Mailing List diff --git a/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_ethtool.c b/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_ethtool.c index 11472bd3..bc3cb2f4 100644 --- a/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_ethtool.c +++ b/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_ethtool.c @@ -17,7 +17,7 @@ 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. The full GNU General Public License is included in this distribution in - the file called "COPYING". + the file called "LICENSE.GPL". Contact Information: e1000-devel Mailing List diff --git a/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_fcoe.h b/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_fcoe.h index cad28622..48f7dcfc 100644 --- a/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_fcoe.h +++ b/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_fcoe.h @@ -17,7 +17,7 @@ 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. The full GNU General Public License is included in this distribution in - the file called "COPYING". + the file called "LICENSE.GPL". Contact Information: e1000-devel Mailing List diff --git a/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_main.c b/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_main.c index 238028d0..d26016c9 100644 --- a/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_main.c +++ b/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_main.c @@ -17,7 +17,7 @@ 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. The full GNU General Public License is included in this distribution in - the file called "COPYING". + the file called "LICENSE.GPL". 
Contact Information: e1000-devel Mailing List diff --git a/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_mbx.h b/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_mbx.h index 124f00de..5ced84f8 100644 --- a/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_mbx.h +++ b/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_mbx.h @@ -17,7 +17,7 @@ 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. The full GNU General Public License is included in this distribution in - the file called "COPYING". + the file called "LICENSE.GPL". Contact Information: e1000-devel Mailing List diff --git a/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_osdep.h b/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_osdep.h index d161600b..c6f8e21f 100644 --- a/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_osdep.h +++ b/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_osdep.h @@ -17,7 +17,7 @@ 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. The full GNU General Public License is included in this distribution in - the file called "COPYING". + the file called "LICENSE.GPL". Contact Information: e1000-devel Mailing List diff --git a/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_phy.c b/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_phy.c index e3f5275e..234fa632 100644 --- a/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_phy.c +++ b/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_phy.c @@ -17,7 +17,7 @@ 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. The full GNU General Public License is included in this distribution in - the file called "COPYING". + the file called "LICENSE.GPL". Contact Information: e1000-devel Mailing List diff --git a/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_phy.h b/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_phy.h index bbe5a9e3..5ae171ac 100644 --- a/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_phy.h +++ b/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_phy.h @@ -17,7 +17,7 @@ 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. The full GNU General Public License is included in this distribution in - the file called "COPYING". + the file called "LICENSE.GPL". Contact Information: e1000-devel Mailing List diff --git a/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_sriov.h b/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_sriov.h deleted file mode 100644 index 5e3559fd..00000000 --- a/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_sriov.h +++ /dev/null @@ -1,73 +0,0 @@ -/******************************************************************************* - - Intel 10 Gigabit PCI Express Linux driver - Copyright(c) 1999 - 2012 Intel Corporation. - - This program is free software; you can redistribute it and/or modify it - under the terms and conditions of the GNU General Public License, - version 2, as published by the Free Software Foundation. - - This program is distributed in the hope it will be useful, but WITHOUT - ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - more details. - - You should have received a copy of the GNU General Public License along with - this program; if not, write to the Free Software Foundation, Inc., - 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. - - The full GNU General Public License is included in this distribution in - the file called "COPYING". - - Contact Information: - e1000-devel Mailing List - Intel Corporation, 5200 N.E. 
Elam Young Parkway, Hillsboro, OR 97124-6497 - -*******************************************************************************/ - - -#ifndef _IXGBE_SRIOV_H_ -#define _IXGBE_SRIOV_H_ - -int ixgbe_set_vf_multicasts(struct ixgbe_adapter *adapter, - int entries, u16 *hash_list, u32 vf); -void ixgbe_restore_vf_multicasts(struct ixgbe_adapter *adapter); -int ixgbe_set_vf_vlan(struct ixgbe_adapter *adapter, int add, int vid, u32 vf); -void ixgbe_set_vmolr(struct ixgbe_hw *hw, u32 vf, bool aupe); -void ixgbe_vf_reset_event(struct ixgbe_adapter *adapter, u32 vf); -void ixgbe_vf_reset_msg(struct ixgbe_adapter *adapter, u32 vf); -void ixgbe_msg_task(struct ixgbe_adapter *adapter); -int ixgbe_set_vf_mac(struct ixgbe_adapter *adapter, - int vf, unsigned char *mac_addr); -void ixgbe_disable_tx_rx(struct ixgbe_adapter *adapter); -void ixgbe_ping_all_vfs(struct ixgbe_adapter *adapter); -#ifdef IFLA_VF_MAX -int ixgbe_ndo_set_vf_mac(struct net_device *netdev, int queue, u8 *mac); -int ixgbe_ndo_set_vf_vlan(struct net_device *netdev, int queue, u16 vlan, - u8 qos); -int ixgbe_ndo_set_vf_bw(struct net_device *netdev, int vf, int tx_rate); -#ifdef HAVE_VF_SPOOFCHK_CONFIGURE -int ixgbe_ndo_set_vf_spoofchk(struct net_device *netdev, int vf, bool setting); -#endif -int ixgbe_ndo_get_vf_config(struct net_device *netdev, - int vf, struct ifla_vf_info *ivi); -#endif -void ixgbe_disable_sriov(struct ixgbe_adapter *adapter); -#ifdef CONFIG_PCI_IOV -int ixgbe_vf_configuration(struct pci_dev *pdev, unsigned int event_mask); -void ixgbe_enable_sriov(struct ixgbe_adapter *adapter); -#endif -int ixgbe_check_vf_assignment(struct ixgbe_adapter *adapter); -#ifdef IFLA_VF_MAX -void ixgbe_check_vf_rate_limit(struct ixgbe_adapter *adapter); -#endif /* IFLA_VF_MAX */ -void ixgbe_dump_registers(struct ixgbe_adapter *adapter); - -/* - * These are defined in ixgbe_type.h on behalf of the VF driver - * but we need them here unwrapped for the PF driver. - */ -#define IXGBE_DEV_ID_82599_VF 0x10ED -#define IXGBE_DEV_ID_X540_VF 0x1515 - -#endif /* _IXGBE_SRIOV_H_ */ diff --git a/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_type.h b/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_type.h index 6b21c879..bda61fa4 100644 --- a/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_type.h +++ b/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_type.h @@ -17,7 +17,7 @@ 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. The full GNU General Public License is included in this distribution in - the file called "COPYING". + the file called "LICENSE.GPL". Contact Information: e1000-devel Mailing List diff --git a/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_x540.c b/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_x540.c index b99d9e84..2affe242 100644 --- a/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_x540.c +++ b/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_x540.c @@ -17,7 +17,7 @@ 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. The full GNU General Public License is included in this distribution in - the file called "COPYING". + the file called "LICENSE.GPL". Contact Information: e1000-devel Mailing List diff --git a/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_x540.h b/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_x540.h index 77e8952d..38bcc87b 100644 --- a/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_x540.h +++ b/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_x540.h @@ -17,7 +17,7 @@ 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. 
The full GNU General Public License is included in this distribution in - the file called "COPYING". + the file called "LICENSE.GPL". Contact Information: e1000-devel Mailing List diff --git a/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/kcompat.c b/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/kcompat.c index 5f2523ed..d84c7ccb 100644 --- a/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/kcompat.c +++ b/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/kcompat.c @@ -17,7 +17,7 @@ 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. The full GNU General Public License is included in this distribution in - the file called "COPYING". + the file called "LICENSE.GPL". Contact Information: e1000-devel Mailing List diff --git a/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/kcompat.h b/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/kcompat.h index bf27579b..4c7a6408 100644 --- a/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/kcompat.h +++ b/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/kcompat.h @@ -17,7 +17,7 @@ 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. The full GNU General Public License is included in this distribution in - the file called "COPYING". + the file called "LICENSE.GPL". Contact Information: e1000-devel Mailing List @@ -3140,4 +3140,16 @@ static inline int __kc_pci_vfs_assigned(struct pci_dev *dev) #define SET_ETHTOOL_OPS(netdev, ops) ((netdev)->ethtool_ops = (ops)) #endif /* >= 3.16.0 */ +/* + * vlan_tx_tag_* macros renamed to skb_vlan_tag_* (Linux commit: df8a39defad4) + * For older kernels backported this commit, need to use renamed functions. + * This fix is specific to RedHat/CentOS kernels. + */ +#if (defined(RHEL_RELEASE_CODE) && \ + RHEL_RELEASE_CODE >= RHEL_RELEASE_VERSION(6, 8) && \ + LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 34)) +#define vlan_tx_tag_get skb_vlan_tag_get +#define vlan_tx_tag_present skb_vlan_tag_present +#endif + #endif /* _KCOMPAT_H_ */ diff --git a/lib/librte_eal/linuxapp/kni/kni_dev.h b/lib/librte_eal/linuxapp/kni/kni_dev.h index a0e5cb6b..58cbadd3 100644 --- a/lib/librte_eal/linuxapp/kni/kni_dev.h +++ b/lib/librte_eal/linuxapp/kni/kni_dev.h @@ -25,6 +25,11 @@ #ifndef _KNI_DEV_H_ #define _KNI_DEV_H_ +#ifdef pr_fmt +#undef pr_fmt +#endif +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + #include #include #include @@ -39,10 +44,11 @@ #include #define KNI_KTHREAD_RESCHEDULE_INTERVAL 5 /* us */ +#define MBUF_BURST_SZ 32 + /** * A structure describing the private information for a kni device. */ - struct kni_dev { /* kni list */ struct list_head list; @@ -50,7 +56,7 @@ struct kni_dev { struct net_device_stats stats; int status; uint16_t group_id; /* Group ID of a group of KNI devices */ - unsigned core_id; /* Core ID to bind */ + uint32_t core_id; /* Core ID to bind */ char name[RTE_KNI_NAMESIZE]; /* Network device name */ struct task_struct *pthread; @@ -84,38 +90,36 @@ struct kni_dev { /* response queue */ void *resp_q; - void * sync_kva; + void *sync_kva; void *sync_va; void *mbuf_kva; void *mbuf_va; /* mbuf size */ - unsigned mbuf_size; + uint32_t mbuf_size; /* synchro for request processing */ unsigned long synchro; #ifdef RTE_KNI_VHOST - struct kni_vhost_queue* vhost_queue; + struct kni_vhost_queue *vhost_queue; + volatile enum { BE_STOP = 0x1, BE_START = 0x2, BE_FINISH = 0x4, - }vq_status; + } vq_status; #endif + /* buffers */ + void *pa[MBUF_BURST_SZ]; + void *va[MBUF_BURST_SZ]; + void *alloc_pa[MBUF_BURST_SZ]; + void *alloc_va[MBUF_BURST_SZ]; }; -#define KNI_ERR(args...) printk(KERN_DEBUG "KNI: Error: " args) -#define KNI_PRINT(args...) 
printk(KERN_DEBUG "KNI: " args) -#ifdef RTE_KNI_KO_DEBUG - #define KNI_DBG(args...) printk(KERN_DEBUG "KNI: " args) -#else - #define KNI_DBG(args...) -#endif - #ifdef RTE_KNI_VHOST -unsigned int +uint32_t kni_poll(struct file *file, struct socket *sock, poll_table * wait); int kni_chk_vhost_rx(struct kni_dev *kni); int kni_vhost_init(struct kni_dev *kni); @@ -127,23 +131,22 @@ struct kni_vhost_queue { int vnet_hdr_sz; struct kni_dev *kni; int sockfd; - unsigned int flags; - struct sk_buff* cache; - struct rte_kni_fifo* fifo; + uint32_t flags; + struct sk_buff *cache; + struct rte_kni_fifo *fifo; }; #endif -#ifdef RTE_KNI_VHOST_DEBUG_RX - #define KNI_DBG_RX(args...) printk(KERN_DEBUG "KNI RX: " args) -#else - #define KNI_DBG_RX(args...) -#endif +void kni_net_rx(struct kni_dev *kni); +void kni_net_init(struct net_device *dev); +void kni_net_config_lo_mode(char *lo_str); +void kni_net_poll_resp(struct kni_dev *kni); +void kni_set_ethtool_ops(struct net_device *netdev); -#ifdef RTE_KNI_VHOST_DEBUG_TX - #define KNI_DBG_TX(args...) printk(KERN_DEBUG "KNI TX: " args) -#else - #define KNI_DBG_TX(args...) -#endif +int ixgbe_kni_probe(struct pci_dev *pdev, struct net_device **lad_dev); +void ixgbe_kni_remove(struct pci_dev *pdev); +int igb_kni_probe(struct pci_dev *pdev, struct net_device **lad_dev); +void igb_kni_remove(struct pci_dev *pdev); #endif diff --git a/lib/librte_eal/linuxapp/kni/kni_ethtool.c b/lib/librte_eal/linuxapp/kni/kni_ethtool.c index 06b6d463..0c88589c 100644 --- a/lib/librte_eal/linuxapp/kni/kni_ethtool.c +++ b/lib/librte_eal/linuxapp/kni/kni_ethtool.c @@ -31,6 +31,7 @@ static int kni_check_if_running(struct net_device *dev) { struct kni_dev *priv = netdev_priv(dev); + if (priv->lad_dev) return 0; else @@ -41,6 +42,7 @@ static void kni_get_drvinfo(struct net_device *dev, struct ethtool_drvinfo *info) { struct kni_dev *priv = netdev_priv(dev); + priv->lad_dev->ethtool_ops->get_drvinfo(priv->lad_dev, info); } @@ -48,6 +50,7 @@ static int kni_get_settings(struct net_device *dev, struct ethtool_cmd *ecmd) { struct kni_dev *priv = netdev_priv(dev); + return priv->lad_dev->ethtool_ops->get_settings(priv->lad_dev, ecmd); } @@ -55,6 +58,7 @@ static int kni_set_settings(struct net_device *dev, struct ethtool_cmd *ecmd) { struct kni_dev *priv = netdev_priv(dev); + return priv->lad_dev->ethtool_ops->set_settings(priv->lad_dev, ecmd); } @@ -62,6 +66,7 @@ static void kni_get_wol(struct net_device *dev, struct ethtool_wolinfo *wol) { struct kni_dev *priv = netdev_priv(dev); + priv->lad_dev->ethtool_ops->get_wol(priv->lad_dev, wol); } @@ -69,6 +74,7 @@ static int kni_set_wol(struct net_device *dev, struct ethtool_wolinfo *wol) { struct kni_dev *priv = netdev_priv(dev); + return priv->lad_dev->ethtool_ops->set_wol(priv->lad_dev, wol); } @@ -76,6 +82,7 @@ static int kni_nway_reset(struct net_device *dev) { struct kni_dev *priv = netdev_priv(dev); + return priv->lad_dev->ethtool_ops->nway_reset(priv->lad_dev); } @@ -83,6 +90,7 @@ static int kni_get_eeprom_len(struct net_device *dev) { struct kni_dev *priv = netdev_priv(dev); + return priv->lad_dev->ethtool_ops->get_eeprom_len(priv->lad_dev); } @@ -91,6 +99,7 @@ kni_get_eeprom(struct net_device *dev, struct ethtool_eeprom *eeprom, u8 *bytes) { struct kni_dev *priv = netdev_priv(dev); + return priv->lad_dev->ethtool_ops->get_eeprom(priv->lad_dev, eeprom, bytes); } @@ -100,6 +109,7 @@ kni_set_eeprom(struct net_device *dev, struct ethtool_eeprom *eeprom, u8 *bytes) { struct kni_dev *priv = netdev_priv(dev); + return 
priv->lad_dev->ethtool_ops->set_eeprom(priv->lad_dev, eeprom, bytes); } @@ -108,6 +118,7 @@ static void kni_get_ringparam(struct net_device *dev, struct ethtool_ringparam *ring) { struct kni_dev *priv = netdev_priv(dev); + priv->lad_dev->ethtool_ops->get_ringparam(priv->lad_dev, ring); } @@ -115,6 +126,7 @@ static int kni_set_ringparam(struct net_device *dev, struct ethtool_ringparam *ring) { struct kni_dev *priv = netdev_priv(dev); + return priv->lad_dev->ethtool_ops->set_ringparam(priv->lad_dev, ring); } @@ -122,6 +134,7 @@ static void kni_get_pauseparam(struct net_device *dev, struct ethtool_pauseparam *pause) { struct kni_dev *priv = netdev_priv(dev); + priv->lad_dev->ethtool_ops->get_pauseparam(priv->lad_dev, pause); } @@ -129,6 +142,7 @@ static int kni_set_pauseparam(struct net_device *dev, struct ethtool_pauseparam *pause) { struct kni_dev *priv = netdev_priv(dev); + return priv->lad_dev->ethtool_ops->set_pauseparam(priv->lad_dev, pause); } @@ -137,6 +151,7 @@ static u32 kni_get_msglevel(struct net_device *dev) { struct kni_dev *priv = netdev_priv(dev); + return priv->lad_dev->ethtool_ops->get_msglevel(priv->lad_dev); } @@ -144,6 +159,7 @@ static void kni_set_msglevel(struct net_device *dev, u32 data) { struct kni_dev *priv = netdev_priv(dev); + priv->lad_dev->ethtool_ops->set_msglevel(priv->lad_dev, data); } @@ -151,6 +167,7 @@ static int kni_get_regs_len(struct net_device *dev) { struct kni_dev *priv = netdev_priv(dev); + return priv->lad_dev->ethtool_ops->get_regs_len(priv->lad_dev); } @@ -158,6 +175,7 @@ static void kni_get_regs(struct net_device *dev, struct ethtool_regs *regs, void *p) { struct kni_dev *priv = netdev_priv(dev); + priv->lad_dev->ethtool_ops->get_regs(priv->lad_dev, regs, p); } @@ -165,6 +183,7 @@ static void kni_get_strings(struct net_device *dev, u32 stringset, u8 *data) { struct kni_dev *priv = netdev_priv(dev); + priv->lad_dev->ethtool_ops->get_strings(priv->lad_dev, stringset, data); } @@ -173,6 +192,7 @@ static int kni_get_sset_count(struct net_device *dev, int sset) { struct kni_dev *priv = netdev_priv(dev); + return priv->lad_dev->ethtool_ops->get_sset_count(priv->lad_dev, sset); } @@ -181,24 +201,25 @@ kni_get_ethtool_stats(struct net_device *dev, struct ethtool_stats *stats, u64 *data) { struct kni_dev *priv = netdev_priv(dev); + priv->lad_dev->ethtool_ops->get_ethtool_stats(priv->lad_dev, stats, data); } struct ethtool_ops kni_ethtool_ops = { - .begin = kni_check_if_running, + .begin = kni_check_if_running, .get_drvinfo = kni_get_drvinfo, .get_settings = kni_get_settings, .set_settings = kni_set_settings, .get_regs_len = kni_get_regs_len, - .get_regs = kni_get_regs, - .get_wol = kni_get_wol, - .set_wol = kni_set_wol, - .nway_reset = kni_nway_reset, - .get_link = ethtool_op_get_link, + .get_regs = kni_get_regs, + .get_wol = kni_get_wol, + .set_wol = kni_set_wol, + .nway_reset = kni_nway_reset, + .get_link = ethtool_op_get_link, .get_eeprom_len = kni_get_eeprom_len, - .get_eeprom = kni_get_eeprom, - .set_eeprom = kni_set_eeprom, + .get_eeprom = kni_get_eeprom, + .set_eeprom = kni_set_eeprom, .get_ringparam = kni_get_ringparam, .set_ringparam = kni_set_ringparam, .get_pauseparam = kni_get_pauseparam, @@ -207,7 +228,7 @@ struct ethtool_ops kni_ethtool_ops = { .set_msglevel = kni_set_msglevel, .get_strings = kni_get_strings, .get_sset_count = kni_get_sset_count, - .get_ethtool_stats = kni_get_ethtool_stats, + .get_ethtool_stats = kni_get_ethtool_stats, }; void diff --git a/lib/librte_eal/linuxapp/kni/kni_fifo.h b/lib/librte_eal/linuxapp/kni/kni_fifo.h 
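Every kni_ethtool.c hunk above follows the same delegation shape: the KNI pseudo-interface implements an ethtool callback by fetching its private data with netdev_priv() and forwarding the call to the bound Intel device (priv->lad_dev), while the .begin hook (kni_check_if_running) refuses service when no lad_dev is attached; the diff itself only adds the blank line after declarations that checkpatch requires and re-aligns the ops table. A minimal, hedged sketch of that pattern -- struct kni_priv below is a reduced stand-in, not the real struct kni_dev:

#include <linux/netdevice.h>
#include <linux/ethtool.h>

/* Reduced stand-in for struct kni_dev: only the lower device matters here. */
struct kni_priv {
	struct net_device *lad_dev;	/* igb/ixgbe netdev captured at probe */
};

static int
kni_get_settings(struct net_device *dev, struct ethtool_cmd *ecmd)
{
	struct kni_priv *priv = netdev_priv(dev);

	/* KNI has no PHY of its own; the bound device answers. */
	return priv->lad_dev->ethtool_ops->get_settings(priv->lad_dev, ecmd);
}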
index 3ea750e2..025ec1c9 100644 --- a/lib/librte_eal/linuxapp/kni/kni_fifo.h +++ b/lib/librte_eal/linuxapp/kni/kni_fifo.h @@ -30,13 +30,13 @@ /** * Adds num elements into the fifo. Return the number actually written */ -static inline unsigned -kni_fifo_put(struct rte_kni_fifo *fifo, void **data, unsigned num) +static inline uint32_t +kni_fifo_put(struct rte_kni_fifo *fifo, void **data, uint32_t num) { - unsigned i = 0; - unsigned fifo_write = fifo->write; - unsigned fifo_read = fifo->read; - unsigned new_write = fifo_write; + uint32_t i = 0; + uint32_t fifo_write = fifo->write; + uint32_t fifo_read = fifo->read; + uint32_t new_write = fifo_write; for (i = 0; i < num; i++) { new_write = (new_write + 1) & (fifo->len - 1); @@ -54,12 +54,12 @@ kni_fifo_put(struct rte_kni_fifo *fifo, void **data, unsigned num) /** * Get up to num elements from the fifo. Return the number actully read */ -static inline unsigned -kni_fifo_get(struct rte_kni_fifo *fifo, void **data, unsigned num) +static inline uint32_t +kni_fifo_get(struct rte_kni_fifo *fifo, void **data, uint32_t num) { - unsigned i = 0; - unsigned new_read = fifo->read; - unsigned fifo_write = fifo->write; + uint32_t i = 0; + uint32_t new_read = fifo->read; + uint32_t fifo_write = fifo->write; for (i = 0; i < num; i++) { if (new_read == fifo_write) @@ -76,16 +76,16 @@ kni_fifo_get(struct rte_kni_fifo *fifo, void **data, unsigned num) /** * Get the num of elements in the fifo */ -static inline unsigned +static inline uint32_t kni_fifo_count(struct rte_kni_fifo *fifo) { - return (fifo->len + fifo->write - fifo->read) & ( fifo->len - 1); + return (fifo->len + fifo->write - fifo->read) & (fifo->len - 1); } /** * Get the num of available elements in the fifo */ -static inline unsigned +static inline uint32_t kni_fifo_free_count(struct rte_kni_fifo *fifo) { return (fifo->read - fifo->write - 1) & (fifo->len - 1); @@ -96,7 +96,7 @@ kni_fifo_free_count(struct rte_kni_fifo *fifo) * Initializes the kni fifo structure */ static inline void -kni_fifo_init(struct rte_kni_fifo *fifo, unsigned size) +kni_fifo_init(struct rte_kni_fifo *fifo, uint32_t size) { fifo->write = 0; fifo->read = 0; diff --git a/lib/librte_eal/linuxapp/kni/kni_misc.c b/lib/librte_eal/linuxapp/kni/kni_misc.c index 59d15ca6..497db9bd 100644 --- a/lib/librte_eal/linuxapp/kni/kni_misc.c +++ b/lib/librte_eal/linuxapp/kni/kni_misc.c @@ -30,6 +30,7 @@ #include #include #include +#include #include #include #include @@ -47,52 +48,15 @@ MODULE_DESCRIPTION("Kernel Module for managing kni devices"); #define KNI_MAX_DEVICES 32 -extern void kni_net_rx(struct kni_dev *kni); -extern void kni_net_init(struct net_device *dev); -extern void kni_net_config_lo_mode(char *lo_str); -extern void kni_net_poll_resp(struct kni_dev *kni); -extern void kni_set_ethtool_ops(struct net_device *netdev); - -extern int ixgbe_kni_probe(struct pci_dev *pdev, struct net_device **lad_dev); -extern void ixgbe_kni_remove(struct pci_dev *pdev); -extern int igb_kni_probe(struct pci_dev *pdev, struct net_device **lad_dev); -extern void igb_kni_remove(struct pci_dev *pdev); - -static int kni_open(struct inode *inode, struct file *file); -static int kni_release(struct inode *inode, struct file *file); -static int kni_ioctl(struct inode *inode, unsigned int ioctl_num, - unsigned long ioctl_param); -static int kni_compat_ioctl(struct inode *inode, unsigned int ioctl_num, - unsigned long ioctl_param); -static int kni_dev_remove(struct kni_dev *dev); - -static int __init kni_parse_kthread_mode(void); - -/* KNI processing for single 
kernel thread mode */ -static int kni_thread_single(void *unused); -/* KNI processing for multiple kernel thread mode */ -static int kni_thread_multiple(void *param); - -static struct file_operations kni_fops = { - .owner = THIS_MODULE, - .open = kni_open, - .release = kni_release, - .unlocked_ioctl = (void *)kni_ioctl, - .compat_ioctl = (void *)kni_compat_ioctl, -}; - -static struct miscdevice kni_misc = { - .minor = MISC_DYNAMIC_MINOR, - .name = KNI_DEVICE, - .fops = &kni_fops, -}; +extern const struct pci_device_id ixgbe_pci_tbl[]; +extern const struct pci_device_id igb_pci_tbl[]; /* loopback mode */ -static char *lo_mode = NULL; +static char *lo_mode; /* Kernel thread mode */ -static char *kthread_mode = NULL; -static unsigned multiple_kthread_on = 0; +static char *kthread_mode; +static uint32_t multiple_kthread_on; #define KNI_DEV_IN_USE_BIT_NUM 0 /* Bit number for device in use */ @@ -100,20 +64,24 @@ static int kni_net_id; struct kni_net { unsigned long device_in_use; /* device in use flag */ + struct mutex kni_kthread_lock; struct task_struct *kni_kthread; struct rw_semaphore kni_list_lock; struct list_head kni_list_head; }; -static int __net_init kni_init_net(struct net *net) +static int __net_init +kni_init_net(struct net *net) { #ifdef HAVE_SIMPLIFIED_PERNET_OPERATIONS struct kni_net *knet = net_generic(net, kni_net_id); + + memset(knet, 0, sizeof(*knet)); #else struct kni_net *knet; int ret; - knet = kmalloc(sizeof(struct kni_net), GFP_KERNEL); + knet = kzalloc(sizeof(struct kni_net), GFP_KERNEL); if (!knet) { ret = -ENOMEM; return ret; @@ -123,6 +91,8 @@ static int __net_init kni_init_net(struct net *net) /* Clear the bit of device in use */ clear_bit(KNI_DEV_IN_USE_BIT_NUM, &knet->device_in_use); + mutex_init(&knet->kni_kthread_lock); + init_rwsem(&knet->kni_list_lock); INIT_LIST_HEAD(&knet->kni_list_head); @@ -137,11 +107,15 @@ static int __net_init kni_init_net(struct net *net) #endif } -static void __net_exit kni_exit_net(struct net *net) +static void __net_exit +kni_exit_net(struct net *net) { -#ifndef HAVE_SIMPLIFIED_PERNET_OPERATIONS - struct kni_net *knet = net_generic(net, kni_net_id); + struct kni_net *knet __maybe_unused; + + knet = net_generic(net, kni_net_id); + mutex_destroy(&knet->kni_kthread_lock); +#ifndef HAVE_SIMPLIFIED_PERNET_OPERATIONS kfree(knet); #endif } @@ -155,72 +129,56 @@ static struct pernet_operations kni_net_ops = { #endif }; -static int __init -kni_init(void) +static int +kni_thread_single(void *data) { - int rc; - - KNI_PRINT("######## DPDK kni module loading ########\n"); - - if (kni_parse_kthread_mode() < 0) { - KNI_ERR("Invalid parameter for kthread_mode\n"); - return -EINVAL; - } + struct kni_net *knet = data; + int j; + struct kni_dev *dev; -#ifdef HAVE_SIMPLIFIED_PERNET_OPERATIONS - rc = register_pernet_subsys(&kni_net_ops); + while (!kthread_should_stop()) { + down_read(&knet->kni_list_lock); + for (j = 0; j < KNI_RX_LOOP_NUM; j++) { + list_for_each_entry(dev, &knet->kni_list_head, list) { +#ifdef RTE_KNI_VHOST + kni_chk_vhost_rx(dev); #else - rc = register_pernet_gen_subsys(&kni_net_id, &kni_net_ops); + kni_net_rx(dev); +#endif + kni_net_poll_resp(dev); + } + } + up_read(&knet->kni_list_lock); +#ifdef RTE_KNI_PREEMPT_DEFAULT + /* reschedule out for a while */ + schedule_timeout_interruptible( + usecs_to_jiffies(KNI_KTHREAD_RESCHEDULE_INTERVAL)); #endif - if (rc) - return -EPERM; - - rc = misc_register(&kni_misc); - if (rc != 0) { - KNI_ERR("Misc registration failed\n"); - goto out; } - /* Configure the lo mode according to the input 
parameter */ - kni_net_config_lo_mode(lo_mode); - - KNI_PRINT("######## DPDK kni module loaded ########\n"); - return 0; - -out: -#ifdef HAVE_SIMPLIFIED_PERNET_OPERATIONS - unregister_pernet_subsys(&kni_net_ops); -#else - register_pernet_gen_subsys(&kni_net_id, &kni_net_ops); -#endif - return rc; } -static void __exit -kni_exit(void) +static int +kni_thread_multiple(void *param) { - misc_deregister(&kni_misc); -#ifdef HAVE_SIMPLIFIED_PERNET_OPERATIONS - unregister_pernet_subsys(&kni_net_ops); + int j; + struct kni_dev *dev = (struct kni_dev *)param; + + while (!kthread_should_stop()) { + for (j = 0; j < KNI_RX_LOOP_NUM; j++) { +#ifdef RTE_KNI_VHOST + kni_chk_vhost_rx(dev); #else - register_pernet_gen_subsys(&kni_net_id, &kni_net_ops); + kni_net_rx(dev); #endif - KNI_PRINT("####### DPDK kni module unloaded #######\n"); -} - -static int __init -kni_parse_kthread_mode(void) -{ - if (!kthread_mode) - return 0; - - if (strcmp(kthread_mode, "single") == 0) - return 0; - else if (strcmp(kthread_mode, "multiple") == 0) - multiple_kthread_on = 1; - else - return -1; + kni_net_poll_resp(dev); + } +#ifdef RTE_KNI_PREEMPT_DEFAULT + schedule_timeout_interruptible( + usecs_to_jiffies(KNI_KTHREAD_RESCHEDULE_INTERVAL)); +#endif + } return 0; } @@ -235,21 +193,29 @@ kni_open(struct inode *inode, struct file *file) if (test_and_set_bit(KNI_DEV_IN_USE_BIT_NUM, &knet->device_in_use)) return -EBUSY; - /* Create kernel thread for single mode */ - if (multiple_kthread_on == 0) { - KNI_PRINT("Single kernel thread for all KNI devices\n"); - /* Create kernel thread for RX */ - knet->kni_kthread = kthread_run(kni_thread_single, (void *)knet, - "kni_single"); - if (IS_ERR(knet->kni_kthread)) { - KNI_ERR("Unable to create kernel threaed\n"); - return PTR_ERR(knet->kni_kthread); - } - } else - KNI_PRINT("Multiple kernel thread mode enabled\n"); - file->private_data = get_net(net); - KNI_PRINT("/dev/kni opened\n"); + pr_debug("/dev/kni opened\n"); + + return 0; +} + +static int +kni_dev_remove(struct kni_dev *dev) +{ + if (!dev) + return -ENODEV; + + if (dev->pci_dev) { + if (pci_match_id(ixgbe_pci_tbl, dev->pci_dev)) + ixgbe_kni_remove(dev->pci_dev); + else if (pci_match_id(igb_pci_tbl, dev->pci_dev)) + igb_kni_remove(dev->pci_dev); + } + + if (dev->net_dev) { + unregister_netdev(dev->net_dev); + free_netdev(dev->net_dev); + } return 0; } @@ -263,9 +229,13 @@ kni_release(struct inode *inode, struct file *file) /* Stop kernel thread for single mode */ if (multiple_kthread_on == 0) { + mutex_lock(&knet->kni_kthread_lock); /* Stop kernel thread */ - kthread_stop(knet->kni_kthread); - knet->kni_kthread = NULL; + if (knet->kni_kthread != NULL) { + kthread_stop(knet->kni_kthread); + knet->kni_kthread = NULL; + } + mutex_unlock(&knet->kni_kthread_lock); } down_write(&knet->kni_list_lock); @@ -288,110 +258,70 @@ kni_release(struct inode *inode, struct file *file) clear_bit(KNI_DEV_IN_USE_BIT_NUM, &knet->device_in_use); put_net(net); - KNI_PRINT("/dev/kni closed\n"); + pr_debug("/dev/kni closed\n"); return 0; } static int -kni_thread_single(void *data) +kni_check_param(struct kni_dev *kni, struct rte_kni_device_info *dev) { - struct kni_net *knet = data; - int j; - struct kni_dev *dev; + if (!kni || !dev) + return -1; - while (!kthread_should_stop()) { - down_read(&knet->kni_list_lock); - for (j = 0; j < KNI_RX_LOOP_NUM; j++) { - list_for_each_entry(dev, &knet->kni_list_head, list) { -#ifdef RTE_KNI_VHOST - kni_chk_vhost_rx(dev); -#else - kni_net_rx(dev); -#endif - kni_net_poll_resp(dev); - } - } - 
up_read(&knet->kni_list_lock); -#ifdef RTE_KNI_PREEMPT_DEFAULT - /* reschedule out for a while */ - schedule_timeout_interruptible(usecs_to_jiffies( \ - KNI_KTHREAD_RESCHEDULE_INTERVAL)); -#endif + /* Check if network name has been used */ + if (!strncmp(kni->name, dev->name, RTE_KNI_NAMESIZE)) { + pr_err("KNI name %s duplicated\n", dev->name); + return -1; } return 0; } static int -kni_thread_multiple(void *param) +kni_run_thread(struct kni_net *knet, struct kni_dev *kni, uint8_t force_bind) { - int j; - struct kni_dev *dev = (struct kni_dev *)param; - - while (!kthread_should_stop()) { - for (j = 0; j < KNI_RX_LOOP_NUM; j++) { -#ifdef RTE_KNI_VHOST - kni_chk_vhost_rx(dev); -#else - kni_net_rx(dev); -#endif - kni_net_poll_resp(dev); + /** + * Create a new kernel thread for multiple mode, set its core affinity, + * and finally wake it up. + */ + if (multiple_kthread_on) { + kni->pthread = kthread_create(kni_thread_multiple, + (void *)kni, "kni_%s", kni->name); + if (IS_ERR(kni->pthread)) { + kni_dev_remove(kni); + return -ECANCELED; } -#ifdef RTE_KNI_PREEMPT_DEFAULT - schedule_timeout_interruptible(usecs_to_jiffies( \ - KNI_KTHREAD_RESCHEDULE_INTERVAL)); -#endif - } - - return 0; -} - -static int -kni_dev_remove(struct kni_dev *dev) -{ - if (!dev) - return -ENODEV; - - switch (dev->device_id) { - #define RTE_PCI_DEV_ID_DECL_IGB(vend, dev) case (dev): - #include - igb_kni_remove(dev->pci_dev); - break; - #define RTE_PCI_DEV_ID_DECL_IXGBE(vend, dev) case (dev): - #include - ixgbe_kni_remove(dev->pci_dev); - break; - default: - break; - } - - if (dev->net_dev) { - unregister_netdev(dev->net_dev); - free_netdev(dev->net_dev); - } - return 0; -} + if (force_bind) + kthread_bind(kni->pthread, kni->core_id); + wake_up_process(kni->pthread); + } else { + mutex_lock(&knet->kni_kthread_lock); + + if (knet->kni_kthread == NULL) { + knet->kni_kthread = kthread_create(kni_thread_single, + (void *)knet, "kni_single"); + if (IS_ERR(knet->kni_kthread)) { + mutex_unlock(&knet->kni_kthread_lock); + kni_dev_remove(kni); + return -ECANCELED; + } -static int -kni_check_param(struct kni_dev *kni, struct rte_kni_device_info *dev) -{ - if (!kni || !dev) - return -1; + if (force_bind) + kthread_bind(knet->kni_kthread, kni->core_id); + wake_up_process(knet->kni_kthread); + } - /* Check if network name has been used */ - if (!strncmp(kni->name, dev->name, RTE_KNI_NAMESIZE)) { - KNI_ERR("KNI name %s duplicated\n", dev->name); - return -1; + mutex_unlock(&knet->kni_kthread_lock); } return 0; } static int -kni_ioctl_create(struct net *net, - unsigned int ioctl_num, unsigned long ioctl_param) +kni_ioctl_create(struct net *net, uint32_t ioctl_num, + unsigned long ioctl_param) { struct kni_net *knet = net_generic(net, kni_net_id); int ret; @@ -402,7 +332,7 @@ kni_ioctl_create(struct net *net, struct net_device *lad_dev = NULL; struct kni_dev *kni, *dev, *n; - printk(KERN_INFO "KNI: Creating kni...\n"); + pr_info("Creating kni...\n"); /* Check the buffer size, to avoid warning */ if (_IOC_SIZE(ioctl_num) > sizeof(dev_info)) return -EINVAL; @@ -410,17 +340,15 @@ kni_ioctl_create(struct net *net, /* Copy kni info from user space */ ret = copy_from_user(&dev_info, (void *)ioctl_param, sizeof(dev_info)); if (ret) { - KNI_ERR("copy_from_user in kni_ioctl_create"); + pr_err("copy_from_user in kni_ioctl_create"); return -EIO; } /** - * Check if the cpu core id is valid for binding, - * for multiple kernel thread mode. + * Check if the cpu core id is valid for binding. 
*/ - if (multiple_kthread_on && dev_info.force_bind && - !cpu_online(dev_info.core_id)) { - KNI_ERR("cpu %u is not online\n", dev_info.core_id); + if (dev_info.force_bind && !cpu_online(dev_info.core_id)) { + pr_err("cpu %u is not online\n", dev_info.core_id); return -EINVAL; } @@ -440,7 +368,7 @@ kni_ioctl_create(struct net *net, #endif kni_net_init); if (net_dev == NULL) { - KNI_ERR("error allocating device \"%s\"\n", dev_info.name); + pr_err("error allocating device \"%s\"\n", dev_info.name); return -EBUSY; } @@ -464,33 +392,27 @@ kni_ioctl_create(struct net *net, kni->sync_va = dev_info.sync_va; kni->sync_kva = phys_to_virt(dev_info.sync_phys); - kni->mbuf_kva = phys_to_virt(dev_info.mbuf_phys); - kni->mbuf_va = dev_info.mbuf_va; - #ifdef RTE_KNI_VHOST kni->vhost_queue = NULL; kni->vq_status = BE_STOP; #endif kni->mbuf_size = dev_info.mbuf_size; - KNI_PRINT("tx_phys: 0x%016llx, tx_q addr: 0x%p\n", + pr_debug("tx_phys: 0x%016llx, tx_q addr: 0x%p\n", (unsigned long long) dev_info.tx_phys, kni->tx_q); - KNI_PRINT("rx_phys: 0x%016llx, rx_q addr: 0x%p\n", + pr_debug("rx_phys: 0x%016llx, rx_q addr: 0x%p\n", (unsigned long long) dev_info.rx_phys, kni->rx_q); - KNI_PRINT("alloc_phys: 0x%016llx, alloc_q addr: 0x%p\n", + pr_debug("alloc_phys: 0x%016llx, alloc_q addr: 0x%p\n", (unsigned long long) dev_info.alloc_phys, kni->alloc_q); - KNI_PRINT("free_phys: 0x%016llx, free_q addr: 0x%p\n", + pr_debug("free_phys: 0x%016llx, free_q addr: 0x%p\n", (unsigned long long) dev_info.free_phys, kni->free_q); - KNI_PRINT("req_phys: 0x%016llx, req_q addr: 0x%p\n", + pr_debug("req_phys: 0x%016llx, req_q addr: 0x%p\n", (unsigned long long) dev_info.req_phys, kni->req_q); - KNI_PRINT("resp_phys: 0x%016llx, resp_q addr: 0x%p\n", + pr_debug("resp_phys: 0x%016llx, resp_q addr: 0x%p\n", (unsigned long long) dev_info.resp_phys, kni->resp_q); - KNI_PRINT("mbuf_phys: 0x%016llx, mbuf_kva: 0x%p\n", - (unsigned long long) dev_info.mbuf_phys, kni->mbuf_kva); - KNI_PRINT("mbuf_va: 0x%p\n", dev_info.mbuf_va); - KNI_PRINT("mbuf_size: %u\n", kni->mbuf_size); + pr_debug("mbuf_size: %u\n", kni->mbuf_size); - KNI_DBG("PCI: %02x:%02x.%02x %04x:%04x\n", + pr_debug("PCI: %02x:%02x.%02x %04x:%04x\n", dev_info.bus, dev_info.devid, dev_info.function, @@ -501,7 +423,7 @@ kni_ioctl_create(struct net *net, /* Support Ethtool */ while (pci) { - KNI_PRINT("pci_bus: %02x:%02x:%02x \n", + pr_debug("pci_bus: %02x:%02x:%02x\n", pci->bus->number, PCI_SLOT(pci->devfn), PCI_FUNC(pci->devfn)); @@ -510,28 +432,21 @@ kni_ioctl_create(struct net *net, (PCI_SLOT(pci->devfn) == dev_info.devid) && (PCI_FUNC(pci->devfn) == dev_info.function)) { found_pci = pci; - switch (dev_info.device_id) { - #define RTE_PCI_DEV_ID_DECL_IGB(vend, dev) case (dev): - #include - ret = igb_kni_probe(found_pci, &lad_dev); - break; - #define RTE_PCI_DEV_ID_DECL_IXGBE(vend, dev) \ - case (dev): - #include + + if (pci_match_id(ixgbe_pci_tbl, found_pci)) ret = ixgbe_kni_probe(found_pci, &lad_dev); - break; - default: + else if (pci_match_id(igb_pci_tbl, found_pci)) + ret = igb_kni_probe(found_pci, &lad_dev); + else ret = -1; - break; - } - KNI_DBG("PCI found: pci=0x%p, lad_dev=0x%p\n", + pr_debug("PCI found: pci=0x%p, lad_dev=0x%p\n", pci, lad_dev); if (ret == 0) { kni->lad_dev = lad_dev; kni_set_ethtool_ops(kni->net_dev); } else { - KNI_ERR("Device not supported by ethtool"); + pr_err("Device not supported by ethtool"); kni->lad_dev = NULL; } @@ -546,7 +461,7 @@ kni_ioctl_create(struct net *net, pci_dev_put(pci); if (kni->lad_dev) - memcpy(net_dev->dev_addr, 
kni->lad_dev->dev_addr, ETH_ALEN); + ether_addr_copy(net_dev->dev_addr, kni->lad_dev->dev_addr); else /* * Generate random mac address. eth_random_addr() is the newer @@ -556,9 +471,11 @@ kni_ioctl_create(struct net *net, ret = register_netdev(net_dev); if (ret) { - KNI_ERR("error %i registering device \"%s\"\n", + pr_err("error %i registering device \"%s\"\n", ret, dev_info.name); + kni->net_dev = NULL; kni_dev_remove(kni); + free_netdev(net_dev); return -ENODEV; } @@ -566,22 +483,9 @@ kni_ioctl_create(struct net *net, kni_vhost_init(kni); #endif - /** - * Create a new kernel thread for multiple mode, set its core affinity, - * and finally wake it up. - */ - if (multiple_kthread_on) { - kni->pthread = kthread_create(kni_thread_multiple, - (void *)kni, - "kni_%s", kni->name); - if (IS_ERR(kni->pthread)) { - kni_dev_remove(kni); - return -ECANCELED; - } - if (dev_info.force_bind) - kthread_bind(kni->pthread, kni->core_id); - wake_up_process(kni->pthread); - } + ret = kni_run_thread(knet, kni, dev_info.force_bind); + if (ret != 0) + return ret; down_write(&knet->kni_list_lock); list_add(&kni->list, &knet->kni_list_head); @@ -591,8 +495,8 @@ kni_ioctl_create(struct net *net, } static int -kni_ioctl_release(struct net *net, - unsigned int ioctl_num, unsigned long ioctl_param) +kni_ioctl_release(struct net *net, uint32_t ioctl_num, + unsigned long ioctl_param) { struct kni_net *knet = net_generic(net, kni_net_id); int ret = -EINVAL; @@ -600,11 +504,11 @@ kni_ioctl_release(struct net *net, struct rte_kni_device_info dev_info; if (_IOC_SIZE(ioctl_num) > sizeof(dev_info)) - return -EINVAL; + return -EINVAL; ret = copy_from_user(&dev_info, (void *)ioctl_param, sizeof(dev_info)); if (ret) { - KNI_ERR("copy_from_user in kni_ioctl_release"); + pr_err("copy_from_user in kni_ioctl_release"); return -EIO; } @@ -631,21 +535,19 @@ kni_ioctl_release(struct net *net, break; } up_write(&knet->kni_list_lock); - printk(KERN_INFO "KNI: %s release kni named %s\n", + pr_info("%s release kni named %s\n", (ret == 0 ? 
"Successfully" : "Unsuccessfully"), dev_info.name); return ret; } static int -kni_ioctl(struct inode *inode, - unsigned int ioctl_num, - unsigned long ioctl_param) +kni_ioctl(struct inode *inode, uint32_t ioctl_num, unsigned long ioctl_param) { int ret = -EINVAL; struct net *net = current->nsproxy->net_ns; - KNI_DBG("IOCTL num=0x%0x param=0x%0lx\n", ioctl_num, ioctl_param); + pr_debug("IOCTL num=0x%0x param=0x%0lx\n", ioctl_num, ioctl_param); /* * Switch according to the ioctl called @@ -661,7 +563,7 @@ kni_ioctl(struct inode *inode, ret = kni_ioctl_release(net, ioctl_num, ioctl_param); break; default: - KNI_DBG("IOCTL default\n"); + pr_debug("IOCTL default\n"); break; } @@ -669,16 +571,99 @@ kni_ioctl(struct inode *inode, } static int -kni_compat_ioctl(struct inode *inode, - unsigned int ioctl_num, +kni_compat_ioctl(struct inode *inode, uint32_t ioctl_num, unsigned long ioctl_param) { /* 32 bits app on 64 bits OS to be supported later */ - KNI_PRINT("Not implemented.\n"); + pr_debug("Not implemented.\n"); return -EINVAL; } +static const struct file_operations kni_fops = { + .owner = THIS_MODULE, + .open = kni_open, + .release = kni_release, + .unlocked_ioctl = (void *)kni_ioctl, + .compat_ioctl = (void *)kni_compat_ioctl, +}; + +static struct miscdevice kni_misc = { + .minor = MISC_DYNAMIC_MINOR, + .name = KNI_DEVICE, + .fops = &kni_fops, +}; + +static int __init +kni_parse_kthread_mode(void) +{ + if (!kthread_mode) + return 0; + + if (strcmp(kthread_mode, "single") == 0) + return 0; + else if (strcmp(kthread_mode, "multiple") == 0) + multiple_kthread_on = 1; + else + return -1; + + return 0; +} + +static int __init +kni_init(void) +{ + int rc; + + if (kni_parse_kthread_mode() < 0) { + pr_err("Invalid parameter for kthread_mode\n"); + return -EINVAL; + } + + if (multiple_kthread_on == 0) + pr_debug("Single kernel thread for all KNI devices\n"); + else + pr_debug("Multiple kernel thread mode enabled\n"); + +#ifdef HAVE_SIMPLIFIED_PERNET_OPERATIONS + rc = register_pernet_subsys(&kni_net_ops); +#else + rc = register_pernet_gen_subsys(&kni_net_id, &kni_net_ops); +#endif + if (rc) + return -EPERM; + + rc = misc_register(&kni_misc); + if (rc != 0) { + pr_err("Misc registration failed\n"); + goto out; + } + + /* Configure the lo mode according to the input parameter */ + kni_net_config_lo_mode(lo_mode); + + return 0; + +out: +#ifdef HAVE_SIMPLIFIED_PERNET_OPERATIONS + unregister_pernet_subsys(&kni_net_ops); +#else + unregister_pernet_gen_subsys(kni_net_id, &kni_net_ops); +#endif + return rc; +} + +static void __exit +kni_exit(void) +{ + misc_deregister(&kni_misc); +#ifdef HAVE_SIMPLIFIED_PERNET_OPERATIONS + unregister_pernet_subsys(&kni_net_ops); +#else + unregister_pernet_gen_subsys(kni_net_id, &kni_net_ops); +#endif +} + module_init(kni_init); module_exit(kni_exit); diff --git a/lib/librte_eal/linuxapp/kni/kni_net.c b/lib/librte_eal/linuxapp/kni/kni_net.c index fc82193a..4ac99cfe 100644 --- a/lib/librte_eal/linuxapp/kni/kni_net.c +++ b/lib/librte_eal/linuxapp/kni/kni_net.c @@ -44,23 +44,103 @@ #define WD_TIMEOUT 5 /*jiffies */ -#define MBUF_BURST_SZ 32 - #define KNI_WAIT_RESPONSE_TIMEOUT 300 /* 3 seconds */ /* typedef for rx function */ typedef void (*kni_net_rx_t)(struct kni_dev *kni); -static int kni_net_tx(struct sk_buff *skb, struct net_device *dev); static void kni_net_rx_normal(struct kni_dev *kni); -static void kni_net_rx_lo_fifo(struct kni_dev *kni); -static void kni_net_rx_lo_fifo_skb(struct kni_dev *kni); -static int kni_net_process_request(struct kni_dev *kni, - struct 
rte_kni_request *req); /* kni rx function pointer, with default to normal rx */ static kni_net_rx_t kni_net_rx_func = kni_net_rx_normal; +/* physical address to kernel virtual address */ +static void * +pa2kva(void *pa) +{ + return phys_to_virt((unsigned long)pa); +} + +/* physical address to virtual address */ +static void * +pa2va(void *pa, struct rte_kni_mbuf *m) +{ + void *va; + + va = (void *)((unsigned long)pa + + (unsigned long)m->buf_addr - + (unsigned long)m->buf_physaddr); + return va; +} + +/* mbuf data kernel virtual address from mbuf kernel virtual address */ +static void * +kva2data_kva(struct rte_kni_mbuf *m) +{ + return phys_to_virt(m->buf_physaddr + m->data_off); +} + +/* virtual address to physical address */ +static void * +va2pa(void *va, struct rte_kni_mbuf *m) +{ + void *pa; + + pa = (void *)((unsigned long)va - + ((unsigned long)m->buf_addr - + (unsigned long)m->buf_physaddr)); + return pa; +} + +/* + * It can be called to process the request. + */ +static int +kni_net_process_request(struct kni_dev *kni, struct rte_kni_request *req) +{ + int ret = -1; + void *resp_va; + uint32_t num; + int ret_val; + + if (!kni || !req) { + pr_err("No kni instance or request\n"); + return -EINVAL; + } + + mutex_lock(&kni->sync_lock); + + /* Construct data */ + memcpy(kni->sync_kva, req, sizeof(struct rte_kni_request)); + num = kni_fifo_put(kni->req_q, &kni->sync_va, 1); + if (num < 1) { + pr_err("Cannot send to req_q\n"); + ret = -EBUSY; + goto fail; + } + + ret_val = wait_event_interruptible_timeout(kni->wq, + kni_fifo_count(kni->resp_q), 3 * HZ); + if (signal_pending(current) || ret_val <= 0) { + ret = -ETIME; + goto fail; + } + num = kni_fifo_get(kni->resp_q, (void **)&resp_va, 1); + if (num != 1 || resp_va != kni->sync_va) { + /* This should never happen */ + pr_err("No data in resp_q\n"); + ret = -ENODATA; + goto fail; + } + + memcpy(req, kni->sync_kva, sizeof(struct rte_kni_request)); + ret = 0; + +fail: + mutex_unlock(&kni->sync_lock); + return ret; +} + /* * Open and close */ @@ -115,19 +195,113 @@ kni_net_config(struct net_device *dev, struct ifmap *map) return 0; } +/* + * Transmit a packet (called by the kernel) + */ +#ifdef RTE_KNI_VHOST +static int +kni_net_tx(struct sk_buff *skb, struct net_device *dev) +{ + struct kni_dev *kni = netdev_priv(dev); + + dev_kfree_skb(skb); + kni->stats.tx_dropped++; + + return NETDEV_TX_OK; +} +#else +static int +kni_net_tx(struct sk_buff *skb, struct net_device *dev) +{ + int len = 0; + uint32_t ret; + struct kni_dev *kni = netdev_priv(dev); + struct rte_kni_mbuf *pkt_kva = NULL; + void *pkt_pa = NULL; + void *pkt_va = NULL; + + /* save the timestamp */ +#ifdef HAVE_TRANS_START_HELPER + netif_trans_update(dev); +#else + dev->trans_start = jiffies; +#endif + + /* Check if the length of skb is less than mbuf size */ + if (skb->len > kni->mbuf_size) + goto drop; + + /** + * Check if it has at least one free entry in tx_q and + * one entry in alloc_q. + */ + if (kni_fifo_free_count(kni->tx_q) == 0 || + kni_fifo_count(kni->alloc_q) == 0) { + /** + * If no free entry in tx_q or no entry in alloc_q, + * drops skb and goes out. 
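/*
 * The pa2kva()/pa2va()/va2pa() helpers introduced above all rest on one
 * invariant: within a given mbuf, the user-space virtual address and the
 * physical address of the buffer differ by the constant
 * (buf_addr - buf_physaddr).  A user-space toy model of that arithmetic; the
 * struct and values below are stand-ins that mirror rte_kni_mbuf, not the
 * real layout.
 */
#include <stdint.h>
#include <stdio.h>

struct toy_mbuf {
	void	 *buf_addr;	/* virtual address of the data buffer */
	uint64_t  buf_physaddr;	/* physical address of the same buffer */
};

/* pa -> va: add the per-mbuf delta, as pa2va() does in the patch. */
static void *toy_pa2va(uint64_t pa, const struct toy_mbuf *m)
{
	return (void *)(uintptr_t)(pa +
		((uintptr_t)m->buf_addr - (uintptr_t)m->buf_physaddr));
}

/* va -> pa: subtract the same delta, as va2pa() does. */
static uint64_t toy_va2pa(const void *va, const struct toy_mbuf *m)
{
	return (uint64_t)((uintptr_t)va -
		((uintptr_t)m->buf_addr - (uintptr_t)m->buf_physaddr));
}

int main(void)
{
	struct toy_mbuf m = {
		.buf_addr = (void *)(uintptr_t)0x7f0000001000ULL,
		.buf_physaddr = 0x1000,
	};
	uint64_t pa = 0x1080;	/* some address inside the buffer */
	void *va = toy_pa2va(pa, &m);

	printf("pa 0x%llx -> va %p -> pa 0x%llx\n",
	       (unsigned long long)pa, va,
	       (unsigned long long)toy_va2pa(va, &m));
	return 0;
}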
+ */ + goto drop; + } + + /* dequeue a mbuf from alloc_q */ + ret = kni_fifo_get(kni->alloc_q, &pkt_pa, 1); + if (likely(ret == 1)) { + void *data_kva; + + pkt_kva = pa2kva(pkt_pa); + data_kva = kva2data_kva(pkt_kva); + pkt_va = pa2va(pkt_pa, pkt_kva); + + len = skb->len; + memcpy(data_kva, skb->data, len); + if (unlikely(len < ETH_ZLEN)) { + memset(data_kva + len, 0, ETH_ZLEN - len); + len = ETH_ZLEN; + } + pkt_kva->pkt_len = len; + pkt_kva->data_len = len; + + /* enqueue mbuf into tx_q */ + ret = kni_fifo_put(kni->tx_q, &pkt_va, 1); + if (unlikely(ret != 1)) { + /* Failing should not happen */ + pr_err("Fail to enqueue mbuf into tx_q\n"); + goto drop; + } + } else { + /* Failing should not happen */ + pr_err("Fail to dequeue mbuf from alloc_q\n"); + goto drop; + } + + /* Free skb and update statistics */ + dev_kfree_skb(skb); + kni->stats.tx_bytes += len; + kni->stats.tx_packets++; + + return NETDEV_TX_OK; + +drop: + /* Free skb and update statistics */ + dev_kfree_skb(skb); + kni->stats.tx_dropped++; + + return NETDEV_TX_OK; +} +#endif + /* * RX: normal working mode */ static void kni_net_rx_normal(struct kni_dev *kni) { - unsigned ret; + uint32_t ret; uint32_t len; - unsigned i, num_rx, num_fq; + uint32_t i, num_rx, num_fq; struct rte_kni_mbuf *kva; - struct rte_kni_mbuf *va[MBUF_BURST_SZ]; - void * data_kva; - + void *data_kva; struct sk_buff *skb; struct net_device *dev = kni->net_dev; @@ -139,24 +313,22 @@ kni_net_rx_normal(struct kni_dev *kni) } /* Calculate the number of entries to dequeue from rx_q */ - num_rx = min(num_fq, (unsigned)MBUF_BURST_SZ); + num_rx = min_t(uint32_t, num_fq, MBUF_BURST_SZ); /* Burst dequeue from rx_q */ - num_rx = kni_fifo_get(kni->rx_q, (void **)va, num_rx); + num_rx = kni_fifo_get(kni->rx_q, kni->pa, num_rx); if (num_rx == 0) return; /* Transfer received packets to netif */ for (i = 0; i < num_rx; i++) { - kva = (void *)va[i] - kni->mbuf_va + kni->mbuf_kva; + kva = pa2kva(kni->pa[i]); len = kva->pkt_len; - - data_kva = kva->buf_addr + kva->data_off - kni->mbuf_va - + kni->mbuf_kva; + data_kva = kva2data_kva(kva); + kni->va[i] = pa2va(kni->pa[i], kva); skb = dev_alloc_skb(len + 2); if (!skb) { - KNI_ERR("Out of mem, dropping pkts\n"); /* Update statistics */ kni->stats.rx_dropped++; continue; @@ -178,9 +350,8 @@ kni_net_rx_normal(struct kni_dev *kni) if (!kva->next) break; - kva = kva->next - kni->mbuf_va + kni->mbuf_kva; - data_kva = kva->buf_addr + kva->data_off - - kni->mbuf_va + kni->mbuf_kva; + kva = pa2kva(va2pa(kva->next, kva)); + data_kva = kva2data_kva(kva); } } @@ -197,10 +368,10 @@ kni_net_rx_normal(struct kni_dev *kni) } /* Burst enqueue mbufs into free_q */ - ret = kni_fifo_put(kni->free_q, (void **)va, num_rx); + ret = kni_fifo_put(kni->free_q, kni->va, num_rx); if (ret != num_rx) /* Failing should not happen */ - KNI_ERR("Fail to enqueue entries into free_q\n"); + pr_err("Fail to enqueue entries into free_q\n"); } /* @@ -209,15 +380,12 @@ kni_net_rx_normal(struct kni_dev *kni) static void kni_net_rx_lo_fifo(struct kni_dev *kni) { - unsigned ret; + uint32_t ret; uint32_t len; - unsigned i, num, num_rq, num_tq, num_aq, num_fq; + uint32_t i, num, num_rq, num_tq, num_aq, num_fq; struct rte_kni_mbuf *kva; - struct rte_kni_mbuf *va[MBUF_BURST_SZ]; - void * data_kva; - + void *data_kva; struct rte_kni_mbuf *alloc_kva; - struct rte_kni_mbuf *alloc_va[MBUF_BURST_SZ]; void *alloc_data_kva; /* Get the number of entries in rx_q */ @@ -236,33 +404,32 @@ kni_net_rx_lo_fifo(struct kni_dev *kni) num = min(num_rq, num_tq); num = min(num, num_aq); num = 
min(num, num_fq); - num = min(num, (unsigned)MBUF_BURST_SZ); + num = min_t(uint32_t, num, MBUF_BURST_SZ); /* Return if no entry to dequeue from rx_q */ if (num == 0) return; /* Burst dequeue from rx_q */ - ret = kni_fifo_get(kni->rx_q, (void **)va, num); + ret = kni_fifo_get(kni->rx_q, kni->pa, num); if (ret == 0) return; /* Failing should not happen */ /* Dequeue entries from alloc_q */ - ret = kni_fifo_get(kni->alloc_q, (void **)alloc_va, num); + ret = kni_fifo_get(kni->alloc_q, kni->alloc_pa, num); if (ret) { num = ret; /* Copy mbufs */ for (i = 0; i < num; i++) { - kva = (void *)va[i] - kni->mbuf_va + kni->mbuf_kva; + kva = pa2kva(kni->pa[i]); len = kva->pkt_len; - data_kva = kva->buf_addr + kva->data_off - - kni->mbuf_va + kni->mbuf_kva; - - alloc_kva = (void *)alloc_va[i] - kni->mbuf_va + - kni->mbuf_kva; - alloc_data_kva = alloc_kva->buf_addr + - alloc_kva->data_off - kni->mbuf_va + - kni->mbuf_kva; + data_kva = kva2data_kva(kva); + kni->va[i] = pa2va(kni->pa[i], kva); + + alloc_kva = pa2kva(kni->alloc_pa[i]); + alloc_data_kva = kva2data_kva(alloc_kva); + kni->alloc_va[i] = pa2va(kni->alloc_pa[i], alloc_kva); + memcpy(alloc_data_kva, data_kva, len); alloc_kva->pkt_len = len; alloc_kva->data_len = len; @@ -272,17 +439,17 @@ kni_net_rx_lo_fifo(struct kni_dev *kni) } /* Burst enqueue mbufs into tx_q */ - ret = kni_fifo_put(kni->tx_q, (void **)alloc_va, num); + ret = kni_fifo_put(kni->tx_q, kni->alloc_va, num); if (ret != num) /* Failing should not happen */ - KNI_ERR("Fail to enqueue mbufs into tx_q\n"); + pr_err("Fail to enqueue mbufs into tx_q\n"); } /* Burst enqueue mbufs into free_q */ - ret = kni_fifo_put(kni->free_q, (void **)va, num); + ret = kni_fifo_put(kni->free_q, kni->va, num); if (ret != num) /* Failing should not happen */ - KNI_ERR("Fail to enqueue mbufs into free_q\n"); + pr_err("Fail to enqueue mbufs into free_q\n"); /** * Update statistic, and enqueue/dequeue failure is impossible, @@ -298,13 +465,11 @@ kni_net_rx_lo_fifo(struct kni_dev *kni) static void kni_net_rx_lo_fifo_skb(struct kni_dev *kni) { - unsigned ret; + uint32_t ret; uint32_t len; - unsigned i, num_rq, num_fq, num; + uint32_t i, num_rq, num_fq, num; struct rte_kni_mbuf *kva; - struct rte_kni_mbuf *va[MBUF_BURST_SZ]; - void * data_kva; - + void *data_kva; struct sk_buff *skb; struct net_device *dev = kni->net_dev; @@ -316,28 +481,26 @@ kni_net_rx_lo_fifo_skb(struct kni_dev *kni) /* Calculate the number of entries to dequeue from rx_q */ num = min(num_rq, num_fq); - num = min(num, (unsigned)MBUF_BURST_SZ); + num = min_t(uint32_t, num, MBUF_BURST_SZ); /* Return if no entry to dequeue from rx_q */ if (num == 0) return; /* Burst dequeue mbufs from rx_q */ - ret = kni_fifo_get(kni->rx_q, (void **)va, num); + ret = kni_fifo_get(kni->rx_q, kni->pa, num); if (ret == 0) return; /* Copy mbufs to sk buffer and then call tx interface */ for (i = 0; i < num; i++) { - kva = (void *)va[i] - kni->mbuf_va + kni->mbuf_kva; + kva = pa2kva(kni->pa[i]); len = kva->pkt_len; - data_kva = kva->buf_addr + kva->data_off - kni->mbuf_va + - kni->mbuf_kva; + data_kva = kva2data_kva(kva); + kni->va[i] = pa2va(kni->pa[i], kva); skb = dev_alloc_skb(len + 2); - if (skb == NULL) - KNI_ERR("Out of mem, dropping pkts\n"); - else { + if (skb) { /* Align IP on 16B boundary */ skb_reserve(skb, 2); memcpy(skb_put(skb, len), data_kva, len); @@ -349,7 +512,6 @@ kni_net_rx_lo_fifo_skb(struct kni_dev *kni) /* Simulate real usage, allocate/copy skb twice */ skb = dev_alloc_skb(len + 2); if (skb == NULL) { - KNI_ERR("Out of mem, dropping pkts\n"); 
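/*
 * In the rx paths above, dev_alloc_skb(len + 2) followed by
 * skb_reserve(skb, 2) is the classic alignment trick: after the 14-byte
 * Ethernet header, the IP header lands on a 16-byte boundary.  A condensed
 * sketch of the single-segment case handled by kni_net_rx_normal(); the
 * helper name is illustrative.
 */
#include <linux/skbuff.h>
#include <linux/netdevice.h>
#include <linux/etherdevice.h>
#include <linux/string.h>

static struct sk_buff *
payload_to_skb(struct net_device *dev, const void *data, unsigned int len)
{
	struct sk_buff *skb = dev_alloc_skb(len + 2);

	if (!skb)
		return NULL;		/* caller bumps rx_dropped */
	skb_reserve(skb, 2);		/* align IP header on 16 bytes */
	memcpy(skb_put(skb, len), data, len);
	skb->dev = dev;
	skb->protocol = eth_type_trans(skb, dev);
	skb->ip_summed = CHECKSUM_UNNECESSARY;
	return skb;
}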
kni->stats.rx_dropped++; continue; } @@ -370,9 +532,8 @@ kni_net_rx_lo_fifo_skb(struct kni_dev *kni) if (!kva->next) break; - kva = kva->next - kni->mbuf_va + kni->mbuf_kva; - data_kva = kva->buf_addr + kva->data_off - - kni->mbuf_va + kni->mbuf_kva; + kva = pa2kva(va2pa(kva->next, kva)); + data_kva = kva2data_kva(kva); } } @@ -387,10 +548,10 @@ kni_net_rx_lo_fifo_skb(struct kni_dev *kni) } /* enqueue all the mbufs from rx_q into free_q */ - ret = kni_fifo_put(kni->free_q, (void **)&va, num); + ret = kni_fifo_put(kni->free_q, kni->va, num); if (ret != num) /* Failing should not happen */ - KNI_ERR("Fail to enqueue mbufs into free_q\n"); + pr_err("Fail to enqueue mbufs into free_q\n"); } /* rx interface */ @@ -404,115 +565,19 @@ kni_net_rx(struct kni_dev *kni) (*kni_net_rx_func)(kni); } -/* - * Transmit a packet (called by the kernel) - */ -#ifdef RTE_KNI_VHOST -static int -kni_net_tx(struct sk_buff *skb, struct net_device *dev) -{ - struct kni_dev *kni = netdev_priv(dev); - - dev_kfree_skb(skb); - kni->stats.tx_dropped++; - - return NETDEV_TX_OK; -} -#else -static int -kni_net_tx(struct sk_buff *skb, struct net_device *dev) -{ - int len = 0; - unsigned ret; - struct kni_dev *kni = netdev_priv(dev); - struct rte_kni_mbuf *pkt_kva = NULL; - struct rte_kni_mbuf *pkt_va = NULL; - - /* save the timestamp */ -#ifdef HAVE_TRANS_START_HELPER - netif_trans_update(dev); -#else - dev->trans_start = jiffies; -#endif - - /* Check if the length of skb is less than mbuf size */ - if (skb->len > kni->mbuf_size) - goto drop; - - /** - * Check if it has at least one free entry in tx_q and - * one entry in alloc_q. - */ - if (kni_fifo_free_count(kni->tx_q) == 0 || - kni_fifo_count(kni->alloc_q) == 0) { - /** - * If no free entry in tx_q or no entry in alloc_q, - * drops skb and goes out. - */ - goto drop; - } - - /* dequeue a mbuf from alloc_q */ - ret = kni_fifo_get(kni->alloc_q, (void **)&pkt_va, 1); - if (likely(ret == 1)) { - void *data_kva; - - pkt_kva = (void *)pkt_va - kni->mbuf_va + kni->mbuf_kva; - data_kva = pkt_kva->buf_addr + pkt_kva->data_off - kni->mbuf_va - + kni->mbuf_kva; - - len = skb->len; - memcpy(data_kva, skb->data, len); - if (unlikely(len < ETH_ZLEN)) { - memset(data_kva + len, 0, ETH_ZLEN - len); - len = ETH_ZLEN; - } - pkt_kva->pkt_len = len; - pkt_kva->data_len = len; - - /* enqueue mbuf into tx_q */ - ret = kni_fifo_put(kni->tx_q, (void **)&pkt_va, 1); - if (unlikely(ret != 1)) { - /* Failing should not happen */ - KNI_ERR("Fail to enqueue mbuf into tx_q\n"); - goto drop; - } - } else { - /* Failing should not happen */ - KNI_ERR("Fail to dequeue mbuf from alloc_q\n"); - goto drop; - } - - /* Free skb and update statistics */ - dev_kfree_skb(skb); - kni->stats.tx_bytes += len; - kni->stats.tx_packets++; - - return NETDEV_TX_OK; - -drop: - /* Free skb and update statistics */ - dev_kfree_skb(skb); - kni->stats.tx_dropped++; - - return NETDEV_TX_OK; -} -#endif - /* * Deal with a transmit timeout. 
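/*
 * The loopback rx paths above size each burst as the minimum of every queue
 * depth involved, capped by MBUF_BURST_SZ.  The define is removed from
 * kni_net.c by this patch and presumably shared from a common header now;
 * 32 below is its prior value, assumed unchanged.  A user-space restatement
 * of the sizing rule:
 */
#include <stdint.h>

#define MBUF_BURST_SZ 32u	/* assumed cap, historical value */

static inline uint32_t min_u32(uint32_t a, uint32_t b)
{
	return a < b ? a : b;
}

/* Largest burst that fits every queue: rx_q entries to drain, tx_q slots,
 * alloc_q mbufs to copy into, and free_q slots to return the originals. */
static uint32_t burst_size(uint32_t num_rq, uint32_t num_tq,
			   uint32_t num_aq, uint32_t num_fq)
{
	uint32_t num = min_u32(num_rq, num_tq);

	num = min_u32(num, num_aq);
	num = min_u32(num, num_fq);
	return min_u32(num, MBUF_BURST_SZ);
}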
*/ static void -kni_net_tx_timeout (struct net_device *dev) +kni_net_tx_timeout(struct net_device *dev) { struct kni_dev *kni = netdev_priv(dev); - KNI_DBG("Transmit timeout at %ld, latency %ld\n", jiffies, - jiffies - dev->trans_start); + pr_debug("Transmit timeout at %ld, latency %ld\n", jiffies, + jiffies - dev_trans_start(dev)); kni->stats.tx_errors++; netif_wake_queue(dev); - return; } /* @@ -521,8 +586,8 @@ kni_net_tx_timeout (struct net_device *dev) static int kni_net_ioctl(struct net_device *dev, struct ifreq *rq, int cmd) { - KNI_DBG("kni_net_ioctl %d\n", - ((struct kni_dev *)netdev_priv(dev))->group_id); + pr_debug("kni_net_ioctl group:%d cmd:%d\n", + ((struct kni_dev *)netdev_priv(dev))->group_id, cmd); return 0; } @@ -539,7 +604,7 @@ kni_net_change_mtu(struct net_device *dev, int new_mtu) struct rte_kni_request req; struct kni_dev *kni = netdev_priv(dev); - KNI_DBG("kni_net_change_mtu new mtu %d to be set\n", new_mtu); + pr_debug("kni_net_change_mtu new mtu %d to be set\n", new_mtu); memset(&req, 0, sizeof(req)); req.req_id = RTE_KNI_REQ_CHANGE_MTU; @@ -561,55 +626,6 @@ kni_net_poll_resp(struct kni_dev *kni) wake_up_interruptible(&kni->wq); } -/* - * It can be called to process the request. - */ -static int -kni_net_process_request(struct kni_dev *kni, struct rte_kni_request *req) -{ - int ret = -1; - void *resp_va; - unsigned num; - int ret_val; - - if (!kni || !req) { - KNI_ERR("No kni instance or request\n"); - return -EINVAL; - } - - mutex_lock(&kni->sync_lock); - - /* Construct data */ - memcpy(kni->sync_kva, req, sizeof(struct rte_kni_request)); - num = kni_fifo_put(kni->req_q, &kni->sync_va, 1); - if (num < 1) { - KNI_ERR("Cannot send to req_q\n"); - ret = -EBUSY; - goto fail; - } - - ret_val = wait_event_interruptible_timeout(kni->wq, - kni_fifo_count(kni->resp_q), 3 * HZ); - if (signal_pending(current) || ret_val <= 0) { - ret = -ETIME; - goto fail; - } - num = kni_fifo_get(kni->resp_q, (void **)&resp_va, 1); - if (num != 1 || resp_va != kni->sync_va) { - /* This should never happen */ - KNI_ERR("No data in resp_q\n"); - ret = -ENODATA; - goto fail; - } - - memcpy(req, kni->sync_kva, sizeof(struct rte_kni_request)); - ret = 0; - -fail: - mutex_unlock(&kni->sync_lock); - return ret; -} - /* * Return statistics to the caller */ @@ -617,6 +633,7 @@ static struct net_device_stats * kni_net_stats(struct net_device *dev) { struct kni_dev *kni = netdev_priv(dev); + return &kni->stats; } @@ -626,7 +643,7 @@ kni_net_stats(struct net_device *dev) static int kni_net_header(struct sk_buff *skb, struct net_device *dev, unsigned short type, const void *daddr, - const void *saddr, unsigned int len) + const void *saddr, uint32_t len) { struct ethhdr *eth = (struct ethhdr *) skb_push(skb, ETH_HLEN); @@ -637,7 +654,6 @@ kni_net_header(struct sk_buff *skb, struct net_device *dev, return dev->hard_header_len; } - /* * Re-fill the eth header */ @@ -662,9 +678,11 @@ kni_net_rebuild_header(struct sk_buff *skb) * * Returns 0 on success, negative on failure **/ -static int kni_net_set_mac(struct net_device *netdev, void *p) +static int +kni_net_set_mac(struct net_device *netdev, void *p) { struct sockaddr *addr = p; + if (!is_valid_ether_addr((unsigned char *)(addr->sa_data))) return -EADDRNOTAVAIL; memcpy(netdev->dev_addr, addr->sa_data, netdev->addr_len); @@ -672,7 +690,8 @@ static int kni_net_set_mac(struct net_device *netdev, void *p) } #ifdef HAVE_CHANGE_CARRIER_CB -static int kni_net_change_carrier(struct net_device *dev, bool new_carrier) +static int +kni_net_change_carrier(struct 
net_device *dev, bool new_carrier) { if (new_carrier) netif_carrier_on(dev); @@ -711,8 +730,6 @@ kni_net_init(struct net_device *dev) { struct kni_dev *kni = netdev_priv(dev); - KNI_DBG("kni_net_init\n"); - init_waitqueue_head(&kni->wq); mutex_init(&kni->sync_lock); @@ -726,18 +743,18 @@ void kni_net_config_lo_mode(char *lo_str) { if (!lo_str) { - KNI_PRINT("loopback disabled"); + pr_debug("loopback disabled"); return; } if (!strcmp(lo_str, "lo_mode_none")) - KNI_PRINT("loopback disabled"); + pr_debug("loopback disabled"); else if (!strcmp(lo_str, "lo_mode_fifo")) { - KNI_PRINT("loopback mode=lo_mode_fifo enabled"); + pr_debug("loopback mode=lo_mode_fifo enabled"); kni_net_rx_func = kni_net_rx_lo_fifo; } else if (!strcmp(lo_str, "lo_mode_fifo_skb")) { - KNI_PRINT("loopback mode=lo_mode_fifo_skb enabled"); + pr_debug("loopback mode=lo_mode_fifo_skb enabled"); kni_net_rx_func = kni_net_rx_lo_fifo_skb; } else - KNI_PRINT("Incognizant parameter, loopback disabled"); + pr_debug("Incognizant parameter, loopback disabled"); } diff --git a/lib/librte_eal/linuxapp/kni/kni_vhost.c b/lib/librte_eal/linuxapp/kni/kni_vhost.c index a3ca8499..f54c34b1 100644 --- a/lib/librte_eal/linuxapp/kni/kni_vhost.c +++ b/lib/librte_eal/linuxapp/kni/kni_vhost.c @@ -32,6 +32,7 @@ #include #include #include +#include #include "compat.h" #include "kni_dev.h" @@ -39,21 +40,12 @@ #define RX_BURST_SZ 4 -extern void put_unused_fd(unsigned int fd); - -#if LINUX_VERSION_CODE >= KERNEL_VERSION(3,7,0) -extern struct file* -sock_alloc_file(struct socket *sock, - int flags, const char *dname); - -extern int get_unused_fd_flags(unsigned flags); - -extern void fd_install(unsigned int fd, struct file *file); - +#ifdef HAVE_STATIC_SOCK_MAP_FD static int kni_sock_map_fd(struct socket *sock) { struct file *file; int fd = get_unused_fd_flags(0); + if (fd < 0) return fd; @@ -65,8 +57,6 @@ static int kni_sock_map_fd(struct socket *sock) fd_install(fd, file); return fd; } -#else -#define kni_sock_map_fd(s) sock_map_fd(s, 0) #endif static struct proto kni_raw_proto = { @@ -77,13 +67,13 @@ static struct proto kni_raw_proto = { static inline int kni_vhost_net_tx(struct kni_dev *kni, struct msghdr *m, - unsigned offset, unsigned len) + uint32_t offset, uint32_t len) { struct rte_kni_mbuf *pkt_kva = NULL; struct rte_kni_mbuf *pkt_va = NULL; int ret; - KNI_DBG_TX("tx offset=%d, len=%d, iovlen=%d\n", + pr_debug("tx offset=%d, len=%d, iovlen=%d\n", #ifdef HAVE_IOV_ITER_MSGHDR offset, len, (int)m->msg_iter.iov->iov_len); #else @@ -110,7 +100,7 @@ kni_vhost_net_tx(struct kni_dev *kni, struct msghdr *m, pkt_kva = (void *)pkt_va - kni->mbuf_va + kni->mbuf_kva; data_kva = pkt_kva->buf_addr + pkt_kva->data_off - - kni->mbuf_va + kni->mbuf_kva; + - kni->mbuf_va + kni->mbuf_kva; #ifdef HAVE_IOV_ITER_MSGHDR copy_from_iter(data_kva, len, &m->msg_iter); @@ -129,12 +119,12 @@ kni_vhost_net_tx(struct kni_dev *kni, struct msghdr *m, ret = kni_fifo_put(kni->tx_q, (void **)&pkt_va, 1); if (unlikely(ret != 1)) { /* Failing should not happen */ - KNI_ERR("Fail to enqueue mbuf into tx_q\n"); + pr_err("Fail to enqueue mbuf into tx_q\n"); goto drop; } } else { /* Failing should not happen */ - KNI_ERR("Fail to dequeue mbuf from alloc_q\n"); + pr_err("Fail to dequeue mbuf from alloc_q\n"); goto drop; } @@ -153,12 +143,12 @@ drop: static inline int kni_vhost_net_rx(struct kni_dev *kni, struct msghdr *m, - unsigned offset, unsigned len) + uint32_t offset, uint32_t len) { uint32_t pkt_len; struct rte_kni_mbuf *kva; struct rte_kni_mbuf *va; - void * data_kva; + void 
*data_kva; struct sk_buff *skb; struct kni_vhost_queue *q = kni->vhost_queue; @@ -173,19 +163,19 @@ kni_vhost_net_rx(struct kni_dev *kni, struct msghdr *m, if (unlikely(skb == NULL)) return 0; - kva = (struct rte_kni_mbuf*)skb->data; + kva = (struct rte_kni_mbuf *)skb->data; /* free skb to cache */ skb->data = NULL; - if (unlikely(1 != kni_fifo_put(q->fifo, (void **)&skb, 1))) + if (unlikely(kni_fifo_put(q->fifo, (void **)&skb, 1) != 1)) /* Failing should not happen */ - KNI_ERR("Fail to enqueue entries into rx cache fifo\n"); + pr_err("Fail to enqueue entries into rx cache fifo\n"); pkt_len = kva->data_len; if (unlikely(pkt_len > len)) goto drop; - KNI_DBG_RX("rx offset=%d, len=%d, pkt_len=%d, iovlen=%d\n", + pr_debug("rx offset=%d, len=%d, pkt_len=%d, iovlen=%d\n", #ifdef HAVE_IOV_ITER_MSGHDR offset, len, pkt_len, (int)m->msg_iter.iov->iov_len); #else @@ -205,12 +195,12 @@ kni_vhost_net_rx(struct kni_dev *kni, struct msghdr *m, kni->stats.rx_packets++; /* enqueue mbufs into free_q */ - va = (void*)kva - kni->mbuf_kva + kni->mbuf_va; - if (unlikely(1 != kni_fifo_put(kni->free_q, (void **)&va, 1))) + va = (void *)kva - kni->mbuf_kva + kni->mbuf_va; + if (unlikely(kni_fifo_put(kni->free_q, (void **)&va, 1) != 1)) /* Failing should not happen */ - KNI_ERR("Fail to enqueue entries into free_q\n"); + pr_err("Fail to enqueue entries into free_q\n"); - KNI_DBG_RX("receive done %d\n", pkt_len); + pr_debug("receive done %d\n", pkt_len); return pkt_len; @@ -221,29 +211,25 @@ drop: return 0; } -static unsigned int -kni_sock_poll(struct file *file, struct socket *sock, poll_table * wait) +static uint32_t +kni_sock_poll(struct file *file, struct socket *sock, poll_table *wait) { struct kni_vhost_queue *q = container_of(sock->sk, struct kni_vhost_queue, sk); struct kni_dev *kni; - unsigned int mask = 0; + uint32_t mask = 0; if (unlikely(q == NULL || q->kni == NULL)) return POLLERR; kni = q->kni; -#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,35) - KNI_DBG("start kni_poll on group %d, wq 0x%16llx\n", +#ifdef HAVE_SOCKET_WQ + pr_debug("start kni_poll on group %d, wq 0x%16llx\n", kni->group_id, (uint64_t)sock->wq); -#else - KNI_DBG("start kni_poll on group %d, wait at 0x%16llx\n", - kni->group_id, (uint64_t)&sock->wait); -#endif - -#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,35) poll_wait(file, &sock->wq->wait, wait); #else + pr_debug("start kni_poll on group %d, wait at 0x%16llx\n", + kni->group_id, (uint64_t)&sock->wait); poll_wait(file, &sock->wait, wait); #endif @@ -252,11 +238,12 @@ kni_sock_poll(struct file *file, struct socket *sock, poll_table * wait) if (sock_writeable(&q->sk) || #ifdef SOCKWQ_ASYNC_NOSPACE - (!test_and_set_bit(SOCKWQ_ASYNC_NOSPACE, &q->sock->flags) && + (!test_and_set_bit(SOCKWQ_ASYNC_NOSPACE, &q->sock->flags) && + sock_writeable(&q->sk))) #else - (!test_and_set_bit(SOCK_ASYNC_NOSPACE, &q->sock->flags) && + (!test_and_set_bit(SOCK_ASYNC_NOSPACE, &q->sock->flags) && + sock_writeable(&q->sk))) #endif - sock_writeable(&q->sk))) mask |= POLLOUT | POLLWRNORM; return mask; @@ -269,7 +256,7 @@ kni_vhost_enqueue(struct kni_dev *kni, struct kni_vhost_queue *q, struct rte_kni_mbuf *kva; kva = (void *)(va) - kni->mbuf_va + kni->mbuf_kva; - (skb)->data = (unsigned char*)kva; + (skb)->data = (unsigned char *)kva; (skb)->len = kva->data_len; skb_queue_tail(&q->sk.sk_receive_queue, skb); } @@ -279,6 +266,7 @@ kni_vhost_enqueue_burst(struct kni_dev *kni, struct kni_vhost_queue *q, struct sk_buff **skb, struct rte_kni_mbuf **va) { int i; + for (i = 0; i < RX_BURST_SZ; skb++, va++, i++) 
kni_vhost_enqueue(kni, q, *skb, *va); } @@ -287,9 +275,9 @@ int kni_chk_vhost_rx(struct kni_dev *kni) { struct kni_vhost_queue *q = kni->vhost_queue; - unsigned nb_in, nb_mbuf, nb_skb; - const unsigned BURST_MASK = RX_BURST_SZ - 1; - unsigned nb_burst, nb_backlog, i; + uint32_t nb_in, nb_mbuf, nb_skb; + const uint32_t BURST_MASK = RX_BURST_SZ - 1; + uint32_t nb_burst, nb_backlog, i; struct sk_buff *skb[RX_BURST_SZ]; struct rte_kni_mbuf *va[RX_BURST_SZ]; @@ -305,20 +293,18 @@ kni_chk_vhost_rx(struct kni_dev *kni) nb_mbuf = kni_fifo_count(kni->rx_q); nb_in = min(nb_mbuf, nb_skb); - nb_in = min(nb_in, (unsigned)RX_BURST_SZ); + nb_in = min_t(uint32_t, nb_in, RX_BURST_SZ); nb_burst = (nb_in & ~BURST_MASK); nb_backlog = (nb_in & BURST_MASK); /* enqueue skb_queue per BURST_SIZE bulk */ - if (0 != nb_burst) { - if (unlikely(RX_BURST_SZ != kni_fifo_get( - kni->rx_q, (void **)&va, - RX_BURST_SZ))) + if (nb_burst != 0) { + if (unlikely(kni_fifo_get(kni->rx_q, (void **)&va, RX_BURST_SZ) + != RX_BURST_SZ)) goto except; - if (unlikely(RX_BURST_SZ != kni_fifo_get( - q->fifo, (void **)&skb, - RX_BURST_SZ))) + if (unlikely(kni_fifo_get(q->fifo, (void **)&skb, RX_BURST_SZ) + != RX_BURST_SZ)) goto except; kni_vhost_enqueue_burst(kni, q, skb, va); @@ -326,12 +312,10 @@ kni_chk_vhost_rx(struct kni_dev *kni) /* all leftover, do one by one */ for (i = 0; i < nb_backlog; ++i) { - if (unlikely(1 != kni_fifo_get( - kni->rx_q,(void **)&va, 1))) + if (unlikely(kni_fifo_get(kni->rx_q, (void **)&va, 1) != 1)) goto except; - if (unlikely(1 != kni_fifo_get( - q->fifo, (void **)&skb, 1))) + if (unlikely(kni_fifo_get(q->fifo, (void **)&skb, 1) != 1)) goto except; kni_vhost_enqueue(kni, q, *skb, *va); @@ -342,7 +326,7 @@ kni_chk_vhost_rx(struct kni_dev *kni) ((nb_mbuf < RX_BURST_SZ) && (nb_mbuf != 0))) { wake_up_interruptible_poll(sk_sleep(&q->sk), POLLIN | POLLRDNORM | POLLRDBAND); - KNI_DBG_RX("RX CHK KICK nb_mbuf %d, nb_skb %d, nb_in %d\n", + pr_debug("RX CHK KICK nb_mbuf %d, nb_skb %d, nb_in %d\n", nb_mbuf, nb_skb, nb_in); } @@ -350,7 +334,7 @@ kni_chk_vhost_rx(struct kni_dev *kni) except: /* Failing should not happen */ - KNI_ERR("Fail to enqueue fifo, it shouldn't happen \n"); + pr_err("Fail to enqueue fifo, it shouldn't happen\n"); BUG_ON(1); return 0; @@ -373,7 +357,7 @@ kni_sock_sndmsg(struct socket *sock, if (unlikely(q == NULL || q->kni == NULL)) return 0; - KNI_DBG_TX("kni_sndmsg len %ld, flags 0x%08x, nb_iov %d\n", + pr_debug("kni_sndmsg len %ld, flags 0x%08x, nb_iov %d\n", #ifdef HAVE_IOV_ITER_MSGHDR len, q->flags, (int)m->msg_iter.iov->iov_len); #else @@ -420,13 +404,14 @@ kni_sock_rcvmsg(struct socket *sock, #ifdef RTE_KNI_VHOST_VNET_HDR_EN if (likely(q->flags & IFF_VNET_HDR)) { vnet_hdr_len = q->vnet_hdr_sz; - if ((len -= vnet_hdr_len) < 0) + len -= vnet_hdr_len; + if (len < 0) return -EINVAL; } #endif - if (unlikely(0 == (pkt_len = kni_vhost_net_rx(q->kni, - m, vnet_hdr_len, len)))) + pkt_len = kni_vhost_net_rx(q->kni, m, vnet_hdr_len, len); + if (unlikely(pkt_len == 0)) return 0; #ifdef RTE_KNI_VHOST_VNET_HDR_EN @@ -440,7 +425,7 @@ kni_sock_rcvmsg(struct socket *sock, #endif /* HAVE_IOV_ITER_MSGHDR */ return -EFAULT; #endif /* RTE_KNI_VHOST_VNET_HDR_EN */ - KNI_DBG_RX("kni_rcvmsg expect_len %ld, flags 0x%08x, pkt_len %d\n", + pr_debug("kni_rcvmsg expect_len %ld, flags 0x%08x, pkt_len %d\n", (unsigned long)len, q->flags, pkt_len); return pkt_len + vnet_hdr_len; @@ -448,25 +433,24 @@ kni_sock_rcvmsg(struct socket *sock, /* dummy tap like ioctl */ static int -kni_sock_ioctl(struct socket *sock, unsigned int 
cmd, - unsigned long arg) +kni_sock_ioctl(struct socket *sock, uint32_t cmd, unsigned long arg) { void __user *argp = (void __user *)arg; struct ifreq __user *ifr = argp; - unsigned int __user *up = argp; + uint32_t __user *up = argp; struct kni_vhost_queue *q = container_of(sock->sk, struct kni_vhost_queue, sk); struct kni_dev *kni; - unsigned int u; + uint32_t u; int __user *sp = argp; int s; int ret; - KNI_DBG("tap ioctl cmd 0x%08x\n", cmd); + pr_debug("tap ioctl cmd 0x%08x\n", cmd); switch (cmd) { case TUNSETIFF: - KNI_DBG("TUNSETIFF\n"); + pr_debug("TUNSETIFF\n"); /* ignore the name, just look at flags */ if (get_user(u, &ifr->ifr_flags)) return -EFAULT; @@ -480,7 +464,7 @@ kni_sock_ioctl(struct socket *sock, unsigned int cmd, return ret; case TUNGETIFF: - KNI_DBG("TUNGETIFF\n"); + pr_debug("TUNGETIFF\n"); rcu_read_lock_bh(); kni = rcu_dereference_bh(q->kni); if (kni) @@ -491,14 +475,14 @@ kni_sock_ioctl(struct socket *sock, unsigned int cmd, return -ENOLINK; ret = 0; - if (copy_to_user(&ifr->ifr_name, kni->net_dev->name, IFNAMSIZ) || - put_user(q->flags, &ifr->ifr_flags)) + if (copy_to_user(&ifr->ifr_name, kni->net_dev->name, IFNAMSIZ) + || put_user(q->flags, &ifr->ifr_flags)) ret = -EFAULT; dev_put(kni->net_dev); return ret; case TUNGETFEATURES: - KNI_DBG("TUNGETFEATURES\n"); + pr_debug("TUNGETFEATURES\n"); u = IFF_TAP | IFF_NO_PI; #ifdef RTE_KNI_VHOST_VNET_HDR_EN u |= IFF_VNET_HDR; @@ -508,7 +492,7 @@ kni_sock_ioctl(struct socket *sock, unsigned int cmd, return 0; case TUNSETSNDBUF: - KNI_DBG("TUNSETSNDBUF\n"); + pr_debug("TUNSETSNDBUF\n"); if (get_user(u, up)) return -EFAULT; @@ -519,7 +503,7 @@ kni_sock_ioctl(struct socket *sock, unsigned int cmd, s = q->vnet_hdr_sz; if (put_user(s, sp)) return -EFAULT; - KNI_DBG("TUNGETVNETHDRSZ %d\n", s); + pr_debug("TUNGETVNETHDRSZ %d\n", s); return 0; case TUNSETVNETHDRSZ: @@ -528,12 +512,12 @@ kni_sock_ioctl(struct socket *sock, unsigned int cmd, if (s < (int)sizeof(struct virtio_net_hdr)) return -EINVAL; - KNI_DBG("TUNSETVNETHDRSZ %d\n", s); + pr_debug("TUNSETVNETHDRSZ %d\n", s); q->vnet_hdr_sz = s; return 0; case TUNSETOFFLOAD: - KNI_DBG("TUNSETOFFLOAD %lx\n", arg); + pr_debug("TUNSETOFFLOAD %lx\n", arg); #ifdef RTE_KNI_VHOST_VNET_HDR_EN /* not support any offload yet */ if (!(q->flags & IFF_VNET_HDR)) @@ -545,26 +529,26 @@ kni_sock_ioctl(struct socket *sock, unsigned int cmd, #endif default: - KNI_DBG("NOT SUPPORT\n"); + pr_debug("NOT SUPPORT\n"); return -EINVAL; } } static int -kni_sock_compat_ioctl(struct socket *sock, unsigned int cmd, +kni_sock_compat_ioctl(struct socket *sock, uint32_t cmd, unsigned long arg) { /* 32 bits app on 64 bits OS to be supported later */ - KNI_PRINT("Not implemented.\n"); + pr_debug("Not implemented.\n"); return -EINVAL; } #define KNI_VHOST_WAIT_WQ_SAFE() \ -do { \ +do { \ while ((BE_FINISH | BE_STOP) == kni->vq_status) \ - msleep(1); \ -}while(0) \ + msleep(1); \ +} while (0) \ static int @@ -577,7 +561,8 @@ kni_sock_release(struct socket *sock) if (q == NULL) return 0; - if (NULL != (kni = q->kni)) { + kni = q->kni; + if (kni != NULL) { kni->vq_status = BE_STOP; KNI_VHOST_WAIT_WQ_SAFE(); kni->vhost_queue = NULL; @@ -592,18 +577,17 @@ kni_sock_release(struct socket *sock) sock_put(&q->sk); - KNI_DBG("dummy sock release done\n"); + pr_debug("dummy sock release done\n"); return 0; } int -kni_sock_getname (struct socket *sock, - struct sockaddr *addr, - int *sockaddr_len, int peer) +kni_sock_getname(struct socket *sock, struct sockaddr *addr, + int *sockaddr_len, int peer) { - KNI_DBG("dummy sock getname\n"); 
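/*
 * The KNI_VHOST_WAIT_WQ_SAFE() cleanup above keeps the do { ... } while (0)
 * wrapper while fixing the indentation.  That wrapper is what makes a
 * multi-statement macro expand to exactly one statement; a generic,
 * self-contained illustration:
 */
#include <stdio.h>

#define LOG_AND_COUNT(msg, counter) \
	do { \
		printf("%s\n", (msg)); \
		(counter)++; \
	} while (0)

int main(void)
{
	int errors = 0;

	/* Safe in an unbraced if/else: the expansion is a single statement,
	 * so the trailing semicolon binds where the author expects. */
	if (1)
		LOG_AND_COUNT("first", errors);
	else
		LOG_AND_COUNT("never", errors);
	return errors == 1 ? 0 : 1;
}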
- ((struct sockaddr_ll*)addr)->sll_family = AF_PACKET; + pr_debug("dummy sock getname\n"); + ((struct sockaddr_ll *)addr)->sll_family = AF_PACKET; return 0; } @@ -646,7 +630,7 @@ kni_sk_destruct(struct sock *sk) /* make sure there's no packet in buffer */ while (skb_dequeue(&sk->sk_receive_queue) != NULL) - ; + ; mb(); @@ -673,7 +657,7 @@ kni_vhost_backend_init(struct kni_dev *kni) if (kni->vhost_queue != NULL) return -1; -#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 2, 0) +#ifdef HAVE_SK_ALLOC_KERN_PARAM q = (struct kni_vhost_queue *)sk_alloc(net, AF_UNSPEC, GFP_KERNEL, &kni_raw_proto, 0); #else @@ -694,8 +678,9 @@ kni_vhost_backend_init(struct kni_dev *kni) } /* cache init */ - q->cache = kzalloc(RTE_KNI_VHOST_MAX_CACHE_SIZE * sizeof(struct sk_buff), - GFP_KERNEL); + q->cache = kzalloc( + RTE_KNI_VHOST_MAX_CACHE_SIZE * sizeof(struct sk_buff), + GFP_KERNEL); if (!q->cache) goto free_fd; @@ -708,7 +693,7 @@ kni_vhost_backend_init(struct kni_dev *kni) for (i = 0; i < RTE_KNI_VHOST_MAX_CACHE_SIZE; i++) { elem = &q->cache[i]; - kni_fifo_put(fifo, (void**)&elem, 1); + kni_fifo_put(fifo, (void **)&elem, 1); } q->fifo = fifo; @@ -738,14 +723,12 @@ kni_vhost_backend_init(struct kni_dev *kni) kni->vq_status = BE_START; -#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,35) - KNI_DBG("backend init sockfd=%d, sock->wq=0x%16llx," - "sk->sk_wq=0x%16llx", +#ifdef HAVE_SOCKET_WQ + pr_debug("backend init sockfd=%d, sock->wq=0x%16llx,sk->sk_wq=0x%16llx", q->sockfd, (uint64_t)q->sock->wq, (uint64_t)q->sk.sk_wq); #else - KNI_DBG("backend init sockfd=%d, sock->wait at 0x%16llx," - "sk->sk_sleep=0x%16llx", + pr_debug("backend init sockfd=%d, sock->wait at 0x%16llx,sk->sk_sleep=0x%16llx", q->sockfd, (uint64_t)&q->sock->wait, (uint64_t)q->sk.sk_sleep); #endif @@ -768,7 +751,7 @@ free_sock: q->sock = NULL; free_sk: - sk_free((struct sock*)q); + sk_free((struct sock *)q); return err; } @@ -781,6 +764,7 @@ show_sock_fd(struct device *dev, struct device_attribute *attr, struct net_device *net_dev = container_of(dev, struct net_device, dev); struct kni_dev *kni = netdev_priv(net_dev); int sockfd = -1; + if (kni->vhost_queue != NULL) sockfd = kni->vhost_queue->sockfd; return snprintf(buf, 10, "%d\n", sockfd); @@ -792,6 +776,7 @@ show_sock_en(struct device *dev, struct device_attribute *attr, { struct net_device *net_dev = container_of(dev, struct net_device, dev); struct kni_dev *kni = netdev_priv(net_dev); + return snprintf(buf, 10, "%u\n", (kni->vhost_queue == NULL ? 
0 : 1)); } @@ -804,7 +789,7 @@ set_sock_en(struct device *dev, struct device_attribute *attr, unsigned long en; int err = 0; - if (0 != kstrtoul(buf, 0, &en)) + if (kstrtoul(buf, 0, &en) != 0) return -EINVAL; if (en) @@ -818,7 +803,7 @@ static DEVICE_ATTR(sock_en, S_IRUGO | S_IWUSR, show_sock_en, set_sock_en); static struct attribute *dev_attrs[] = { &dev_attr_sock_fd.attr, &dev_attr_sock_en.attr, - NULL, + NULL, }; static const struct attribute_group dev_attr_grp = { @@ -836,7 +821,7 @@ kni_vhost_backend_release(struct kni_dev *kni) /* dettach from kni */ q->kni = NULL; - KNI_DBG("release backend done\n"); + pr_debug("release backend done\n"); return 0; } @@ -851,7 +836,7 @@ kni_vhost_init(struct kni_dev *kni) kni->vq_status = BE_STOP; - KNI_DBG("kni_vhost_init done\n"); + pr_debug("kni_vhost_init done\n"); return 0; } diff --git a/lib/librte_ether/Makefile b/lib/librte_ether/Makefile index 0bb5dc90..efe1e5fe 100644 --- a/lib/librte_ether/Makefile +++ b/lib/librte_ether/Makefile @@ -34,26 +34,25 @@ include $(RTE_SDK)/mk/rte.vars.mk # # library name # -LIB = libethdev.a +LIB = librte_ethdev.a CFLAGS += -O3 CFLAGS += $(WERROR_FLAGS) EXPORT_MAP := rte_ether_version.map -LIBABIVER := 4 +LIBABIVER := 5 SRCS-y += rte_ethdev.c # # Export include files # -SYMLINK-y-include += rte_ether.h SYMLINK-y-include += rte_ethdev.h SYMLINK-y-include += rte_eth_ctrl.h SYMLINK-y-include += rte_dev_info.h # this lib depends upon: -DEPDIRS-y += lib/librte_eal lib/librte_mempool lib/librte_ring lib/librte_mbuf +DEPDIRS-y += lib/librte_net lib/librte_eal lib/librte_mempool lib/librte_ring lib/librte_mbuf include $(RTE_SDK)/mk/rte.lib.mk diff --git a/lib/librte_ether/rte_dev_info.h b/lib/librte_ether/rte_dev_info.h index 574683d3..aab6d1a6 100644 --- a/lib/librte_ether/rte_dev_info.h +++ b/lib/librte_ether/rte_dev_info.h @@ -34,6 +34,8 @@ #ifndef _RTE_DEV_INFO_H_ #define _RTE_DEV_INFO_H_ +#include + /* * Placeholder for accessing device registers */ diff --git a/lib/librte_ether/rte_eth_ctrl.h b/lib/librte_ether/rte_eth_ctrl.h index c3a2c9e4..fe80eb01 100644 --- a/lib/librte_ether/rte_eth_ctrl.h +++ b/lib/librte_ether/rte_eth_ctrl.h @@ -34,6 +34,10 @@ #ifndef _RTE_ETH_CTRL_H_ #define _RTE_ETH_CTRL_H_ +#include +#include +#include "rte_ether.h" + /** * @file * diff --git a/lib/librte_ether/rte_ethdev.c b/lib/librte_ether/rte_ethdev.c index a5b42aa8..fde8112f 100644 --- a/lib/librte_ether/rte_ethdev.c +++ b/lib/librte_ether/rte_ethdev.c @@ -58,7 +58,6 @@ #include #include #include -#include #include #include #include @@ -72,6 +71,7 @@ static const char *MZ_RTE_ETH_DEV_DATA = "rte_eth_dev_data"; struct rte_eth_dev rte_eth_devices[RTE_MAX_ETHPORTS]; static struct rte_eth_dev_data *rte_eth_dev_data; +static uint8_t eth_dev_last_created_port; static uint8_t nb_ports; /* spinlock for eth device callbacks */ @@ -190,7 +190,7 @@ rte_eth_dev_find_free_port(void) } struct rte_eth_dev * -rte_eth_dev_allocate(const char *name, enum rte_eth_dev_type type) +rte_eth_dev_allocate(const char *name) { uint8_t port_id; struct rte_eth_dev *eth_dev; @@ -215,25 +215,11 @@ rte_eth_dev_allocate(const char *name, enum rte_eth_dev_type type) snprintf(eth_dev->data->name, sizeof(eth_dev->data->name), "%s", name); eth_dev->data->port_id = port_id; eth_dev->attached = DEV_ATTACHED; - eth_dev->dev_type = type; + eth_dev_last_created_port = port_id; nb_ports++; return eth_dev; } -static int -rte_eth_dev_create_unique_device_name(char *name, size_t size, - struct rte_pci_device *pci_dev) -{ - int ret; - - ret = snprintf(name, size, "%d:%d.%d", - 
pci_dev->addr.bus, pci_dev->addr.devid, - pci_dev->addr.function); - if (ret < 0) - return ret; - return 0; -} - int rte_eth_dev_release_port(struct rte_eth_dev *eth_dev) { @@ -245,9 +231,9 @@ rte_eth_dev_release_port(struct rte_eth_dev *eth_dev) return 0; } -static int -rte_eth_dev_init(struct rte_pci_driver *pci_drv, - struct rte_pci_device *pci_dev) +int +rte_eth_dev_pci_probe(struct rte_pci_driver *pci_drv, + struct rte_pci_device *pci_dev) { struct eth_driver *eth_drv; struct rte_eth_dev *eth_dev; @@ -257,11 +243,10 @@ rte_eth_dev_init(struct rte_pci_driver *pci_drv, eth_drv = (struct eth_driver *)pci_drv; - /* Create unique Ethernet device name using PCI address */ - rte_eth_dev_create_unique_device_name(ethdev_name, - sizeof(ethdev_name), pci_dev); + rte_eal_pci_device_name(&pci_dev->addr, ethdev_name, + sizeof(ethdev_name)); - eth_dev = rte_eth_dev_allocate(ethdev_name, RTE_ETH_DEV_PCI); + eth_dev = rte_eth_dev_allocate(ethdev_name); if (eth_dev == NULL) return -ENOMEM; @@ -290,7 +275,7 @@ rte_eth_dev_init(struct rte_pci_driver *pci_drv, return 0; RTE_PMD_DEBUG_TRACE("driver %s: eth_dev_init(vendor_id=0x%x device_id=0x%x) failed\n", - pci_drv->name, + pci_drv->driver.name, (unsigned) pci_dev->id.vendor_id, (unsigned) pci_dev->id.device_id); if (rte_eal_process_type() == RTE_PROC_PRIMARY) @@ -299,8 +284,8 @@ rte_eth_dev_init(struct rte_pci_driver *pci_drv, return diag; } -static int -rte_eth_dev_uninit(struct rte_pci_device *pci_dev) +int +rte_eth_dev_pci_remove(struct rte_pci_device *pci_dev) { const struct eth_driver *eth_drv; struct rte_eth_dev *eth_dev; @@ -310,9 +295,8 @@ rte_eth_dev_uninit(struct rte_pci_device *pci_dev) if (pci_dev == NULL) return -EINVAL; - /* Create unique Ethernet device name using PCI address */ - rte_eth_dev_create_unique_device_name(ethdev_name, - sizeof(ethdev_name), pci_dev); + rte_eal_pci_device_name(&pci_dev->addr, ethdev_name, + sizeof(ethdev_name)); eth_dev = rte_eth_dev_allocated(ethdev_name); if (eth_dev == NULL) @@ -340,28 +324,6 @@ rte_eth_dev_uninit(struct rte_pci_device *pci_dev) return 0; } -/** - * Register an Ethernet [Poll Mode] driver. - * - * Function invoked by the initialization function of an Ethernet driver - * to simultaneously register itself as a PCI driver and as an Ethernet - * Poll Mode Driver. - * Invokes the rte_eal_pci_register() function to register the *pci_drv* - * structure embedded in the *eth_drv* structure, after having stored the - * address of the rte_eth_dev_init() function in the *devinit* field of - * the *pci_drv* structure. - * During the PCI probing phase, the rte_eth_dev_init() function is - * invoked for each PCI [Ethernet device] matching the embedded PCI - * identifiers provided by the driver. 
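/*
 * Above, the hand-rolled rte_eth_dev_create_unique_device_name() is dropped
 * in favour of rte_eal_pci_device_name(), which produces the canonical
 * DomBDF string ('0000:01:00.0').  A user-space sketch of equivalent
 * formatting, with a stand-in address struct; the real rte_pci_addr carries
 * the same four fields.
 */
#include <stdio.h>
#include <stdint.h>

struct toy_pci_addr {
	uint32_t domain;
	uint8_t  bus, devid, function;
};

static int pci_addr_to_name(const struct toy_pci_addr *a, char *buf, size_t sz)
{
	int n = snprintf(buf, sz, "%04x:%02x:%02x.%x",
			 (unsigned)a->domain, (unsigned)a->bus,
			 (unsigned)a->devid, (unsigned)a->function);

	return (n < 0 || (size_t)n >= sz) ? -1 : 0;	/* reject truncation */
}

int main(void)
{
	struct toy_pci_addr a = { 0, 0x01, 0x00, 0 };
	char name[32];

	if (pci_addr_to_name(&a, name, sizeof(name)) == 0)
		printf("%s\n", name);	/* prints 0000:01:00.0 */
	return 0;
}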
- */ -void -rte_eth_driver_register(struct eth_driver *eth_drv) -{ - eth_drv->pci_drv.devinit = rte_eth_dev_init; - eth_drv->pci_drv.devuninit = rte_eth_dev_uninit; - rte_eal_pci_register(ð_drv->pci_drv); -} - int rte_eth_dev_is_valid_port(uint8_t port_id) { @@ -385,27 +347,6 @@ rte_eth_dev_count(void) return nb_ports; } -static enum rte_eth_dev_type -rte_eth_dev_get_device_type(uint8_t port_id) -{ - RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, RTE_ETH_DEV_UNKNOWN); - return rte_eth_devices[port_id].dev_type; -} - -static int -rte_eth_dev_get_addr_by_port(uint8_t port_id, struct rte_pci_addr *addr) -{ - RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -EINVAL); - - if (addr == NULL) { - RTE_PMD_DEBUG_TRACE("Null pointer is specified\n"); - return -EINVAL; - } - - *addr = rte_eth_devices[port_id].pci_dev->addr; - return 0; -} - int rte_eth_dev_get_name_by_port(uint8_t port_id, char *name) { @@ -450,34 +391,6 @@ rte_eth_dev_get_port_by_name(const char *name, uint8_t *port_id) return -ENODEV; } -static int -rte_eth_dev_get_port_by_addr(const struct rte_pci_addr *addr, uint8_t *port_id) -{ - int i; - struct rte_pci_device *pci_dev = NULL; - - if (addr == NULL) { - RTE_PMD_DEBUG_TRACE("Null pointer is specified\n"); - return -EINVAL; - } - - *port_id = RTE_MAX_ETHPORTS; - - for (i = 0; i < RTE_MAX_ETHPORTS; i++) { - - pci_dev = rte_eth_devices[i].pci_dev; - - if (pci_dev && - !rte_eal_compare_pci_addr(&pci_dev->addr, addr)) { - - *port_id = i; - - return 0; - } - } - return -ENODEV; -} - static int rte_eth_dev_is_detachable(uint8_t port_id) { @@ -503,127 +416,49 @@ rte_eth_dev_is_detachable(uint8_t port_id) return 1; } -/* attach the new physical device, then store port_id of the device */ -static int -rte_eth_dev_attach_pdev(struct rte_pci_addr *addr, uint8_t *port_id) +/* attach the new device, then store port_id of the device */ +int +rte_eth_dev_attach(const char *devargs, uint8_t *port_id) { - /* re-construct pci_device_list */ - if (rte_eal_pci_scan()) - goto err; - /* Invoke probe func of the driver can handle the new device. 
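/*
 * The rewritten rte_eth_dev_attach() (continued below) no longer
 * special-cases PCI vs vdev: it snapshots the port count, calls
 * rte_eal_dev_attach(), and only trusts eth_dev_last_created_port once the
 * count has grown.  A condensed user-space model of that control flow; the
 * stubs stand in for EAL/ethdev state.
 */
#include <stdio.h>

static int port_count;			/* stand-in for nb_ports */
static int last_created_port = -1;	/* stand-in for eth_dev_last_created_port */

static int dev_attach_stub(const char *name)
{
	(void)name;
	last_created_port = port_count++;	/* driver creates one port */
	return 0;
}

static int attach_and_get_port(const char *name, int *port_id)
{
	int before = port_count;

	if (dev_attach_stub(name) < 0)
		return -1;
	/* Driver claimed success but created no port: treat as a bug,
	 * exactly as the patched code does. */
	if (port_count == before)
		return -1;
	*port_id = last_created_port;
	return 0;
}

int main(void)
{
	int pid;

	if (attach_and_get_port("net_pcap0", &pid) == 0)
		printf("attached port %d\n", pid);
	return 0;
}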
*/ - if (rte_eal_pci_probe_one(addr)) - goto err; + int ret = -1; + int current = rte_eth_dev_count(); + char *name = NULL; + char *args = NULL; - if (rte_eth_dev_get_port_by_addr(addr, port_id)) + if ((devargs == NULL) || (port_id == NULL)) { + ret = -EINVAL; goto err; + } - return 0; -err: - return -1; -} - -/* detach the new physical device, then store pci_addr of the device */ -static int -rte_eth_dev_detach_pdev(uint8_t port_id, struct rte_pci_addr *addr) -{ - struct rte_pci_addr freed_addr; - struct rte_pci_addr vp; - - /* get pci address by port id */ - if (rte_eth_dev_get_addr_by_port(port_id, &freed_addr)) + /* parse devargs, then retrieve device name and args */ + if (rte_eal_parse_devargs_str(devargs, &name, &args)) goto err; - /* Zeroed pci addr means the port comes from virtual device */ - vp.domain = vp.bus = vp.devid = vp.function = 0; - if (rte_eal_compare_pci_addr(&vp, &freed_addr) == 0) + ret = rte_eal_dev_attach(name, args); + if (ret < 0) goto err; - /* invoke devuninit func of the pci driver, - * also remove the device from pci_device_list */ - if (rte_eal_pci_detach(&freed_addr)) + /* no point looking at the port count if no port exists */ + if (!rte_eth_dev_count()) { + RTE_LOG(ERR, EAL, "No port found for device (%s)\n", name); + ret = -1; goto err; + } - *addr = freed_addr; - return 0; -err: - return -1; -} - -/* attach the new virtual device, then store port_id of the device */ -static int -rte_eth_dev_attach_vdev(const char *vdevargs, uint8_t *port_id) -{ - char *name = NULL, *args = NULL; - int ret = -1; - - /* parse vdevargs, then retrieve device name and args */ - if (rte_eal_parse_devargs_str(vdevargs, &name, &args)) - goto end; - - /* walk around dev_driver_list to find the driver of the device, - * then invoke probe function of the driver. - * rte_eal_vdev_init() updates port_id allocated after - * initialization. + /* if nothing happened, there is a bug here, since some driver told us + * it did attach a device, but did not create a port. 
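/*
 * Note how the new attach path frees name/args on every exit through a
 * single err: label; free(NULL) being a defined no-op is what makes that
 * safe.  A self-contained illustration of the idiom; the parser below is a
 * toy, not rte_eal_parse_devargs_str().
 */
#include <stdlib.h>
#include <string.h>

static int parse_devargs_toy(const char *s, char **name, char **args)
{
	const char *comma = strchr(s, ',');
	size_t n;

	if (comma == NULL)
		return -1;
	n = (size_t)(comma - s);
	*name = malloc(n + 1);
	*args = strdup(comma + 1);
	if (*name == NULL || *args == NULL)
		return -1;	/* caller's err: path frees what was set */
	memcpy(*name, s, n);
	(*name)[n] = '\0';
	return 0;
}

static int use_devargs(const char *s)
{
	char *name = NULL, *args = NULL;
	int ret = -1;

	if (parse_devargs_toy(s, &name, &args) < 0)
		goto err;
	/* ... act on name/args here ... */
	ret = 0;
err:
	free(name);	/* free(NULL) is a no-op, so one exit path suffices */
	free(args);
	return ret;
}

int main(void)
{
	return use_devargs("net_pcap0,iface=eth0") == 0 ? 0 : 1;
}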
*/ - if (rte_eal_vdev_init(name, args)) - goto end; - - if (rte_eth_dev_get_port_by_name(name, port_id)) - goto end; - - ret = 0; -end: - free(name); - free(args); - - return ret; -} - -/* detach the new virtual device, then store the name of the device */ -static int -rte_eth_dev_detach_vdev(uint8_t port_id, char *vdevname) -{ - char name[RTE_ETH_NAME_MAX_LEN]; - - /* get device name by port id */ - if (rte_eth_dev_get_name_by_port(port_id, name)) - goto err; - /* walk around dev_driver_list to find the driver of the device, - * then invoke uninit function of the driver */ - if (rte_eal_vdev_uninit(name)) - goto err; - - strncpy(vdevname, name, sizeof(name)); - return 0; -err: - return -1; -} - -/* attach the new device, then store port_id of the device */ -int -rte_eth_dev_attach(const char *devargs, uint8_t *port_id) -{ - struct rte_pci_addr addr; - int ret = -1; - - if ((devargs == NULL) || (port_id == NULL)) { - ret = -EINVAL; + if (current == rte_eth_dev_count()) { + ret = -1; goto err; } - if (eal_parse_pci_DomBDF(devargs, &addr) == 0) { - ret = rte_eth_dev_attach_pdev(&addr, port_id); - if (ret < 0) - goto err; - } else { - ret = rte_eth_dev_attach_vdev(devargs, port_id); - if (ret < 0) - goto err; - } + *port_id = eth_dev_last_created_port; + ret = 0; - return 0; err: - RTE_LOG(ERR, EAL, "Driver, cannot attach the device\n"); + free(name); + free(args); return ret; } @@ -631,7 +466,6 @@ err: int rte_eth_dev_detach(uint8_t port_id, char *name) { - struct rte_pci_addr addr; int ret = -1; if (name == NULL) { @@ -639,33 +473,19 @@ rte_eth_dev_detach(uint8_t port_id, char *name) goto err; } - /* check whether the driver supports detach feature, or not */ + /* FIXME: move this to eal, once device flags are relocated there */ if (rte_eth_dev_is_detachable(port_id)) goto err; - if (rte_eth_dev_get_device_type(port_id) == RTE_ETH_DEV_PCI) { - ret = rte_eth_dev_get_addr_by_port(port_id, &addr); - if (ret < 0) - goto err; - - ret = rte_eth_dev_detach_pdev(port_id, &addr); - if (ret < 0) - goto err; - - snprintf(name, RTE_ETH_NAME_MAX_LEN, - "%04x:%02x:%02x.%d", - addr.domain, addr.bus, - addr.devid, addr.function); - } else { - ret = rte_eth_dev_detach_vdev(port_id, name); - if (ret < 0) - goto err; - } + snprintf(name, sizeof(rte_eth_devices[port_id].data->name), + "%s", rte_eth_devices[port_id].data->name); + ret = rte_eal_dev_detach(name); + if (ret < 0) + goto err; return 0; err: - RTE_LOG(ERR, EAL, "Driver, cannot detach the device\n"); return ret; } @@ -2689,7 +2509,7 @@ rte_eth_dev_callback_unregister(uint8_t port_id, void _rte_eth_dev_callback_process(struct rte_eth_dev *dev, - enum rte_eth_event_type event) + enum rte_eth_event_type event, void *cb_arg) { struct rte_eth_dev_callback *cb_lst; struct rte_eth_dev_callback dev_cb; @@ -2700,6 +2520,9 @@ _rte_eth_dev_callback_process(struct rte_eth_dev *dev, continue; dev_cb = *cb_lst; cb_lst->active = 1; + if (cb_arg != NULL) + dev_cb.cb_arg = (void *) cb_arg; + rte_spinlock_unlock(&rte_eth_dev_cb_lock); dev_cb.cb_fn(dev->data->port_id, dev_cb.event, dev_cb.cb_arg); @@ -2749,7 +2572,7 @@ rte_eth_dma_zone_reserve(const struct rte_eth_dev *dev, const char *ring_name, const struct rte_memzone *mz; snprintf(z_name, sizeof(z_name), "%s_%s_%d_%d", - dev->driver->pci_drv.name, ring_name, + dev->driver->pci_drv.driver.name, ring_name, dev->data->port_id, queue_id); mz = rte_memzone_lookup(z_name); @@ -3390,8 +3213,8 @@ rte_eth_copy_pci_info(struct rte_eth_dev *eth_dev, struct rte_pci_device *pci_de eth_dev->data->dev_flags |= 
RTE_ETH_DEV_DETACHABLE; eth_dev->data->kdrv = pci_dev->kdrv; - eth_dev->data->numa_node = pci_dev->numa_node; - eth_dev->data->drv_name = pci_dev->driver->name; + eth_dev->data->numa_node = pci_dev->device.numa_node; + eth_dev->data->drv_name = pci_dev->driver->driver.name; } int diff --git a/lib/librte_ether/rte_ethdev.h b/lib/librte_ether/rte_ethdev.h index b0fe0334..96781792 100644 --- a/lib/librte_ether/rte_ethdev.h +++ b/lib/librte_ether/rte_ethdev.h @@ -190,6 +190,9 @@ struct rte_mbuf; /** * A structure used to retrieve statistics for an Ethernet port. + * Not all statistics fields in struct rte_eth_stats are supported + * by any type of network interface card (NIC). If any statistics + * field is not supported, its value is 0. */ struct rte_eth_stats { uint64_t ipackets; /**< Total number of successfully received packets. */ @@ -198,7 +201,7 @@ struct rte_eth_stats { uint64_t obytes; /**< Total number of successfully transmitted bytes. */ uint64_t imissed; /**< Total of RX packets dropped by the HW, - * because there are no available mbufs (i.e. RX queues are full). + * because there are no available buffer (i.e. RX queues are full). */ uint64_t ierrors; /**< Total number of erroneous received packets. */ uint64_t oerrors; /**< Total number of failed transmitted packets. */ @@ -255,6 +258,7 @@ struct rte_eth_stats { /** * A structure used to retrieve link-level information of an Ethernet port. */ +__extension__ struct rte_eth_link { uint32_t link_speed; /**< ETH_SPEED_NUM_ */ uint16_t link_duplex : 1; /**< ETH_LINK_[HALF/FULL]_DUPLEX */ @@ -346,6 +350,7 @@ struct rte_eth_rxmode { enum rte_eth_rx_mq_mode mq_mode; uint32_t max_rx_pkt_len; /**< Only used if jumbo_frame enabled. */ uint16_t split_hdr_size; /**< hdr buf size (header_split enabled).*/ + __extension__ uint16_t header_split : 1, /**< Header Split enable. */ hw_ip_checksum : 1, /**< IP/UDP/TCP checksum offload enable. */ hw_vlan_filter : 1, /**< VLAN filter enable. */ @@ -645,6 +650,7 @@ struct rte_eth_txmode { /* For i40e specifically */ uint16_t pvid; + __extension__ uint8_t hw_vlan_reject_tagged : 1, /**< If set, reject sending out tagged pkts */ hw_vlan_reject_untagged : 1, @@ -864,6 +870,10 @@ struct rte_eth_conf { #define DEV_TX_OFFLOAD_UDP_TSO 0x00000040 #define DEV_TX_OFFLOAD_OUTER_IPV4_CKSUM 0x00000080 /**< Used for tunneling packet. */ #define DEV_TX_OFFLOAD_QINQ_INSERT 0x00000100 +#define DEV_TX_OFFLOAD_VXLAN_TNL_TSO 0x00000200 /**< Used for tunneling packet. */ +#define DEV_TX_OFFLOAD_GRE_TNL_TSO 0x00000400 /**< Used for tunneling packet. */ +#define DEV_TX_OFFLOAD_IPIP_TNL_TSO 0x00000800 /**< Used for tunneling packet. */ +#define DEV_TX_OFFLOAD_GENEVE_TNL_TSO 0x00001000 /**< Used for tunneling packet. */ /** * Ethernet device information @@ -1602,17 +1612,6 @@ struct rte_eth_rxtx_callback { void *param; }; -/** - * The eth device type. - */ -enum rte_eth_dev_type { - RTE_ETH_DEV_UNKNOWN, /**< unknown device type */ - RTE_ETH_DEV_PCI, - /**< Physical function and Virtual function of PCI devices */ - RTE_ETH_DEV_VIRTUAL, /**< non hardware device */ - RTE_ETH_DEV_MAX /**< max value of this enum */ -}; - /** * @internal * The generic data structure associated with each ethernet device. 
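/*
 * Several hunks above add __extension__ in front of bit-field groups
 * (struct rte_eth_link, rte_eth_rxmode, rte_eth_txmode).  The likely reason:
 * bit-fields of types such as uint16_t are a GNU extension that gcc flags
 * under -std=c99 -pedantic, and __extension__ suppresses that diagnostic.
 * A minimal reproduction with a toy struct:
 */
#include <stdint.h>

__extension__
struct toy_link {
	uint32_t speed;		/* ordinary member: always ISO C */
	uint16_t duplex  : 1;	/* non-int bit-field: GNU extension */
	uint16_t autoneg : 1;
	uint16_t status  : 1;
};

int main(void)
{
	struct toy_link l = {
		.speed = 10000, .duplex = 1, .autoneg = 1, .status = 1,
	};

	return l.duplex ? 0 : 1;	/* 0 on success */
}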
@@ -1643,7 +1642,6 @@ struct rte_eth_dev { */ struct rte_eth_rxtx_callback *pre_tx_burst_cbs[RTE_MAX_QUEUES_PER_PORT]; uint8_t attached; /**< Flag indicating the port is attached */ - enum rte_eth_dev_type dev_type; /**< Flag indicating the device type */ } __rte_cache_aligned; struct rte_eth_dev_sriov { @@ -1691,6 +1689,7 @@ struct rte_eth_dev_data { struct ether_addr* hash_mac_addrs; /** Device Ethernet MAC addresses of hash filtering. */ uint8_t port_id; /**< Device [external] port identifier. */ + __extension__ uint8_t promiscuous : 1, /**< RX promiscuous mode ON(1) / OFF(0). */ scattered_rx : 1, /**< RX of scattered packets is ON(1) / OFF(0) */ all_multicast : 1, /**< RX all multicast mode ON(1) / OFF(0). */ @@ -1756,8 +1755,7 @@ struct rte_eth_dev *rte_eth_dev_allocated(const char *name); * @return * - Slot in the rte_dev_devices array for a new device; */ -struct rte_eth_dev *rte_eth_dev_allocate(const char *name, - enum rte_eth_dev_type type); +struct rte_eth_dev *rte_eth_dev_allocate(const char *name); /** * @internal @@ -1776,7 +1774,7 @@ int rte_eth_dev_release_port(struct rte_eth_dev *eth_dev); * @param devargs * A pointer to a strings array describing the new device * to be attached. The strings should be a pci address like - * '0000:01:00.0' or virtual device name like 'eth_pcap0'. + * '0000:01:00.0' or virtual device name like 'net_pcap0'. * @param port_id * A pointer to a port identifier actually attached. * @return @@ -1870,18 +1868,6 @@ struct eth_driver { unsigned int dev_private_size; /**< Size of device private data. */ }; -/** - * @internal - * A function invoked by the initialization function of an Ethernet driver - * to simultaneously register itself as a PCI driver and as an Ethernet - * Poll Mode Driver (PMD). - * - * @param eth_drv - * The pointer to the *eth_driver* structure associated with - * the Ethernet driver. - */ -void rte_eth_driver_register(struct eth_driver *eth_drv); - /** * Convert a numerical speed in Mbps to a bitmap flag that can be used in * the bitmap link_speeds of the struct rte_eth_conf @@ -3047,6 +3033,7 @@ enum rte_eth_event_type { /**< queue state event (enabled/disabled) */ RTE_ETH_EVENT_INTR_RESET, /**< reset interrupt event, sent to VF on PF reset */ + RTE_ETH_EVENT_VF_MBOX, /**< message from the VF received by PF */ RTE_ETH_EVENT_MAX /**< max value of this enum */ }; @@ -3068,6 +3055,11 @@ typedef void (*rte_eth_dev_cb_fn)(uint8_t port_id, \ * @param cb_arg * Pointer to the parameters for the registered callback. * + * The user data is overwritten in the case of RTE_ETH_EVENT_VF_MBOX. + * This even occurs when a message from the VF is received by the PF. + * The user data is overwritten with struct rte_pmd_ixgbe_mb_event_param. + * This struct is defined in rte_pmd_ixgbe.h. + * * @return * - On success, zero. * - On failure, a negative value. @@ -3106,12 +3098,16 @@ int rte_eth_dev_callback_unregister(uint8_t port_id, * Pointer to struct rte_eth_dev. * @param event * Eth device interrupt event type. + * @param cb_arg + * Update callback parameter to pass data back to user application. + * This allows the user application to decide if a particular function + * is permitted or not. 
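/*
 * The cb_arg parameter documented above lets a driver substitute the
 * callback argument per event; the VF mailbox path uses this to hand the
 * application a struct rte_pmd_ixgbe_mb_event_param.  The core of the
 * pattern, with the spinlock elided and toy types standing in for the
 * ethdev structures:
 */
#include <stddef.h>
#include <stdio.h>

typedef void (*event_cb_fn)(int port, int event, void *arg);

struct callback {
	event_cb_fn fn;
	void *cb_arg;		/* fixed at registration time */
};

/* Copy the callback first (as the patched code does under its lock), then
 * let caller-supplied data override the registered argument. */
static void process_event(const struct callback *cb, int port, int event,
			  void *override_arg)
{
	struct callback local = *cb;

	if (override_arg != NULL)
		local.cb_arg = override_arg;
	local.fn(port, event, local.cb_arg);
}

static void on_event(int port, int event, void *arg)
{
	printf("port %d event %d arg %p\n", port, event, arg);
}

int main(void)
{
	struct callback cb = { on_event, NULL };
	int mbox_payload = 42;

	process_event(&cb, 0, 1, &mbox_payload);  /* per-event data wins */
	return 0;
}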
* * @return * void */ void _rte_eth_dev_callback_process(struct rte_eth_dev *dev, - enum rte_eth_event_type event); + enum rte_eth_event_type event, void *cb_arg); /** * When there is no rx packet coming in Rx Queue for a long time, we can @@ -4341,7 +4337,7 @@ rte_eth_dev_l2_tunnel_offload_set(uint8_t port_id, /** * Get the port id from pci address or device name -* Ex: 0000:2:00.0 or vdev name eth_pcap0 +* Ex: 0000:2:00.0 or vdev name net_pcap0 * * @param name * pci address or name of the device @@ -4368,6 +4364,21 @@ rte_eth_dev_get_port_by_name(const char *name, uint8_t *port_id); int rte_eth_dev_get_name_by_port(uint8_t port_id, char *name); +/** + * @internal + * Wrapper for use by pci drivers as a .probe function to attach to an ethdev + * interface. + */ +int rte_eth_dev_pci_probe(struct rte_pci_driver *pci_drv, + struct rte_pci_device *pci_dev); + +/** + * @internal + * Wrapper for use by pci drivers as a .remove function to detach an ethdev + * interface. + */ +int rte_eth_dev_pci_remove(struct rte_pci_device *pci_dev); + #ifdef __cplusplus } #endif diff --git a/lib/librte_ether/rte_ether.h b/lib/librte_ether/rte_ether.h deleted file mode 100644 index 1d62d8e5..00000000 --- a/lib/librte_ether/rte_ether.h +++ /dev/null @@ -1,416 +0,0 @@ -/*- - * BSD LICENSE - * - * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * - * * Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in - * the documentation and/or other materials provided with the - * distribution. - * * Neither the name of Intel Corporation nor the names of its - * contributors may be used to endorse or promote products derived - * from this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - */ - -#ifndef _RTE_ETHER_H_ -#define _RTE_ETHER_H_ - -/** - * @file - * - * Ethernet Helpers in RTE - */ - -#ifdef __cplusplus -extern "C" { -#endif - -#include <stdint.h> -#include <stdio.h> - -#include <rte_memcpy.h> -#include <rte_random.h> -#include <rte_mbuf.h> -#include <rte_byteorder.h> - -#define ETHER_ADDR_LEN 6 /**< Length of Ethernet address. */ -#define ETHER_TYPE_LEN 2 /**< Length of Ethernet type field. */ -#define ETHER_CRC_LEN 4 /**< Length of Ethernet CRC. */ -#define ETHER_HDR_LEN \ - (ETHER_ADDR_LEN * 2 + ETHER_TYPE_LEN) /**< Length of Ethernet header. */ -#define ETHER_MIN_LEN 64 /**< Minimum frame len, including CRC. */ -#define ETHER_MAX_LEN 1518 /**< Maximum frame len, including CRC.
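The rte_eth_dev_pci_probe/rte_eth_dev_pci_remove wrappers declared above take over the role of the removed rte_eth_driver_register(): a PMD now wires them into its rte_pci_driver directly. A sketch under assumed names (my_adapter, my_dev_init and my_pmd are placeholders):

#include <rte_ethdev.h>

struct my_adapter { int placeholder; };	/* hypothetical private data */

static int
my_dev_init(struct rte_eth_dev *eth_dev)
{
	(void)eth_dev;
	return 0;	/* hypothetical per-port initialization */
}

static struct eth_driver my_pmd = {
	.pci_drv = {
		.probe = rte_eth_dev_pci_probe,
		.remove = rte_eth_dev_pci_remove,
	},
	.eth_dev_init = my_dev_init,
	.dev_private_size = sizeof(struct my_adapter),
};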
*/ -#define ETHER_MTU \ - (ETHER_MAX_LEN - ETHER_HDR_LEN - ETHER_CRC_LEN) /**< Ethernet MTU. */ - -#define ETHER_MAX_VLAN_FRAME_LEN \ - (ETHER_MAX_LEN + 4) /**< Maximum VLAN frame length, including CRC. */ - -#define ETHER_MAX_JUMBO_FRAME_LEN \ - 0x3F00 /**< Maximum Jumbo frame length, including CRC. */ - -#define ETHER_MAX_VLAN_ID 4095 /**< Maximum VLAN ID. */ - -#define ETHER_MIN_MTU 68 /**< Minimum MTU for IPv4 packets, see RFC 791. */ - -/** - * Ethernet address: - * A universally administered address is uniquely assigned to a device by its - * manufacturer. The first three octets (in transmission order) contain the - * Organizationally Unique Identifier (OUI). The following three (MAC-48 and - * EUI-48) octets are assigned by that organization with the only constraint - * of uniqueness. - * A locally administered address is assigned to a device by a network - * administrator and does not contain OUIs. - * See http://standards.ieee.org/regauth/groupmac/tutorial.html - */ -struct ether_addr { - uint8_t addr_bytes[ETHER_ADDR_LEN]; /**< Address bytes in transmission order */ -} __attribute__((__packed__)); - -#define ETHER_LOCAL_ADMIN_ADDR 0x02 /**< Locally assigned Eth. address. */ -#define ETHER_GROUP_ADDR 0x01 /**< Multicast or broadcast Eth. address. */ - -/** - * Check if two Ethernet addresses are the same. - * - * @param ea1 - * A pointer to the first ether_addr structure containing - * the ethernet address. - * @param ea2 - * A pointer to the second ether_addr structure containing - * the ethernet address. - * - * @return - * True (1) if the given two ethernet address are the same; - * False (0) otherwise. - */ -static inline int is_same_ether_addr(const struct ether_addr *ea1, - const struct ether_addr *ea2) -{ - int i; - for (i = 0; i < ETHER_ADDR_LEN; i++) - if (ea1->addr_bytes[i] != ea2->addr_bytes[i]) - return 0; - return 1; -} - -/** - * Check if an Ethernet address is filled with zeros. - * - * @param ea - * A pointer to a ether_addr structure containing the ethernet address - * to check. - * @return - * True (1) if the given ethernet address is filled with zeros; - * false (0) otherwise. - */ -static inline int is_zero_ether_addr(const struct ether_addr *ea) -{ - int i; - for (i = 0; i < ETHER_ADDR_LEN; i++) - if (ea->addr_bytes[i] != 0x00) - return 0; - return 1; -} - -/** - * Check if an Ethernet address is a unicast address. - * - * @param ea - * A pointer to a ether_addr structure containing the ethernet address - * to check. - * @return - * True (1) if the given ethernet address is a unicast address; - * false (0) otherwise. - */ -static inline int is_unicast_ether_addr(const struct ether_addr *ea) -{ - return (ea->addr_bytes[0] & ETHER_GROUP_ADDR) == 0; -} - -/** - * Check if an Ethernet address is a multicast address. - * - * @param ea - * A pointer to a ether_addr structure containing the ethernet address - * to check. - * @return - * True (1) if the given ethernet address is a multicast address; - * false (0) otherwise. - */ -static inline int is_multicast_ether_addr(const struct ether_addr *ea) -{ - return ea->addr_bytes[0] & ETHER_GROUP_ADDR; -} - -/** - * Check if an Ethernet address is a broadcast address. - * - * @param ea - * A pointer to a ether_addr structure containing the ethernet address - * to check. - * @return - * True (1) if the given ethernet address is a broadcast address; - * false (0) otherwise. 
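These predicates survive the deletion of this file (rte_ether.h is re-homed under librte_net in this release rather than dropped). A small classification sketch built only from the helpers in this header; the function name is illustrative:

#include <rte_ether.h>

static const char *
ether_addr_kind(const struct ether_addr *ea)
{
	if (is_zero_ether_addr(ea))
		return "zero";
	if (is_broadcast_ether_addr(ea))
		return "broadcast";
	if (is_multicast_ether_addr(ea))
		return "multicast";
	return is_local_admin_ether_addr(ea) ?
		"locally administered unicast" :
		"universally administered unicast";
}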
- */ -static inline int is_broadcast_ether_addr(const struct ether_addr *ea) -{ - const unaligned_uint16_t *ea_words = (const unaligned_uint16_t *)ea; - - return (ea_words[0] == 0xFFFF && ea_words[1] == 0xFFFF && - ea_words[2] == 0xFFFF); -} - -/** - * Check if an Ethernet address is a universally assigned address. - * - * @param ea - * A pointer to a ether_addr structure containing the ethernet address - * to check. - * @return - * True (1) if the given ethernet address is a universally assigned address; - * false (0) otherwise. - */ -static inline int is_universal_ether_addr(const struct ether_addr *ea) -{ - return (ea->addr_bytes[0] & ETHER_LOCAL_ADMIN_ADDR) == 0; -} - -/** - * Check if an Ethernet address is a locally assigned address. - * - * @param ea - * A pointer to a ether_addr structure containing the ethernet address - * to check. - * @return - * True (1) if the given ethernet address is a locally assigned address; - * false (0) otherwise. - */ -static inline int is_local_admin_ether_addr(const struct ether_addr *ea) -{ - return (ea->addr_bytes[0] & ETHER_LOCAL_ADMIN_ADDR) != 0; -} - -/** - * Check if an Ethernet address is a valid address. Checks that the address is a - * unicast address and is not filled with zeros. - * - * @param ea - * A pointer to a ether_addr structure containing the ethernet address - * to check. - * @return - * True (1) if the given ethernet address is valid; - * false (0) otherwise. - */ -static inline int is_valid_assigned_ether_addr(const struct ether_addr *ea) -{ - return is_unicast_ether_addr(ea) && (! is_zero_ether_addr(ea)); -} - -/** - * Generate a random Ethernet address that is locally administered - * and not multicast. - * @param addr - * A pointer to Ethernet address. - */ -static inline void eth_random_addr(uint8_t *addr) -{ - uint64_t rand = rte_rand(); - uint8_t *p = (uint8_t*)&rand; - - rte_memcpy(addr, p, ETHER_ADDR_LEN); - addr[0] &= ~ETHER_GROUP_ADDR; /* clear multicast bit */ - addr[0] |= ETHER_LOCAL_ADMIN_ADDR; /* set local assignment bit */ -} - -/** - * Fast copy an Ethernet address. - * - * @param ea_from - * A pointer to a ether_addr structure holding the Ethernet address to copy. - * @param ea_to - * A pointer to a ether_addr structure where to copy the Ethernet address. - */ -static inline void ether_addr_copy(const struct ether_addr *ea_from, - struct ether_addr *ea_to) -{ -#ifdef __INTEL_COMPILER - uint16_t *from_words = (uint16_t *)(ea_from->addr_bytes); - uint16_t *to_words = (uint16_t *)(ea_to->addr_bytes); - - to_words[0] = from_words[0]; - to_words[1] = from_words[1]; - to_words[2] = from_words[2]; -#else - /* - * Use the common way, because of a strange gcc warning. - */ - *ea_to = *ea_from; -#endif -} - -#define ETHER_ADDR_FMT_SIZE 18 -/** - * Format 48bits Ethernet address in pattern xx:xx:xx:xx:xx:xx. - * - * @param buf - * A pointer to buffer contains the formatted MAC address. - * @param size - * The format buffer size. - * @param eth_addr - * A pointer to a ether_addr structure. - */ -static inline void -ether_format_addr(char *buf, uint16_t size, - const struct ether_addr *eth_addr) -{ - snprintf(buf, size, "%02X:%02X:%02X:%02X:%02X:%02X", - eth_addr->addr_bytes[0], - eth_addr->addr_bytes[1], - eth_addr->addr_bytes[2], - eth_addr->addr_bytes[3], - eth_addr->addr_bytes[4], - eth_addr->addr_bytes[5]); -} - -/** - * Ethernet header: Contains the destination address, source address - * and frame type. - */ -struct ether_hdr { - struct ether_addr d_addr; /**< Destination address. 
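eth_random_addr() and ether_format_addr() above compose into a short MAC generator. A sketch; it assumes EAL initialization has already seeded rte_rand():

#include <stdio.h>
#include <rte_ether.h>

static void
print_random_mac(void)
{
	struct ether_addr mac;
	char buf[ETHER_ADDR_FMT_SIZE];

	eth_random_addr(mac.addr_bytes);	/* locally administered unicast */
	ether_format_addr(buf, sizeof(buf), &mac);
	printf("generated %s\n", buf);
}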
*/ - struct ether_addr s_addr; /**< Source address. */ - uint16_t ether_type; /**< Frame type. */ -} __attribute__((__packed__)); - -/** - * Ethernet VLAN Header. - * Contains the 16-bit VLAN Tag Control Identifier and the Ethernet type - * of the encapsulated frame. - */ -struct vlan_hdr { - uint16_t vlan_tci; /**< Priority (3) + CFI (1) + Identifier Code (12) */ - uint16_t eth_proto;/**< Ethernet type of encapsulated frame. */ -} __attribute__((__packed__)); - -/** - * VXLAN protocol header. - * Contains the 8-bit flag, 24-bit VXLAN Network Identifier and - * Reserved fields (24 bits and 8 bits) - */ -struct vxlan_hdr { - uint32_t vx_flags; /**< flag (8) + Reserved (24). */ - uint32_t vx_vni; /**< VNI (24) + Reserved (8). */ -} __attribute__((__packed__)); - -/* Ethernet frame types */ -#define ETHER_TYPE_IPv4 0x0800 /**< IPv4 Protocol. */ -#define ETHER_TYPE_IPv6 0x86DD /**< IPv6 Protocol. */ -#define ETHER_TYPE_ARP 0x0806 /**< Arp Protocol. */ -#define ETHER_TYPE_RARP 0x8035 /**< Reverse Arp Protocol. */ -#define ETHER_TYPE_VLAN 0x8100 /**< IEEE 802.1Q VLAN tagging. */ -#define ETHER_TYPE_1588 0x88F7 /**< IEEE 802.1AS 1588 Precise Time Protocol. */ -#define ETHER_TYPE_SLOW 0x8809 /**< Slow protocols (LACP and Marker). */ -#define ETHER_TYPE_TEB 0x6558 /**< Transparent Ethernet Bridging. */ - -#define ETHER_VXLAN_HLEN (sizeof(struct udp_hdr) + sizeof(struct vxlan_hdr)) -/**< VXLAN tunnel header length. */ - -/** - * Extract VLAN tag information into mbuf - * - * Software version of VLAN stripping - * - * @param m - * The packet mbuf. - * @return - * - 0: Success - * - 1: not a vlan packet - */ -static inline int rte_vlan_strip(struct rte_mbuf *m) -{ - struct ether_hdr *eh - = rte_pktmbuf_mtod(m, struct ether_hdr *); - - if (eh->ether_type != rte_cpu_to_be_16(ETHER_TYPE_VLAN)) - return -1; - - struct vlan_hdr *vh = (struct vlan_hdr *)(eh + 1); - m->ol_flags |= PKT_RX_VLAN_PKT; - m->vlan_tci = rte_be_to_cpu_16(vh->vlan_tci); - - /* Copy ether header over rather than moving whole packet */ - memmove(rte_pktmbuf_adj(m, sizeof(struct vlan_hdr)), - eh, 2 * ETHER_ADDR_LEN); - - return 0; -} - -/** - * Insert VLAN tag into mbuf. - * - * Software version of VLAN unstripping - * - * @param m - * The packet mbuf. 
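rte_vlan_strip() above is the software fallback for ports without hardware VLAN stripping. A sketch of running it over a received burst; strip_vlan_burst is an illustrative name:

#include <rte_ether.h>

/* Returns how many mbufs in the burst had a VLAN header stripped. */
static uint16_t
strip_vlan_burst(struct rte_mbuf **pkts, uint16_t nb_pkts)
{
	uint16_t i, stripped = 0;

	for (i = 0; i < nb_pkts; i++)
		if (rte_vlan_strip(pkts[i]) == 0)
			stripped++;	/* tag now in pkts[i]->vlan_tci */
	return stripped;
}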
- * @return - * - 0: On success - * -EPERM: mbuf is is shared overwriting would be unsafe - * -ENOSPC: not enough headroom in mbuf - */ -static inline int rte_vlan_insert(struct rte_mbuf **m) -{ - struct ether_hdr *oh, *nh; - struct vlan_hdr *vh; - - /* Can't insert header if mbuf is shared */ - if (rte_mbuf_refcnt_read(*m) > 1) { - struct rte_mbuf *copy; - - copy = rte_pktmbuf_clone(*m, (*m)->pool); - if (unlikely(copy == NULL)) - return -ENOMEM; - rte_pktmbuf_free(*m); - *m = copy; - } - - oh = rte_pktmbuf_mtod(*m, struct ether_hdr *); - nh = (struct ether_hdr *) - rte_pktmbuf_prepend(*m, sizeof(struct vlan_hdr)); - if (nh == NULL) - return -ENOSPC; - - memmove(nh, oh, 2 * ETHER_ADDR_LEN); - nh->ether_type = rte_cpu_to_be_16(ETHER_TYPE_VLAN); - - vh = (struct vlan_hdr *) (nh + 1); - vh->vlan_tci = rte_cpu_to_be_16((*m)->vlan_tci); - - return 0; -} - -#ifdef __cplusplus -} -#endif - -#endif /* _RTE_ETHER_H_ */ diff --git a/lib/librte_ether/rte_ether_version.map b/lib/librte_ether/rte_ether_version.map index 45ddf44c..72be66d8 100644 --- a/lib/librte_ether/rte_ether_version.map +++ b/lib/librte_ether/rte_ether_version.map @@ -78,7 +78,6 @@ DPDK_2.2 { rte_eth_dev_vlan_filter; rte_eth_dev_wd_timeout_store; rte_eth_dma_zone_reserve; - rte_eth_driver_register; rte_eth_led_off; rte_eth_led_on; rte_eth_link; @@ -138,4 +137,13 @@ DPDK_16.07 { rte_eth_dev_get_name_by_port; rte_eth_dev_get_port_by_name; rte_eth_xstats_get_names; + } DPDK_16.04; + +DPDK_16.11 { + global: + + rte_eth_dev_pci_probe; + rte_eth_dev_pci_remove; + +} DPDK_16.07; diff --git a/lib/librte_hash/rte_cuckoo_hash.c b/lib/librte_hash/rte_cuckoo_hash.c index d6e68c68..51db006a 100644 --- a/lib/librte_hash/rte_cuckoo_hash.c +++ b/lib/librte_hash/rte_cuckoo_hash.c @@ -98,6 +98,7 @@ rte_hash_find_existing(const char *name) void rte_hash_set_cmp_func(struct rte_hash *h, rte_hash_cmp_eq_t func) { + h->cmp_jump_table_idx = KEY_CUSTOM; h->rte_hash_custom_cmp_eq = func; } @@ -283,6 +284,15 @@ rte_hash_create(const struct rte_hash_parameters *params) h->free_slots = r; h->hw_trans_mem_support = hw_trans_mem_support; +#if defined(RTE_ARCH_X86) + if (rte_cpu_get_flag_enabled(RTE_CPUFLAG_AVX2)) + h->sig_cmp_fn = RTE_HASH_COMPARE_AVX2; + else if (rte_cpu_get_flag_enabled(RTE_CPUFLAG_SSE2)) + h->sig_cmp_fn = RTE_HASH_COMPARE_SSE; + else +#endif + h->sig_cmp_fn = RTE_HASH_COMPARE_SCALAR; + /* Turn on multi-writer only with explicit flat from user and TM * support. 
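The rte_hash_set_cmp_func() fix above (switching cmp_jump_table_idx to KEY_CUSTOM) is what makes a user-supplied comparator actually take effect on lookups. A sketch of plugging one in; the comparator shown is just memcmp:

#include <string.h>
#include <rte_hash.h>

static int
my_key_cmp(const void *key1, const void *key2, size_t key_len)
{
	return memcmp(key1, key2, key_len);	/* 0 on match, like the default */
}

static void
use_custom_cmp(struct rte_hash *h)
{
	rte_hash_set_cmp_func(h, my_key_cmp);	/* routed via KEY_CUSTOM now */
}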
*/ @@ -421,10 +431,10 @@ make_space_bucket(const struct rte_hash *h, struct rte_hash_bucket *bkt) */ for (i = 0; i < RTE_HASH_BUCKET_ENTRIES; i++) { /* Search for space in alternative locations */ - next_bucket_idx = bkt->signatures[i].alt & h->bucket_bitmask; + next_bucket_idx = bkt->sig_alt[i] & h->bucket_bitmask; next_bkt[i] = &h->buckets[next_bucket_idx]; for (j = 0; j < RTE_HASH_BUCKET_ENTRIES; j++) { - if (next_bkt[i]->signatures[j].sig == NULL_SIGNATURE) + if (next_bkt[i]->key_idx[j] == EMPTY_SLOT) break; } @@ -434,8 +444,8 @@ make_space_bucket(const struct rte_hash *h, struct rte_hash_bucket *bkt) /* Alternative location has spare room (end of recursive function) */ if (i != RTE_HASH_BUCKET_ENTRIES) { - next_bkt[i]->signatures[j].alt = bkt->signatures[i].current; - next_bkt[i]->signatures[j].current = bkt->signatures[i].alt; + next_bkt[i]->sig_alt[j] = bkt->sig_current[i]; + next_bkt[i]->sig_current[j] = bkt->sig_alt[i]; next_bkt[i]->key_idx[j] = bkt->key_idx[i]; return i; } @@ -464,8 +474,8 @@ make_space_bucket(const struct rte_hash *h, struct rte_hash_bucket *bkt) bkt->flag[i] = 0; nr_pushes = 0; if (ret >= 0) { - next_bkt[i]->signatures[ret].alt = bkt->signatures[i].current; - next_bkt[i]->signatures[ret].current = bkt->signatures[i].alt; + next_bkt[i]->sig_alt[ret] = bkt->sig_current[i]; + next_bkt[i]->sig_current[ret] = bkt->sig_alt[i]; next_bkt[i]->key_idx[ret] = bkt->key_idx[i]; return i; } else @@ -547,8 +557,8 @@ __rte_hash_add_key_with_hash(const struct rte_hash *h, const void *key, /* Check if key is already inserted in primary location */ for (i = 0; i < RTE_HASH_BUCKET_ENTRIES; i++) { - if (prim_bkt->signatures[i].current == sig && - prim_bkt->signatures[i].alt == alt_hash) { + if (prim_bkt->sig_current[i] == sig && + prim_bkt->sig_alt[i] == alt_hash) { k = (struct rte_hash_key *) ((char *)keys + prim_bkt->key_idx[i] * h->key_entry_size); if (rte_hash_cmp_eq(key, k->key, h) == 0) { @@ -567,8 +577,8 @@ __rte_hash_add_key_with_hash(const struct rte_hash *h, const void *key, /* Check if key is already inserted in secondary location */ for (i = 0; i < RTE_HASH_BUCKET_ENTRIES; i++) { - if (sec_bkt->signatures[i].alt == sig && - sec_bkt->signatures[i].current == alt_hash) { + if (sec_bkt->sig_alt[i] == sig && + sec_bkt->sig_current[i] == alt_hash) { k = (struct rte_hash_key *) ((char *)keys + sec_bkt->key_idx[i] * h->key_entry_size); if (rte_hash_cmp_eq(key, k->key, h) == 0) { @@ -613,9 +623,9 @@ __rte_hash_add_key_with_hash(const struct rte_hash *h, const void *key, #endif for (i = 0; i < RTE_HASH_BUCKET_ENTRIES; i++) { /* Check if slot is available */ - if (likely(prim_bkt->signatures[i].sig == NULL_SIGNATURE)) { - prim_bkt->signatures[i].current = sig; - prim_bkt->signatures[i].alt = alt_hash; + if (likely(prim_bkt->key_idx[i] == EMPTY_SLOT)) { + prim_bkt->sig_current[i] = sig; + prim_bkt->sig_alt[i] = alt_hash; prim_bkt->key_idx[i] = new_idx; break; } @@ -635,8 +645,8 @@ __rte_hash_add_key_with_hash(const struct rte_hash *h, const void *key, */ ret = make_space_bucket(h, prim_bkt); if (ret >= 0) { - prim_bkt->signatures[ret].current = sig; - prim_bkt->signatures[ret].alt = alt_hash; + prim_bkt->sig_current[ret] = sig; + prim_bkt->sig_alt[ret] = alt_hash; prim_bkt->key_idx[ret] = new_idx; if (h->add_key == ADD_KEY_MULTIWRITER) rte_spinlock_unlock(h->multiwriter_lock); @@ -710,8 +720,8 @@ __rte_hash_lookup_with_hash(const struct rte_hash *h, const void *key, /* Check if key is in primary location */ for (i = 0; i < RTE_HASH_BUCKET_ENTRIES; i++) { - if 
(bkt->signatures[i].current == sig && - bkt->signatures[i].sig != NULL_SIGNATURE) { + if (bkt->sig_current[i] == sig && + bkt->key_idx[i] != EMPTY_SLOT) { k = (struct rte_hash_key *) ((char *)keys + bkt->key_idx[i] * h->key_entry_size); if (rte_hash_cmp_eq(key, k->key, h) == 0) { @@ -733,8 +743,8 @@ __rte_hash_lookup_with_hash(const struct rte_hash *h, const void *key, /* Check if key is in secondary location */ for (i = 0; i < RTE_HASH_BUCKET_ENTRIES; i++) { - if (bkt->signatures[i].current == alt_hash && - bkt->signatures[i].alt == sig) { + if (bkt->sig_current[i] == alt_hash && + bkt->sig_alt[i] == sig) { k = (struct rte_hash_key *) ((char *)keys + bkt->key_idx[i] * h->key_entry_size); if (rte_hash_cmp_eq(key, k->key, h) == 0) { @@ -788,7 +798,8 @@ remove_entry(const struct rte_hash *h, struct rte_hash_bucket *bkt, unsigned i) unsigned lcore_id, n_slots; struct lcore_cache *cached_free_slots; - bkt->signatures[i].sig = NULL_SIGNATURE; + bkt->sig_current[i] = NULL_SIGNATURE; + bkt->sig_alt[i] = NULL_SIGNATURE; if (h->hw_trans_mem_support) { lcore_id = rte_lcore_id(); cached_free_slots = &h->local_free_slots[lcore_id]; @@ -826,8 +837,8 @@ __rte_hash_del_key_with_hash(const struct rte_hash *h, const void *key, /* Check if key is in primary location */ for (i = 0; i < RTE_HASH_BUCKET_ENTRIES; i++) { - if (bkt->signatures[i].current == sig && - bkt->signatures[i].sig != NULL_SIGNATURE) { + if (bkt->sig_current[i] == sig && + bkt->key_idx[i] != EMPTY_SLOT) { k = (struct rte_hash_key *) ((char *)keys + bkt->key_idx[i] * h->key_entry_size); if (rte_hash_cmp_eq(key, k->key, h) == 0) { @@ -838,7 +849,7 @@ __rte_hash_del_key_with_hash(const struct rte_hash *h, const void *key, * substracting the first dummy index */ ret = bkt->key_idx[i] - 1; - bkt->key_idx[i] = 0; + bkt->key_idx[i] = EMPTY_SLOT; return ret; } } @@ -851,8 +862,8 @@ __rte_hash_del_key_with_hash(const struct rte_hash *h, const void *key, /* Check if key is in secondary location */ for (i = 0; i < RTE_HASH_BUCKET_ENTRIES; i++) { - if (bkt->signatures[i].current == alt_hash && - bkt->signatures[i].sig != NULL_SIGNATURE) { + if (bkt->sig_current[i] == alt_hash && + bkt->key_idx[i] != EMPTY_SLOT) { k = (struct rte_hash_key *) ((char *)keys + bkt->key_idx[i] * h->key_entry_size); if (rte_hash_cmp_eq(key, k->key, h) == 0) { @@ -863,7 +874,7 @@ __rte_hash_del_key_with_hash(const struct rte_hash *h, const void *key, * substracting the first dummy index */ ret = bkt->key_idx[i] - 1; - bkt->key_idx[i] = 0; + bkt->key_idx[i] = EMPTY_SLOT; return ret; } } @@ -907,280 +918,189 @@ rte_hash_get_key_with_position(const struct rte_hash *h, const int32_t position, return 0; } -/* Lookup bulk stage 0: Prefetch input key */ static inline void -lookup_stage0(unsigned *idx, uint64_t *lookup_mask, - const void * const *keys) +compare_signatures(uint32_t *prim_hash_matches, uint32_t *sec_hash_matches, + const struct rte_hash_bucket *prim_bkt, + const struct rte_hash_bucket *sec_bkt, + hash_sig_t prim_hash, hash_sig_t sec_hash, + enum rte_hash_sig_compare_function sig_cmp_fn) { - *idx = __builtin_ctzl(*lookup_mask); - if (*lookup_mask == 0) - *idx = 0; + unsigned int i; + + switch (sig_cmp_fn) { +#ifdef RTE_MACHINE_CPUFLAG_AVX2 + case RTE_HASH_COMPARE_AVX2: + *prim_hash_matches = _mm256_movemask_ps((__m256)_mm256_cmpeq_epi32( + _mm256_load_si256( + (__m256i const *)prim_bkt->sig_current), + _mm256_set1_epi32(prim_hash))); + *sec_hash_matches = _mm256_movemask_ps((__m256)_mm256_cmpeq_epi32( + _mm256_load_si256( + (__m256i const *)sec_bkt->sig_current), + 
_mm256_set1_epi32(sec_hash))); + break; +#endif +#ifdef RTE_MACHINE_CPUFLAG_SSE2 + case RTE_HASH_COMPARE_SSE: + /* Compare the first 4 signatures in the bucket */ + *prim_hash_matches = _mm_movemask_ps((__m128)_mm_cmpeq_epi16( + _mm_load_si128( + (__m128i const *)prim_bkt->sig_current), + _mm_set1_epi32(prim_hash))); + *prim_hash_matches |= (_mm_movemask_ps((__m128)_mm_cmpeq_epi16( + _mm_load_si128( + (__m128i const *)&prim_bkt->sig_current[4]), + _mm_set1_epi32(prim_hash)))) << 4; + /* Compare the first 4 signatures in the bucket */ + *sec_hash_matches = _mm_movemask_ps((__m128)_mm_cmpeq_epi16( + _mm_load_si128( + (__m128i const *)sec_bkt->sig_current), + _mm_set1_epi32(sec_hash))); + *sec_hash_matches |= (_mm_movemask_ps((__m128)_mm_cmpeq_epi16( + _mm_load_si128( + (__m128i const *)&sec_bkt->sig_current[4]), + _mm_set1_epi32(sec_hash)))) << 4; + break; +#endif + default: + for (i = 0; i < RTE_HASH_BUCKET_ENTRIES; i++) { + *prim_hash_matches |= + ((prim_hash == prim_bkt->sig_current[i]) << i); + *sec_hash_matches |= + ((sec_hash == sec_bkt->sig_current[i]) << i); + } + } - rte_prefetch0(keys[*idx]); - *lookup_mask &= ~(1llu << *idx); } -/* - * Lookup bulk stage 1: Calculate primary/secondary hashes - * and prefetch primary/secondary buckets - */ +#define PREFETCH_OFFSET 4 static inline void -lookup_stage1(unsigned idx, hash_sig_t *prim_hash, hash_sig_t *sec_hash, - const struct rte_hash_bucket **primary_bkt, - const struct rte_hash_bucket **secondary_bkt, - hash_sig_t *hash_vals, const void * const *keys, - const struct rte_hash *h) +__rte_hash_lookup_bulk(const struct rte_hash *h, const void **keys, + int32_t num_keys, int32_t *positions, + uint64_t *hit_mask, void *data[]) { - *prim_hash = rte_hash_hash(h, keys[idx]); - hash_vals[idx] = *prim_hash; - *sec_hash = rte_hash_secondary_hash(*prim_hash); + uint64_t hits = 0; + int32_t i; + uint32_t prim_hash[RTE_HASH_LOOKUP_BULK_MAX]; + uint32_t sec_hash[RTE_HASH_LOOKUP_BULK_MAX]; + const struct rte_hash_bucket *primary_bkt[RTE_HASH_LOOKUP_BULK_MAX]; + const struct rte_hash_bucket *secondary_bkt[RTE_HASH_LOOKUP_BULK_MAX]; + uint32_t prim_hitmask[RTE_HASH_LOOKUP_BULK_MAX] = {0}; + uint32_t sec_hitmask[RTE_HASH_LOOKUP_BULK_MAX] = {0}; + + /* Prefetch first keys */ + for (i = 0; i < PREFETCH_OFFSET && i < num_keys; i++) + rte_prefetch0(keys[i]); - *primary_bkt = &h->buckets[*prim_hash & h->bucket_bitmask]; - *secondary_bkt = &h->buckets[*sec_hash & h->bucket_bitmask]; + /* + * Prefetch rest of the keys, calculate primary and + * secondary bucket and prefetch them + */ + for (i = 0; i < (num_keys - PREFETCH_OFFSET); i++) { + rte_prefetch0(keys[i + PREFETCH_OFFSET]); - rte_prefetch0(*primary_bkt); - rte_prefetch0(*secondary_bkt); -} + prim_hash[i] = rte_hash_hash(h, keys[i]); + sec_hash[i] = rte_hash_secondary_hash(prim_hash[i]); -/* - * Lookup bulk stage 2: Search for match hashes in primary/secondary locations - * and prefetch first key slot - */ -static inline void -lookup_stage2(unsigned idx, hash_sig_t prim_hash, hash_sig_t sec_hash, - const struct rte_hash_bucket *prim_bkt, - const struct rte_hash_bucket *sec_bkt, - const struct rte_hash_key **key_slot, int32_t *positions, - uint64_t *extra_hits_mask, const void *keys, - const struct rte_hash *h) -{ - unsigned prim_hash_matches, sec_hash_matches, key_idx, i; - unsigned total_hash_matches; + primary_bkt[i] = &h->buckets[prim_hash[i] & h->bucket_bitmask]; + secondary_bkt[i] = &h->buckets[sec_hash[i] & h->bucket_bitmask]; - prim_hash_matches = 1 << RTE_HASH_BUCKET_ENTRIES; - sec_hash_matches = 
1 << RTE_HASH_BUCKET_ENTRIES; - for (i = 0; i < RTE_HASH_BUCKET_ENTRIES; i++) { - prim_hash_matches |= ((prim_hash == prim_bkt->signatures[i].current) << i); - sec_hash_matches |= ((sec_hash == sec_bkt->signatures[i].current) << i); + rte_prefetch0(primary_bkt[i]); + rte_prefetch0(secondary_bkt[i]); } - key_idx = prim_bkt->key_idx[__builtin_ctzl(prim_hash_matches)]; - if (key_idx == 0) - key_idx = sec_bkt->key_idx[__builtin_ctzl(sec_hash_matches)]; + /* Calculate and prefetch rest of the buckets */ + for (; i < num_keys; i++) { + prim_hash[i] = rte_hash_hash(h, keys[i]); + sec_hash[i] = rte_hash_secondary_hash(prim_hash[i]); - total_hash_matches = (prim_hash_matches | - (sec_hash_matches << (RTE_HASH_BUCKET_ENTRIES + 1))); - *key_slot = (const struct rte_hash_key *) ((const char *)keys + - key_idx * h->key_entry_size); + primary_bkt[i] = &h->buckets[prim_hash[i] & h->bucket_bitmask]; + secondary_bkt[i] = &h->buckets[sec_hash[i] & h->bucket_bitmask]; - rte_prefetch0(*key_slot); - /* - * Return index where key is stored, - * substracting the first dummy index - */ - positions[idx] = (key_idx - 1); + rte_prefetch0(primary_bkt[i]); + rte_prefetch0(secondary_bkt[i]); + } - *extra_hits_mask |= (uint64_t)(__builtin_popcount(total_hash_matches) > 3) << idx; + /* Compare signatures and prefetch key slot of first hit */ + for (i = 0; i < num_keys; i++) { + compare_signatures(&prim_hitmask[i], &sec_hitmask[i], + primary_bkt[i], secondary_bkt[i], + prim_hash[i], sec_hash[i], h->sig_cmp_fn); + + if (prim_hitmask[i]) { + uint32_t first_hit = __builtin_ctzl(prim_hitmask[i]); + uint32_t key_idx = primary_bkt[i]->key_idx[first_hit]; + const struct rte_hash_key *key_slot = + (const struct rte_hash_key *)( + (const char *)h->key_store + + key_idx * h->key_entry_size); + rte_prefetch0(key_slot); + continue; + } -} + if (sec_hitmask[i]) { + uint32_t first_hit = __builtin_ctzl(sec_hitmask[i]); + uint32_t key_idx = secondary_bkt[i]->key_idx[first_hit]; + const struct rte_hash_key *key_slot = + (const struct rte_hash_key *)( + (const char *)h->key_store + + key_idx * h->key_entry_size); + rte_prefetch0(key_slot); + } + } + /* Compare keys, first hits in primary first */ + for (i = 0; i < num_keys; i++) { + positions[i] = -ENOENT; + while (prim_hitmask[i]) { + uint32_t hit_index = __builtin_ctzl(prim_hitmask[i]); + + uint32_t key_idx = primary_bkt[i]->key_idx[hit_index]; + const struct rte_hash_key *key_slot = + (const struct rte_hash_key *)( + (const char *)h->key_store + + key_idx * h->key_entry_size); + /* + * If key index is 0, do not compare key, + * as it is checking the dummy slot + */ + if (!!key_idx & !rte_hash_cmp_eq(key_slot->key, keys[i], h)) { + if (data != NULL) + data[i] = key_slot->pdata; -/* Lookup bulk stage 3: Check if key matches, update hit mask and return data */ -static inline void -lookup_stage3(unsigned idx, const struct rte_hash_key *key_slot, const void * const *keys, - const int32_t *positions, void *data[], uint64_t *hits, - const struct rte_hash *h) -{ - unsigned hit; - unsigned key_idx; + hits |= 1ULL << i; + positions[i] = key_idx - 1; + goto next_key; + } + prim_hitmask[i] &= ~(1 << (hit_index)); + } - hit = !rte_hash_cmp_eq(key_slot->key, keys[idx], h); - if (data != NULL) - data[idx] = key_slot->pdata; + while (sec_hitmask[i]) { + uint32_t hit_index = __builtin_ctzl(sec_hitmask[i]); - key_idx = positions[idx] + 1; - /* - * If key index is 0, force hit to be 0, in case key to be looked up - * is all zero (as in the dummy slot), which would result in a wrong hit - */ - *hits |= 
(uint64_t)(hit && !!key_idx) << idx; -} + uint32_t key_idx = secondary_bkt[i]->key_idx[hit_index]; + const struct rte_hash_key *key_slot = + (const struct rte_hash_key *)( + (const char *)h->key_store + + key_idx * h->key_entry_size); + /* + * If key index is 0, do not compare key, + * as it is checking the dummy slot + */ -static inline void -__rte_hash_lookup_bulk(const struct rte_hash *h, const void **keys, - uint32_t num_keys, int32_t *positions, - uint64_t *hit_mask, void *data[]) -{ - uint64_t hits = 0; - uint64_t extra_hits_mask = 0; - uint64_t lookup_mask, miss_mask; - unsigned idx; - const void *key_store = h->key_store; - int ret; - hash_sig_t hash_vals[RTE_HASH_LOOKUP_BULK_MAX]; - - unsigned idx00, idx01, idx10, idx11, idx20, idx21, idx30, idx31; - const struct rte_hash_bucket *primary_bkt10, *primary_bkt11; - const struct rte_hash_bucket *secondary_bkt10, *secondary_bkt11; - const struct rte_hash_bucket *primary_bkt20, *primary_bkt21; - const struct rte_hash_bucket *secondary_bkt20, *secondary_bkt21; - const struct rte_hash_key *k_slot20, *k_slot21, *k_slot30, *k_slot31; - hash_sig_t primary_hash10, primary_hash11; - hash_sig_t secondary_hash10, secondary_hash11; - hash_sig_t primary_hash20, primary_hash21; - hash_sig_t secondary_hash20, secondary_hash21; - - lookup_mask = (uint64_t) -1 >> (64 - num_keys); - miss_mask = lookup_mask; - - lookup_stage0(&idx00, &lookup_mask, keys); - lookup_stage0(&idx01, &lookup_mask, keys); - - idx10 = idx00, idx11 = idx01; - - lookup_stage0(&idx00, &lookup_mask, keys); - lookup_stage0(&idx01, &lookup_mask, keys); - lookup_stage1(idx10, &primary_hash10, &secondary_hash10, - &primary_bkt10, &secondary_bkt10, hash_vals, keys, h); - lookup_stage1(idx11, &primary_hash11, &secondary_hash11, - &primary_bkt11, &secondary_bkt11, hash_vals, keys, h); - - primary_bkt20 = primary_bkt10; - primary_bkt21 = primary_bkt11; - secondary_bkt20 = secondary_bkt10; - secondary_bkt21 = secondary_bkt11; - primary_hash20 = primary_hash10; - primary_hash21 = primary_hash11; - secondary_hash20 = secondary_hash10; - secondary_hash21 = secondary_hash11; - idx20 = idx10, idx21 = idx11; - idx10 = idx00, idx11 = idx01; - - lookup_stage0(&idx00, &lookup_mask, keys); - lookup_stage0(&idx01, &lookup_mask, keys); - lookup_stage1(idx10, &primary_hash10, &secondary_hash10, - &primary_bkt10, &secondary_bkt10, hash_vals, keys, h); - lookup_stage1(idx11, &primary_hash11, &secondary_hash11, - &primary_bkt11, &secondary_bkt11, hash_vals, keys, h); - lookup_stage2(idx20, primary_hash20, secondary_hash20, primary_bkt20, - secondary_bkt20, &k_slot20, positions, &extra_hits_mask, - key_store, h); - lookup_stage2(idx21, primary_hash21, secondary_hash21, primary_bkt21, - secondary_bkt21, &k_slot21, positions, &extra_hits_mask, - key_store, h); - - while (lookup_mask) { - k_slot30 = k_slot20, k_slot31 = k_slot21; - idx30 = idx20, idx31 = idx21; - primary_bkt20 = primary_bkt10; - primary_bkt21 = primary_bkt11; - secondary_bkt20 = secondary_bkt10; - secondary_bkt21 = secondary_bkt11; - primary_hash20 = primary_hash10; - primary_hash21 = primary_hash11; - secondary_hash20 = secondary_hash10; - secondary_hash21 = secondary_hash11; - idx20 = idx10, idx21 = idx11; - idx10 = idx00, idx11 = idx01; - - lookup_stage0(&idx00, &lookup_mask, keys); - lookup_stage0(&idx01, &lookup_mask, keys); - lookup_stage1(idx10, &primary_hash10, &secondary_hash10, - &primary_bkt10, &secondary_bkt10, hash_vals, keys, h); - lookup_stage1(idx11, &primary_hash11, &secondary_hash11, - &primary_bkt11, &secondary_bkt11, 
hash_vals, keys, h); - lookup_stage2(idx20, primary_hash20, secondary_hash20, - primary_bkt20, secondary_bkt20, &k_slot20, positions, - &extra_hits_mask, key_store, h); - lookup_stage2(idx21, primary_hash21, secondary_hash21, - primary_bkt21, secondary_bkt21, &k_slot21, positions, - &extra_hits_mask, key_store, h); - lookup_stage3(idx30, k_slot30, keys, positions, data, &hits, h); - lookup_stage3(idx31, k_slot31, keys, positions, data, &hits, h); - } + if (!!key_idx & !rte_hash_cmp_eq(key_slot->key, keys[i], h)) { + if (data != NULL) + data[i] = key_slot->pdata; - k_slot30 = k_slot20, k_slot31 = k_slot21; - idx30 = idx20, idx31 = idx21; - primary_bkt20 = primary_bkt10; - primary_bkt21 = primary_bkt11; - secondary_bkt20 = secondary_bkt10; - secondary_bkt21 = secondary_bkt11; - primary_hash20 = primary_hash10; - primary_hash21 = primary_hash11; - secondary_hash20 = secondary_hash10; - secondary_hash21 = secondary_hash11; - idx20 = idx10, idx21 = idx11; - idx10 = idx00, idx11 = idx01; - - lookup_stage1(idx10, &primary_hash10, &secondary_hash10, - &primary_bkt10, &secondary_bkt10, hash_vals, keys, h); - lookup_stage1(idx11, &primary_hash11, &secondary_hash11, - &primary_bkt11, &secondary_bkt11, hash_vals, keys, h); - lookup_stage2(idx20, primary_hash20, secondary_hash20, primary_bkt20, - secondary_bkt20, &k_slot20, positions, &extra_hits_mask, - key_store, h); - lookup_stage2(idx21, primary_hash21, secondary_hash21, primary_bkt21, - secondary_bkt21, &k_slot21, positions, &extra_hits_mask, - key_store, h); - lookup_stage3(idx30, k_slot30, keys, positions, data, &hits, h); - lookup_stage3(idx31, k_slot31, keys, positions, data, &hits, h); - - k_slot30 = k_slot20, k_slot31 = k_slot21; - idx30 = idx20, idx31 = idx21; - primary_bkt20 = primary_bkt10; - primary_bkt21 = primary_bkt11; - secondary_bkt20 = secondary_bkt10; - secondary_bkt21 = secondary_bkt11; - primary_hash20 = primary_hash10; - primary_hash21 = primary_hash11; - secondary_hash20 = secondary_hash10; - secondary_hash21 = secondary_hash11; - idx20 = idx10, idx21 = idx11; - - lookup_stage2(idx20, primary_hash20, secondary_hash20, primary_bkt20, - secondary_bkt20, &k_slot20, positions, &extra_hits_mask, - key_store, h); - lookup_stage2(idx21, primary_hash21, secondary_hash21, primary_bkt21, - secondary_bkt21, &k_slot21, positions, &extra_hits_mask, - key_store, h); - lookup_stage3(idx30, k_slot30, keys, positions, data, &hits, h); - lookup_stage3(idx31, k_slot31, keys, positions, data, &hits, h); - - k_slot30 = k_slot20, k_slot31 = k_slot21; - idx30 = idx20, idx31 = idx21; - - lookup_stage3(idx30, k_slot30, keys, positions, data, &hits, h); - lookup_stage3(idx31, k_slot31, keys, positions, data, &hits, h); - - /* ignore any items we have already found */ - extra_hits_mask &= ~hits; - - if (unlikely(extra_hits_mask)) { - /* run a single search for each remaining item */ - do { - idx = __builtin_ctzl(extra_hits_mask); - if (data != NULL) { - ret = rte_hash_lookup_with_hash_data(h, - keys[idx], hash_vals[idx], &data[idx]); - if (ret >= 0) - hits |= 1ULL << idx; - } else { - positions[idx] = rte_hash_lookup_with_hash(h, - keys[idx], hash_vals[idx]); - if (positions[idx] >= 0) - hits |= 1llu << idx; + hits |= 1ULL << i; + positions[i] = key_idx - 1; + goto next_key; } - extra_hits_mask &= ~(1llu << idx); - } while (extra_hits_mask); - } + sec_hitmask[i] &= ~(1 << (hit_index)); + } - miss_mask &= ~hits; - if (unlikely(miss_mask)) { - do { - idx = __builtin_ctzl(miss_mask); - positions[idx] = -ENOENT; - miss_mask &= ~(1llu << idx); - } while 
(miss_mask); +next_key: + continue; } if (hit_mask != NULL) @@ -1233,7 +1153,7 @@ rte_hash_iterate(const struct rte_hash *h, const void **key, void **data, uint32 idx = *next % RTE_HASH_BUCKET_ENTRIES; /* If current position is empty, go to the next one */ - while (h->buckets[bucket_idx].signatures[idx].sig == NULL_SIGNATURE) { + while (h->buckets[bucket_idx].key_idx[idx] == EMPTY_SLOT) { (*next)++; /* End of table */ if (*next == total_entries) diff --git a/lib/librte_hash/rte_cuckoo_hash.h b/lib/librte_hash/rte_cuckoo_hash.h index 9625fffe..1b8ffed8 100644 --- a/lib/librte_hash/rte_cuckoo_hash.h +++ b/lib/librte_hash/rte_cuckoo_hash.h @@ -130,10 +130,12 @@ enum add_key_case { }; /** Number of items per bucket. */ -#define RTE_HASH_BUCKET_ENTRIES 4 +#define RTE_HASH_BUCKET_ENTRIES 8 #define NULL_SIGNATURE 0 +#define EMPTY_SLOT 0 + #define KEY_ALIGNMENT 16 #define LCORE_CACHE_SIZE 64 @@ -151,17 +153,6 @@ struct lcore_cache { void *objs[LCORE_CACHE_SIZE]; /**< Cache objects */ } __rte_cache_aligned; -/* Structure storing both primary and secondary hashes */ -struct rte_hash_signatures { - union { - struct { - hash_sig_t current; - hash_sig_t alt; - }; - uint64_t sig; - }; -}; - /* Structure that stores key-value pair */ struct rte_hash_key { union { @@ -172,11 +163,22 @@ struct rte_hash_key { char key[0]; } __attribute__((aligned(KEY_ALIGNMENT))); +/* All different signature compare functions */ +enum rte_hash_sig_compare_function { + RTE_HASH_COMPARE_SCALAR = 0, + RTE_HASH_COMPARE_SSE, + RTE_HASH_COMPARE_AVX2, + RTE_HASH_COMPARE_NUM +}; + /** Bucket structure */ struct rte_hash_bucket { - struct rte_hash_signatures signatures[RTE_HASH_BUCKET_ENTRIES]; - /* Includes dummy key index that always contains index 0 */ - uint32_t key_idx[RTE_HASH_BUCKET_ENTRIES + 1]; + hash_sig_t sig_current[RTE_HASH_BUCKET_ENTRIES]; + + uint32_t key_idx[RTE_HASH_BUCKET_ENTRIES]; + + hash_sig_t sig_alt[RTE_HASH_BUCKET_ENTRIES]; + uint8_t flag[RTE_HASH_BUCKET_ENTRIES]; } __rte_cache_aligned; @@ -185,30 +187,38 @@ struct rte_hash { char name[RTE_HASH_NAMESIZE]; /**< Name of the hash. */ uint32_t entries; /**< Total table entries. */ uint32_t num_buckets; /**< Number of buckets in table. */ - uint32_t key_len; /**< Length of hash key. */ + + struct rte_ring *free_slots; + /**< Ring that stores all indexes of the free slots in the key table */ + uint8_t hw_trans_mem_support; + /**< Hardware transactional memory support */ + struct lcore_cache *local_free_slots; + /**< Local cache per lcore, storing some indexes of the free slots */ + enum add_key_case add_key; /**< Multi-writer hash add behavior */ + + rte_spinlock_t *multiwriter_lock; /**< Multi-writer spinlock for w/o TM */ + + /* Fields used in lookup */ + + uint32_t key_len __rte_cache_aligned; + /**< Length of hash key. */ rte_hash_function hash_func; /**< Function used to calculate hash. */ uint32_t hash_func_init_val; /**< Init value used by hash_func. */ rte_hash_cmp_eq_t rte_hash_custom_cmp_eq; /**< Custom function used to compare keys. */ enum cmp_jump_table_case cmp_jump_table_idx; /**< Indicates which compare function to use. */ - uint32_t bucket_bitmask; /**< Bitmask for getting bucket index - from hash signature. */ + enum rte_hash_sig_compare_function sig_cmp_fn; + /**< Indicates which signature compare function to use. */ + uint32_t bucket_bitmask; + /**< Bitmask for getting bucket index from hash signature. */ uint32_t key_entry_size; /**< Size of each key entry. 
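The rewritten bulk lookup above replaces the old multi-stage pipeline with per-key hit masks that are consumed lowest-bit-first via __builtin_ctzl. The idiom in isolation, assuming a 32-bit mask as in the new code:

#include <stdint.h>

/* Visit every set bit of a hit mask, lowest bucket entry first. */
static void
visit_hits(uint32_t hitmask)
{
	while (hitmask) {
		uint32_t idx = __builtin_ctzl(hitmask);	/* lowest match */

		/* ...full key comparison for bucket entry idx goes here... */
		hitmask &= ~(1U << idx);	/* clear the bit, keep scanning */
	}
}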
*/ - struct rte_ring *free_slots; /**< Ring that stores all indexes - of the free slots in the key table */ void *key_store; /**< Table storing all keys and data */ - struct rte_hash_bucket *buckets; /**< Table with buckets storing all the - hash values and key indexes - to the key table*/ - uint8_t hw_trans_mem_support; /**< Hardware transactional - memory support */ - struct lcore_cache *local_free_slots; - /**< Local cache per lcore, storing some indexes of the free slots */ - enum add_key_case add_key; /**< Multi-writer hash add behavior */ - - rte_spinlock_t *multiwriter_lock; /**< Multi-writer spinlock for w/o TM */ + struct rte_hash_bucket *buckets; + /**< Table with buckets storing all the hash values and key indexes + * to the key table. + */ } __rte_cache_aligned; struct queue_node { diff --git a/lib/librte_hash/rte_cuckoo_hash_x86.h b/lib/librte_hash/rte_cuckoo_hash_x86.h index ace1bd2e..0c94244a 100644 --- a/lib/librte_hash/rte_cuckoo_hash_x86.h +++ b/lib/librte_hash/rte_cuckoo_hash_x86.h @@ -53,10 +53,9 @@ rte_hash_cuckoo_insert_mw_tm(struct rte_hash_bucket *prim_bkt, */ for (i = 0; i < RTE_HASH_BUCKET_ENTRIES; i++) { /* Check if slot is available */ - if (likely(prim_bkt->signatures[i].sig == - NULL_SIGNATURE)) { - prim_bkt->signatures[i].current = sig; - prim_bkt->signatures[i].alt = alt_hash; + if (likely(prim_bkt->key_idx[i] == EMPTY_SLOT)) { + prim_bkt->sig_current[i] = sig; + prim_bkt->sig_alt[i] = alt_hash; prim_bkt->key_idx[i] = new_idx; break; } @@ -102,7 +101,7 @@ rte_hash_cuckoo_move_insert_mw_tm(const struct rte_hash *h, prev_slot = curr_node->prev_slot; prev_alt_bkt_idx - = prev_bkt->signatures[prev_slot].alt + = prev_bkt->sig_alt[prev_slot] & h->bucket_bitmask; if (unlikely(&h->buckets[prev_alt_bkt_idx] @@ -114,10 +113,10 @@ rte_hash_cuckoo_move_insert_mw_tm(const struct rte_hash *h, * Cuckoo insert to move elements back to its * primary bucket if available */ - curr_bkt->signatures[curr_slot].alt = - prev_bkt->signatures[prev_slot].current; - curr_bkt->signatures[curr_slot].current = - prev_bkt->signatures[prev_slot].alt; + curr_bkt->sig_alt[curr_slot] = + prev_bkt->sig_current[prev_slot]; + curr_bkt->sig_current[curr_slot] = + prev_bkt->sig_alt[prev_slot]; curr_bkt->key_idx[curr_slot] = prev_bkt->key_idx[prev_slot]; @@ -126,8 +125,8 @@ rte_hash_cuckoo_move_insert_mw_tm(const struct rte_hash *h, curr_bkt = curr_node->bkt; } - curr_bkt->signatures[curr_slot].current = sig; - curr_bkt->signatures[curr_slot].alt = alt_hash; + curr_bkt->sig_current[curr_slot] = sig; + curr_bkt->sig_alt[curr_slot] = alt_hash; curr_bkt->key_idx[curr_slot] = new_idx; rte_xend(); @@ -172,7 +171,7 @@ rte_hash_cuckoo_make_space_mw_tm(const struct rte_hash *h, RTE_HASH_BUCKET_ENTRIES)) { curr_bkt = tail->bkt; for (i = 0; i < RTE_HASH_BUCKET_ENTRIES; i++) { - if (curr_bkt->signatures[i].sig == NULL_SIGNATURE) { + if (curr_bkt->key_idx[i] == EMPTY_SLOT) { if (likely(rte_hash_cuckoo_move_insert_mw_tm(h, tail, i, sig, alt_hash, new_idx) == 0)) @@ -180,7 +179,7 @@ rte_hash_cuckoo_make_space_mw_tm(const struct rte_hash *h, } /* Enqueue new node and keep prev node info */ - alt_bkt = &(h->buckets[curr_bkt->signatures[i].alt + alt_bkt = &(h->buckets[curr_bkt->sig_alt[i] & h->bucket_bitmask]); head->bkt = alt_bkt; head->prev = tail; diff --git a/lib/librte_hash/rte_fbk_hash.h b/lib/librte_hash/rte_fbk_hash.h index a430961d..bd46048f 100644 --- a/lib/librte_hash/rte_fbk_hash.h +++ b/lib/librte_hash/rte_fbk_hash.h @@ -115,7 +115,7 @@ struct rte_fbk_hash_table { uint32_t init_val; /**< For 
initialising hash function. */ /** A flat table of all buckets. */ - union rte_fbk_hash_entry t[0]; + union rte_fbk_hash_entry t[]; }; /** diff --git a/lib/librte_hash/rte_thash.h b/lib/librte_hash/rte_thash.h index d98e98e7..a4886a8c 100644 --- a/lib/librte_hash/rte_thash.h +++ b/lib/librte_hash/rte_thash.h @@ -54,6 +54,7 @@ extern "C" { #include #include #include +#include #ifdef __SSE3__ #include @@ -102,6 +103,7 @@ static const __m128i rte_thash_ipv6_bswap_mask = { struct rte_ipv4_tuple { uint32_t src_addr; uint32_t dst_addr; + RTE_STD_C11 union { struct { uint16_t dport; @@ -119,6 +121,7 @@ struct rte_ipv4_tuple { struct rte_ipv6_tuple { uint8_t src_addr[16]; uint8_t dst_addr[16]; + RTE_STD_C11 union { struct { uint16_t dport; diff --git a/lib/librte_ip_frag/Makefile b/lib/librte_ip_frag/Makefile index e97dfbd3..43f8b1e3 100644 --- a/lib/librte_ip_frag/Makefile +++ b/lib/librte_ip_frag/Makefile @@ -54,6 +54,7 @@ SYMLINK-$(CONFIG_RTE_LIBRTE_IP_FRAG)-include += rte_ip_frag.h DEPDIRS-$(CONFIG_RTE_LIBRTE_IP_FRAG) += lib/librte_eal DEPDIRS-$(CONFIG_RTE_LIBRTE_IP_FRAG) += lib/librte_ether +DEPDIRS-$(CONFIG_RTE_LIBRTE_IP_FRAG) += lib/librte_hash DEPDIRS-$(CONFIG_RTE_LIBRTE_IP_FRAG) += lib/librte_mbuf DEPDIRS-$(CONFIG_RTE_LIBRTE_IP_FRAG) += lib/librte_mempool diff --git a/lib/librte_ip_frag/rte_ip_frag.h b/lib/librte_ip_frag/rte_ip_frag.h index 9ac7081c..6708906d 100644 --- a/lib/librte_ip_frag/rte_ip_frag.h +++ b/lib/librte_ip_frag/rte_ip_frag.h @@ -124,7 +124,7 @@ struct rte_ip_frag_tbl { struct ip_frag_pkt *last; /**< last used entry. */ struct ip_pkt_list lru; /**< LRU list for table entries. */ struct ip_frag_tbl_stat stat; /**< statistics counters. */ - struct ip_frag_pkt pkt[0]; /**< hash table. */ + __extension__ struct ip_frag_pkt pkt[0]; /**< hash table. */ }; /** IPv6 fragment extension header */ diff --git a/lib/librte_ivshmem/Makefile b/lib/librte_ivshmem/Makefile deleted file mode 100644 index c099438c..00000000 --- a/lib/librte_ivshmem/Makefile +++ /dev/null @@ -1,54 +0,0 @@ -# BSD LICENSE -# -# Copyright(c) 2010-2014 Intel Corporation. All rights reserved. -# All rights reserved. -# -# Redistribution and use in source and binary forms, with or without -# modification, are permitted provided that the following conditions -# are met: -# -# * Redistributions of source code must retain the above copyright -# notice, this list of conditions and the following disclaimer. -# * Redistributions in binary form must reproduce the above copyright -# notice, this list of conditions and the following disclaimer in -# the documentation and/or other materials provided with the -# distribution. -# * Neither the name of Intel Corporation nor the names of its -# contributors may be used to endorse or promote products derived -# from this software without specific prior written permission. -# -# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR -# A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT -# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, -# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT -# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, -# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY -# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - -include $(RTE_SDK)/mk/rte.vars.mk - -# library name -LIB = librte_ivshmem.a - -CFLAGS += $(WERROR_FLAGS) -I$(SRCDIR) -O3 - -EXPORT_MAP := rte_ivshmem_version.map - -LIBABIVER := 1 - -# all source are stored in SRCS-y -SRCS-$(CONFIG_RTE_LIBRTE_IVSHMEM) := rte_ivshmem.c - -# install includes -SYMLINK-$(CONFIG_RTE_LIBRTE_IVSHMEM)-include := rte_ivshmem.h - -# this lib needs EAL, ring and mempool -DEPDIRS-$(CONFIG_RTE_LIBRTE_IVSHMEM) += lib/librte_eal -DEPDIRS-$(CONFIG_RTE_LIBRTE_IVSHMEM) += lib/librte_ring -DEPDIRS-$(CONFIG_RTE_LIBRTE_IVSHMEM) += lib/librte_mempool - -include $(RTE_SDK)/mk/rte.lib.mk diff --git a/lib/librte_ivshmem/rte_ivshmem.c b/lib/librte_ivshmem/rte_ivshmem.c deleted file mode 100644 index c26edb61..00000000 --- a/lib/librte_ivshmem/rte_ivshmem.c +++ /dev/null @@ -1,919 +0,0 @@ -/*- - * BSD LICENSE - * - * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * - * * Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in - * the documentation and/or other materials provided with the - * distribution. - * * Neither the name of Intel Corporation nor the names of its - * contributors may be used to endorse or promote products derived - * from this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
- */ -#include -#include -#include -#include -#include -#include - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include "rte_ivshmem.h" - -#define IVSHMEM_CONFIG_FILE_FMT "/var/run/.dpdk_ivshmem_metadata_%s" -#define IVSHMEM_QEMU_CMD_LINE_HEADER_FMT "-device ivshmem,size=%" PRIu64 "M,shm=fd%s" -#define IVSHMEM_QEMU_CMD_FD_FMT ":%s:0x%" PRIx64 ":0x%" PRIx64 -#define IVSHMEM_QEMU_CMDLINE_BUFSIZE 1024 -#define IVSHMEM_MAX_PAGES (1 << 12) -#define adjacent(x,y) (((x).phys_addr+(x).len)==(y).phys_addr) -#define METADATA_SIZE_ALIGNED \ - (RTE_ALIGN_CEIL(sizeof(struct rte_ivshmem_metadata),pagesz)) - -#define GET_PAGEMAP_ADDR(in,addr,dlm,err) \ -{ \ - char *end; \ - errno = 0; \ - addr = strtoull((in), &end, 16); \ - if (errno != 0 || *end != (dlm)) { \ - RTE_LOG(ERR, EAL, err); \ - goto error; \ - } \ - (in) = end + 1; \ -} - -static int pagesz; - -struct memseg_cache_entry { - char filepath[PATH_MAX]; - uint64_t offset; - uint64_t len; -}; - -struct ivshmem_config { - struct rte_ivshmem_metadata * metadata; - struct memseg_cache_entry memseg_cache[IVSHMEM_MAX_PAGES]; - /**< account for multiple files per segment case */ - struct flock lock; - rte_spinlock_t sl; -}; - -static struct ivshmem_config -ivshmem_global_config[RTE_LIBRTE_IVSHMEM_MAX_METADATA_FILES]; - -static rte_spinlock_t global_cfg_sl; - -static struct ivshmem_config * -get_config_by_name(const char * name) -{ - struct rte_ivshmem_metadata * config; - unsigned i; - - for (i = 0; i < RTE_DIM(ivshmem_global_config); i++) { - config = ivshmem_global_config[i].metadata; - if (config == NULL) - return NULL; - if (strncmp(name, config->name, IVSHMEM_NAME_LEN) == 0) - return &ivshmem_global_config[i]; - } - - return NULL; -} - -static int -overlap(const struct rte_memzone * s1, const struct rte_memzone * s2) -{ - uint64_t start1, end1, start2, end2; - - start1 = s1->addr_64; - end1 = s1->addr_64 + s1->len; - start2 = s2->addr_64; - end2 = s2->addr_64 + s2->len; - - if (start1 >= start2 && start1 < end2) - return 1; - if (start2 >= start1 && start2 < end1) - return 1; - - return 0; -} - -static struct rte_memzone * -get_memzone_by_addr(const void * addr) -{ - struct rte_memzone * tmp, * mz; - struct rte_mem_config * mcfg; - int i; - - mcfg = rte_eal_get_configuration()->mem_config; - mz = NULL; - - /* find memzone for the ring */ - for (i = 0; i < RTE_MAX_MEMZONE; i++) { - tmp = &mcfg->memzone[i]; - - if (tmp->addr_64 == (uint64_t) addr) { - mz = tmp; - break; - } - } - - return mz; -} - -static int -entry_compare(const void * a, const void * b) -{ - const struct rte_ivshmem_metadata_entry * e1 = - (const struct rte_ivshmem_metadata_entry*) a; - const struct rte_ivshmem_metadata_entry * e2 = - (const struct rte_ivshmem_metadata_entry*) b; - - /* move unallocated zones to the end */ - if (e1->mz.addr == NULL && e2->mz.addr == NULL) - return 0; - if (e1->mz.addr == 0) - return 1; - if (e2->mz.addr == 0) - return -1; - - return e1->mz.phys_addr > e2->mz.phys_addr; -} - -/* fills hugepage cache entry for a given start virt_addr */ -static int -get_hugefile_by_virt_addr(uint64_t virt_addr, struct memseg_cache_entry * e) -{ - uint64_t start_addr, end_addr; - char *start,*path_end; - char buf[PATH_MAX*2]; - FILE *f; - - start = NULL; - path_end = NULL; - start_addr = 0; - - memset(e->filepath, 0, sizeof(e->filepath)); - - /* open /proc/self/maps */ - f = fopen("/proc/self/maps", "r"); - if (f == NULL) { - RTE_LOG(ERR, EAL, "cannot open /proc/self/maps!\n"); - return -1; - } - - /* parse 
maps */ - while (fgets(buf, sizeof(buf), f) != NULL) { - - /* get endptr to end of start addr */ - start = buf; - - GET_PAGEMAP_ADDR(start,start_addr,'-', - "Cannot find start address in maps!\n"); - - /* if start address is bigger than our address, skip */ - if (start_addr > virt_addr) - continue; - - GET_PAGEMAP_ADDR(start,end_addr,' ', - "Cannot find end address in maps!\n"); - - /* if end address is less than our address, skip */ - if (end_addr <= virt_addr) - continue; - - /* find where the path starts */ - start = strstr(start, "/"); - - if (start == NULL) - continue; - - /* at this point, we know that this is our map. - * now let's find the file */ - path_end = strstr(start, "\n"); - break; - } - - if (path_end == NULL) { - RTE_LOG(ERR, EAL, "Hugefile path not found!\n"); - goto error; - } - - /* calculate offset and copy the file path */ - snprintf(e->filepath, RTE_PTR_DIFF(path_end, start) + 1, "%s", start); - - e->offset = virt_addr - start_addr; - - fclose(f); - - return 0; -error: - fclose(f); - return -1; -} - -/* - * This is a complex function. What it does is the following: - * 1. Goes through metadata and gets list of hugepages involved - * 2. Sorts the hugepages by size (1G first) - * 3. Goes through metadata again and writes correct offsets - * 4. Goes through pages and finds out their filenames, offsets etc. - */ -static int -build_config(struct rte_ivshmem_metadata * metadata) -{ - struct rte_ivshmem_metadata_entry * e_local; - struct memseg_cache_entry * ms_local; - struct rte_memseg pages[IVSHMEM_MAX_PAGES]; - struct rte_ivshmem_metadata_entry *entry; - struct memseg_cache_entry * c_entry, * prev_entry; - struct ivshmem_config * config; - unsigned i, j, mz_iter, ms_iter; - uint64_t biggest_len; - int biggest_idx; - - /* return error if we try to use an unknown config file */ - config = get_config_by_name(metadata->name); - if (config == NULL) { - RTE_LOG(ERR, EAL, "Cannot find IVSHMEM config %s!\n", metadata->name); - goto fail_e; - } - - memset(pages, 0, sizeof(pages)); - - e_local = malloc(sizeof(config->metadata->entry)); - if (e_local == NULL) - goto fail_e; - ms_local = malloc(sizeof(config->memseg_cache)); - if (ms_local == NULL) - goto fail_ms; - - - /* make local copies before doing anything */ - memcpy(e_local, config->metadata->entry, sizeof(config->metadata->entry)); - memcpy(ms_local, config->memseg_cache, sizeof(config->memseg_cache)); - - qsort(e_local, RTE_DIM(config->metadata->entry), sizeof(struct rte_ivshmem_metadata_entry), - entry_compare); - - /* first pass - collect all huge pages */ - for (mz_iter = 0; mz_iter < RTE_DIM(config->metadata->entry); mz_iter++) { - - entry = &e_local[mz_iter]; - - uint64_t start_addr = RTE_ALIGN_FLOOR(entry->mz.addr_64, - entry->mz.hugepage_sz); - uint64_t offset = entry->mz.addr_64 - start_addr; - uint64_t len = RTE_ALIGN_CEIL(entry->mz.len + offset, - entry->mz.hugepage_sz); - - if (entry->mz.addr_64 == 0 || start_addr == 0 || len == 0) - continue; - - int start_page; - - /* find first unused page - mz are phys_addr sorted so we don't have to - * look out for holes */ - for (i = 0; i < RTE_DIM(pages); i++) { - - /* skip if we already have this page */ - if (pages[i].addr_64 == start_addr) { - start_addr += entry->mz.hugepage_sz; - len -= entry->mz.hugepage_sz; - continue; - } - /* we found a new page */ - else if (pages[i].addr_64 == 0) { - start_page = i; - break; - } - } - if (i == RTE_DIM(pages)) { - RTE_LOG(ERR, EAL, "Cannot find unused page!\n"); - goto fail; - } - - /* populate however many pages the memzone 
has */ - for (i = start_page; i < RTE_DIM(pages) && len != 0; i++) { - - pages[i].addr_64 = start_addr; - pages[i].len = entry->mz.hugepage_sz; - start_addr += entry->mz.hugepage_sz; - len -= entry->mz.hugepage_sz; - } - /* if there's still length left */ - if (len != 0) { - RTE_LOG(ERR, EAL, "Not enough space for pages!\n"); - goto fail; - } - } - - /* second pass - sort pages by size */ - for (i = 0; i < RTE_DIM(pages); i++) { - - if (pages[i].addr == NULL) - break; - - biggest_len = 0; - biggest_idx = -1; - - /* - * browse all entries starting at 'i', and find the - * entry with the smallest addr - */ - for (j=i; j< RTE_DIM(pages); j++) { - if (pages[j].addr == NULL) - break; - if (biggest_len == 0 || - pages[j].len > biggest_len) { - biggest_len = pages[j].len; - biggest_idx = j; - } - } - - /* should not happen */ - if (biggest_idx == -1) { - RTE_LOG(ERR, EAL, "Error sorting by size!\n"); - goto fail; - } - if (i != (unsigned) biggest_idx) { - struct rte_memseg tmp; - - memcpy(&tmp, &pages[biggest_idx], sizeof(struct rte_memseg)); - - /* we don't want to break contiguousness, so instead of just - * swapping segments, we move all the preceding segments to the - * right and then put the old segment @ biggest_idx in place of - * segment @ i */ - for (j = biggest_idx - 1; j >= i; j--) { - memcpy(&pages[j+1], &pages[j], sizeof(struct rte_memseg)); - memset(&pages[j], 0, sizeof(struct rte_memseg)); - if (j == 0) - break; - } - - /* put old biggest segment to its new place */ - memcpy(&pages[i], &tmp, sizeof(struct rte_memseg)); - } - } - - /* third pass - write correct offsets */ - for (mz_iter = 0; mz_iter < RTE_DIM(config->metadata->entry); mz_iter++) { - - uint64_t offset = 0; - - entry = &e_local[mz_iter]; - - if (entry->mz.addr_64 == 0) - break; - - /* find page for current memzone */ - for (i = 0; i < RTE_DIM(pages); i++) { - /* we found our page */ - if (entry->mz.addr_64 >= pages[i].addr_64 && - entry->mz.addr_64 < pages[i].addr_64 + pages[i].len) { - entry->offset = (entry->mz.addr_64 - pages[i].addr_64) + - offset; - break; - } - offset += pages[i].len; - } - if (i == RTE_DIM(pages)) { - RTE_LOG(ERR, EAL, "Page not found!\n"); - goto fail; - } - } - - ms_iter = 0; - prev_entry = NULL; - - /* fourth pass - create proper memseg cache */ - for (i = 0; i < RTE_DIM(pages) && - ms_iter <= RTE_DIM(config->memseg_cache); i++) { - if (pages[i].addr_64 == 0) - break; - - - if (ms_iter == RTE_DIM(pages)) { - RTE_LOG(ERR, EAL, "The universe has collapsed!\n"); - goto fail; - } - - c_entry = &ms_local[ms_iter]; - c_entry->len = pages[i].len; - - if (get_hugefile_by_virt_addr(pages[i].addr_64, c_entry) < 0) - goto fail; - - /* if previous entry has the same filename and is contiguous, - * clear current entry and increase previous entry's length - */ - if (prev_entry != NULL && - strncmp(c_entry->filepath, prev_entry->filepath, - sizeof(c_entry->filepath)) == 0 && - prev_entry->offset + prev_entry->len == c_entry->offset) { - prev_entry->len += pages[i].len; - memset(c_entry, 0, sizeof(struct memseg_cache_entry)); - } - else { - prev_entry = c_entry; - ms_iter++; - } - } - - /* update current configuration with new valid data */ - memcpy(config->metadata->entry, e_local, sizeof(config->metadata->entry)); - memcpy(config->memseg_cache, ms_local, sizeof(config->memseg_cache)); - - free(ms_local); - free(e_local); - - return 0; -fail: - free(ms_local); -fail_ms: - free(e_local); -fail_e: - return -1; -} - -static int -add_memzone_to_metadata(const struct rte_memzone * mz, - struct ivshmem_config * 
config) -{ - struct rte_ivshmem_metadata_entry * entry; - unsigned i, idx; - struct rte_mem_config *mcfg; - - if (mz->len == 0) { - RTE_LOG(ERR, EAL, "Trying to add an empty memzone\n"); - return -1; - } - - rte_spinlock_lock(&config->sl); - - mcfg = rte_eal_get_configuration()->mem_config; - - /* it prevents the memzone being freed while we add it to the metadata */ - rte_rwlock_write_lock(&mcfg->mlock); - - /* find free slot in this config */ - for (i = 0; i < RTE_DIM(config->metadata->entry); i++) { - entry = &config->metadata->entry[i]; - - if (&entry->mz.addr_64 != 0 && overlap(mz, &entry->mz)) { - RTE_LOG(ERR, EAL, "Overlapping memzones!\n"); - goto fail; - } - - /* if addr is zero, the memzone is probably free */ - if (entry->mz.addr_64 == 0) { - RTE_LOG(DEBUG, EAL, "Adding memzone '%s' at %p to metadata %s\n", - mz->name, mz->addr, config->metadata->name); - memcpy(&entry->mz, mz, sizeof(struct rte_memzone)); - - /* run config file parser */ - if (build_config(config->metadata) < 0) - goto fail; - - break; - } - } - - /* if we reached the maximum, that means we have no place in config */ - if (i == RTE_DIM(config->metadata->entry)) { - RTE_LOG(ERR, EAL, "No space left in IVSHMEM metadata %s!\n", - config->metadata->name); - goto fail; - } - - idx = ((uintptr_t)mz - (uintptr_t)mcfg->memzone); - idx = idx / sizeof(struct rte_memzone); - - /* mark the memzone not freeable */ - mcfg->memzone[idx].ioremap_addr = mz->phys_addr; - - rte_rwlock_write_unlock(&mcfg->mlock); - rte_spinlock_unlock(&config->sl); - return 0; -fail: - rte_rwlock_write_unlock(&mcfg->mlock); - rte_spinlock_unlock(&config->sl); - return -1; -} - -static int -add_ring_to_metadata(const struct rte_ring * r, - struct ivshmem_config * config) -{ - struct rte_memzone * mz; - - mz = get_memzone_by_addr(r); - - if (!mz) { - RTE_LOG(ERR, EAL, "Cannot find memzone for ring!\n"); - return -1; - } - - return add_memzone_to_metadata(mz, config); -} - -static int -add_mempool_memzone_to_metadata(const void *addr, - struct ivshmem_config *config) -{ - struct rte_memzone *mz; - - mz = get_memzone_by_addr(addr); - - if (!mz) { - RTE_LOG(ERR, EAL, "Cannot find memzone for mempool!\n"); - return -1; - } - - return add_memzone_to_metadata(mz, config); -} - -static int -add_mempool_to_metadata(const struct rte_mempool *mp, - struct ivshmem_config *config) -{ - struct rte_mempool_memhdr *memhdr; - int ret; - - ret = add_mempool_memzone_to_metadata(mp, config); - if (ret < 0) - return -1; - - STAILQ_FOREACH(memhdr, &mp->mem_list, next) { - ret = add_mempool_memzone_to_metadata(memhdr->addr, config); - if (ret < 0) - return -1; - } - - /* mempool consists of memzone and ring */ - return add_ring_to_metadata(mp->pool_data, config); -} - -int -rte_ivshmem_metadata_add_ring(const struct rte_ring * r, const char * name) -{ - struct ivshmem_config * config; - - if (name == NULL || r == NULL) - return -1; - - config = get_config_by_name(name); - - if (config == NULL) { - RTE_LOG(ERR, EAL, "Cannot find IVSHMEM config %s!\n", name); - return -1; - } - - return add_ring_to_metadata(r, config); -} - -int -rte_ivshmem_metadata_add_memzone(const struct rte_memzone * mz, const char * name) -{ - struct ivshmem_config * config; - - if (name == NULL || mz == NULL) - return -1; - - config = get_config_by_name(name); - - if (config == NULL) { - RTE_LOG(ERR, EAL, "Cannot find IVSHMEM config %s!\n", name); - return -1; - } - - return add_memzone_to_metadata(mz, config); -} - -int -rte_ivshmem_metadata_add_mempool(const struct rte_mempool * mp, const char * 
name) -{ - struct ivshmem_config * config; - - if (name == NULL || mp == NULL) - return -1; - - config = get_config_by_name(name); - - if (config == NULL) { - RTE_LOG(ERR, EAL, "Cannot find IVSHMEM config %s!\n", name); - return -1; - } - - return add_mempool_to_metadata(mp, config); -} - -static inline void -ivshmem_config_path(char *buffer, size_t bufflen, const char *name) -{ - snprintf(buffer, bufflen, IVSHMEM_CONFIG_FILE_FMT, name); -} - - - -static inline -void *ivshmem_metadata_create(const char *name, size_t size, - struct flock *lock) -{ - int retval, fd; - void *metadata_addr; - char pathname[PATH_MAX]; - - ivshmem_config_path(pathname, sizeof(pathname), name); - - fd = open(pathname, O_RDWR | O_CREAT, 0660); - if (fd < 0) { - RTE_LOG(ERR, EAL, "Cannot open '%s'\n", pathname); - return NULL; - } - - size = METADATA_SIZE_ALIGNED; - - retval = fcntl(fd, F_SETLK, lock); - if (retval < 0){ - close(fd); - RTE_LOG(ERR, EAL, "Cannot create lock on '%s'. Is another " - "process using it?\n", pathname); - return NULL; - } - - retval = ftruncate(fd, size); - if (retval < 0){ - close(fd); - RTE_LOG(ERR, EAL, "Cannot resize '%s'\n", pathname); - return NULL; - } - - metadata_addr = mmap(NULL, size, - PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0); - - if (metadata_addr == MAP_FAILED){ - RTE_LOG(ERR, EAL, "Cannot mmap memory for '%s'\n", pathname); - - /* we don't care if we can't unlock */ - fcntl(fd, F_UNLCK, lock); - close(fd); - - return NULL; - } - - return metadata_addr; -} - -int rte_ivshmem_metadata_create(const char *name) -{ - struct ivshmem_config * ivshmem_config; - unsigned index; - - if (pagesz == 0) - pagesz = getpagesize(); - - if (name == NULL) - return -1; - - rte_spinlock_lock(&global_cfg_sl); - - for (index = 0; index < RTE_DIM(ivshmem_global_config); index++) { - if (ivshmem_global_config[index].metadata == NULL) { - ivshmem_config = &ivshmem_global_config[index]; - break; - } - } - - if (index == RTE_DIM(ivshmem_global_config)) { - RTE_LOG(ERR, EAL, "Cannot create more ivshmem config files. 
" - "Maximum has been reached\n"); - rte_spinlock_unlock(&global_cfg_sl); - return -1; - } - - ivshmem_config->lock.l_type = F_WRLCK; - ivshmem_config->lock.l_whence = SEEK_SET; - - ivshmem_config->lock.l_start = 0; - ivshmem_config->lock.l_len = METADATA_SIZE_ALIGNED; - - ivshmem_global_config[index].metadata = ((struct rte_ivshmem_metadata *) - ivshmem_metadata_create( - name, - sizeof(struct rte_ivshmem_metadata), - &ivshmem_config->lock)); - - if (ivshmem_global_config[index].metadata == NULL) { - rte_spinlock_unlock(&global_cfg_sl); - return -1; - } - - /* Metadata setup */ - memset(ivshmem_config->metadata, 0, sizeof(struct rte_ivshmem_metadata)); - ivshmem_config->metadata->magic_number = IVSHMEM_MAGIC; - snprintf(ivshmem_config->metadata->name, - sizeof(ivshmem_config->metadata->name), "%s", name); - - rte_spinlock_unlock(&global_cfg_sl); - - return 0; -} - -int -rte_ivshmem_metadata_cmdline_generate(char *buffer, unsigned size, const char *name) -{ - const struct memseg_cache_entry * ms_cache, *entry; - struct ivshmem_config * config; - char cmdline[IVSHMEM_QEMU_CMDLINE_BUFSIZE], *cmdline_ptr; - char cfg_file_path[PATH_MAX]; - unsigned remaining_len, tmplen, iter; - uint64_t shared_mem_size, zero_size, total_size; - - if (buffer == NULL || name == NULL) - return -1; - - config = get_config_by_name(name); - - if (config == NULL) { - RTE_LOG(ERR, EAL, "Config %s not found!\n", name); - return -1; - } - - rte_spinlock_lock(&config->sl); - - /* prepare metadata file path */ - snprintf(cfg_file_path, sizeof(cfg_file_path), IVSHMEM_CONFIG_FILE_FMT, - config->metadata->name); - - ms_cache = config->memseg_cache; - - cmdline_ptr = cmdline; - remaining_len = sizeof(cmdline); - - shared_mem_size = 0; - iter = 0; - - while ((ms_cache[iter].len != 0) && (iter < RTE_DIM(config->metadata->entry))) { - - entry = &ms_cache[iter]; - - /* Offset and sizes within the current pathname */ - tmplen = snprintf(cmdline_ptr, remaining_len, IVSHMEM_QEMU_CMD_FD_FMT, - entry->filepath, entry->offset, entry->len); - - shared_mem_size += entry->len; - - cmdline_ptr = RTE_PTR_ADD(cmdline_ptr, tmplen); - remaining_len -= tmplen; - - if (remaining_len == 0) { - RTE_LOG(ERR, EAL, "Command line too long!\n"); - rte_spinlock_unlock(&config->sl); - return -1; - } - - iter++; - } - - total_size = rte_align64pow2(shared_mem_size + METADATA_SIZE_ALIGNED); - zero_size = total_size - shared_mem_size - METADATA_SIZE_ALIGNED; - - /* add /dev/zero to command-line to fill the space */ - tmplen = snprintf(cmdline_ptr, remaining_len, IVSHMEM_QEMU_CMD_FD_FMT, - "/dev/zero", - (uint64_t)0x0, - zero_size); - - cmdline_ptr = RTE_PTR_ADD(cmdline_ptr, tmplen); - remaining_len -= tmplen; - - if (remaining_len == 0) { - RTE_LOG(ERR, EAL, "Command line too long!\n"); - rte_spinlock_unlock(&config->sl); - return -1; - } - - /* add metadata file to the end of command-line */ - tmplen = snprintf(cmdline_ptr, remaining_len, IVSHMEM_QEMU_CMD_FD_FMT, - cfg_file_path, - (uint64_t)0x0, - METADATA_SIZE_ALIGNED); - - cmdline_ptr = RTE_PTR_ADD(cmdline_ptr, tmplen); - remaining_len -= tmplen; - - if (remaining_len == 0) { - RTE_LOG(ERR, EAL, "Command line too long!\n"); - rte_spinlock_unlock(&config->sl); - return -1; - } - - /* if current length of the command line is bigger than the buffer supplied - * by the user, or if command-line is bigger than what IVSHMEM accepts */ - if ((sizeof(cmdline) - remaining_len) > size) { - RTE_LOG(ERR, EAL, "Buffer is too short!\n"); - rte_spinlock_unlock(&config->sl); - return -1; - } - /* complete the 
command-line */ - snprintf(buffer, size, - IVSHMEM_QEMU_CMD_LINE_HEADER_FMT, - total_size >> 20, - cmdline); - - rte_spinlock_unlock(&config->sl); - - return 0; -} - -void -rte_ivshmem_metadata_dump(FILE *f, const char *name) -{ - unsigned i = 0; - struct ivshmem_config * config; - struct rte_ivshmem_metadata_entry *entry; -#ifdef RTE_LIBRTE_IVSHMEM_DEBUG - uint64_t addr; - uint64_t end, hugepage_sz; - struct memseg_cache_entry e; -#endif - - if (name == NULL) - return; - - /* return error if we try to use an unknown config file */ - config = get_config_by_name(name); - if (config == NULL) { - RTE_LOG(ERR, EAL, "Cannot find IVSHMEM config %s!\n", name); - return; - } - - rte_spinlock_lock(&config->sl); - - entry = &config->metadata->entry[0]; - - while (entry->mz.addr != NULL && i < RTE_DIM(config->metadata->entry)) { - - fprintf(f, "Entry %u: name:<%-20s>, phys:0x%-15lx, len:0x%-15lx, " - "virt:%-15p, off:0x%-15lx\n", - i, - entry->mz.name, - entry->mz.phys_addr, - entry->mz.len, - entry->mz.addr, - entry->offset); - i++; - -#ifdef RTE_LIBRTE_IVSHMEM_DEBUG - fprintf(f, "\tHugepage files:\n"); - - hugepage_sz = entry->mz.hugepage_sz; - addr = RTE_ALIGN_FLOOR(entry->mz.addr_64, hugepage_sz); - end = addr + RTE_ALIGN_CEIL(entry->mz.len + (entry->mz.addr_64 - addr), - hugepage_sz); - - for (; addr < end; addr += hugepage_sz) { - memset(&e, 0, sizeof(e)); - - get_hugefile_by_virt_addr(addr, &e); - - fprintf(f, "\t0x%"PRIx64 "-0x%" PRIx64 " offset: 0x%" PRIx64 " %s\n", - addr, addr + hugepage_sz, e.offset, e.filepath); - } -#endif - entry++; - } - - rte_spinlock_unlock(&config->sl); -} diff --git a/lib/librte_ivshmem/rte_ivshmem.h b/lib/librte_ivshmem/rte_ivshmem.h deleted file mode 100644 index a5d36d6b..00000000 --- a/lib/librte_ivshmem/rte_ivshmem.h +++ /dev/null @@ -1,165 +0,0 @@ -/*- - * BSD LICENSE - * - * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * - * * Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in - * the documentation and/or other materials provided with the - * distribution. - * * Neither the name of Intel Corporation nor the names of its - * contributors may be used to endorse or promote products derived - * from this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
- */ - -#ifndef RTE_IVSHMEM_H_ -#define RTE_IVSHMEM_H_ - -#include -#include - -/** - * @file - * - * The RTE IVSHMEM interface provides functions to create metadata files - * describing memory segments to be shared via QEMU IVSHMEM. - */ - - -#ifdef __cplusplus -extern "C" { -#endif - -#define IVSHMEM_MAGIC 0x0BADC0DE -#define IVSHMEM_NAME_LEN 32 - -/** - * Structure that holds IVSHMEM shared metadata entry. - */ -struct rte_ivshmem_metadata_entry { - struct rte_memzone mz; /**< shared memzone */ - uint64_t offset; /**< offset of memzone within IVSHMEM device */ -}; - -/** - * Structure that holds IVSHMEM metadata. - */ -struct rte_ivshmem_metadata { - int magic_number; /**< magic number */ - char name[IVSHMEM_NAME_LEN]; /**< name of the metadata file */ - struct rte_ivshmem_metadata_entry entry[RTE_LIBRTE_IVSHMEM_MAX_ENTRIES]; - /**< metadata entries */ -}; - -/** - * Creates metadata file with a given name - * - * @param name - * Name of metadata file to be created - * - * @return - * - On success, zero - * - On failure, a negative value - */ -int rte_ivshmem_metadata_create(const char * name); - -/** - * Adds memzone to a specific metadata file - * - * @param mz - * Memzone to be added - * @param md_name - * Name of metadata file for the memzone to be added to - * - * @return - * - On success, zero - * - On failure, a negative value - */ -int rte_ivshmem_metadata_add_memzone(const struct rte_memzone * mz, - const char * md_name); - -/** - * Adds a ring descriptor to a specific metadata file - * - * @param r - * Ring descriptor to be added - * @param md_name - * Name of metadata file for the ring to be added to - * - * @return - * - On success, zero - * - On failure, a negative value - */ -int rte_ivshmem_metadata_add_ring(const struct rte_ring * r, - const char * md_name); - -/** - * Adds a mempool to a specific metadata file - * - * @param mp - * Mempool to be added - * @param md_name - * Name of metadata file for the mempool to be added to - * - * @return - * - On success, zero - * - On failure, a negative value - */ -int rte_ivshmem_metadata_add_mempool(const struct rte_mempool * mp, - const char * md_name); - - -/** - * Generates the QEMU command-line for IVSHMEM device for a given metadata file. - * This function is to be called after all the objects were added. - * - * @param buffer - * Buffer to be filled with the command line arguments. - * @param size - * Size of the buffer. - * @param name - * Name of metadata file to generate QEMU command-line parameters for - * - * @return - * - On success, zero - * - On failure, a negative value - */ -int rte_ivshmem_metadata_cmdline_generate(char *buffer, unsigned size, - const char *name); - - -/** - * Dump all metadata entries from a given metadata file to the console. - * - * @param f - * A pointer to a file for output - * @name - * Name of the metadata file to be dumped to console. 
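 *
 * A minimal usage sketch of this API, assuming an existing ring r and
 * mempool mp; the metadata name "md_0" and the buffer size are
 * illustrative only:
 *
 *   char qemu_args[2048];
 *
 *   if (rte_ivshmem_metadata_create("md_0") < 0)
 *       rte_exit(EXIT_FAILURE, "Cannot create metadata file\n");
 *   if (rte_ivshmem_metadata_add_ring(r, "md_0") < 0 ||
 *       rte_ivshmem_metadata_add_mempool(mp, "md_0") < 0)
 *       rte_exit(EXIT_FAILURE, "Cannot add objects to metadata\n");
 *   if (rte_ivshmem_metadata_cmdline_generate(qemu_args,
 *           sizeof(qemu_args), "md_0") < 0)
 *       rte_exit(EXIT_FAILURE, "Cannot generate QEMU command line\n");
 *   rte_ivshmem_metadata_dump(stdout, "md_0");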
- */ -void rte_ivshmem_metadata_dump(FILE *f, const char *name); - - -#ifdef __cplusplus -} -#endif - -#endif /* RTE_IVSHMEM_H_ */ diff --git a/lib/librte_ivshmem/rte_ivshmem_version.map b/lib/librte_ivshmem/rte_ivshmem_version.map deleted file mode 100644 index 5a393ddc..00000000 --- a/lib/librte_ivshmem/rte_ivshmem_version.map +++ /dev/null @@ -1,12 +0,0 @@ -DPDK_2.0 { - global: - - rte_ivshmem_metadata_add_mempool; - rte_ivshmem_metadata_add_memzone; - rte_ivshmem_metadata_add_ring; - rte_ivshmem_metadata_cmdline_generate; - rte_ivshmem_metadata_create; - rte_ivshmem_metadata_dump; - - local: *; -}; diff --git a/lib/librte_kni/rte_kni.c b/lib/librte_kni/rte_kni.c index 3028fd43..a80cefd2 100644 --- a/lib/librte_kni/rte_kni.c +++ b/lib/librte_kni/rte_kni.c @@ -210,14 +210,18 @@ rte_kni_init(unsigned int max_kni_ifaces) if (max_kni_ifaces == 0) { RTE_LOG(ERR, KNI, "Invalid number of max_kni_ifaces %d\n", max_kni_ifaces); - rte_panic("Unable to initialize KNI\n"); + RTE_LOG(ERR, KNI, "Unable to initialize KNI\n"); + return; } /* Check FD and open */ if (kni_fd < 0) { kni_fd = open("/dev/" KNI_DEVICE, O_RDWR); - if (kni_fd < 0) - rte_panic("Can not open /dev/%s\n", KNI_DEVICE); + if (kni_fd < 0) { + RTE_LOG(ERR, KNI, + "Can not open /dev/%s\n", KNI_DEVICE); + return; + } } /* Allocate slot objects */ @@ -307,8 +311,8 @@ rte_kni_init(unsigned int max_kni_ifaces) return; kni_fail: - rte_panic("Unable to allocate memory for max_kni_ifaces:%d. Increase the amount of hugepages memory\n", - max_kni_ifaces); + RTE_LOG(ERR, KNI, "Unable to allocate memory for max_kni_ifaces:%d." + "Increase the amount of hugepages memory\n", max_kni_ifaces); } @@ -321,9 +325,7 @@ rte_kni_alloc(struct rte_mempool *pktmbuf_pool, struct rte_kni_device_info dev_info; struct rte_kni *ctx; char intf_name[RTE_KNI_NAMESIZE]; - char mz_name[RTE_MEMZONE_NAMESIZE]; const struct rte_memzone *mz; - const struct rte_mempool *mp; struct rte_kni_memzone_slot *slot = NULL; if (!pktmbuf_pool || !conf || !conf->name[0]) @@ -414,19 +416,6 @@ rte_kni_alloc(struct rte_mempool *pktmbuf_pool, dev_info.sync_va = mz->addr; dev_info.sync_phys = mz->phys_addr; - - /* MBUF mempool */ - snprintf(mz_name, sizeof(mz_name), RTE_MEMPOOL_MZ_FORMAT, - pktmbuf_pool->name); - mz = rte_memzone_lookup(mz_name); - KNI_MEM_CHECK(mz == NULL); - mp = (struct rte_mempool *)mz->addr; - /* KNI currently requires to have only one memory chunk */ - if (mp->nb_mem_chunks != 1) - goto kni_fail; - - dev_info.mbuf_va = STAILQ_FIRST(&mp->mem_list)->addr; - dev_info.mbuf_phys = STAILQ_FIRST(&mp->mem_list)->phys_addr; ctx->pktmbuf_pool = pktmbuf_pool; ctx->group_id = conf->group_id; ctx->slot_id = slot->id; @@ -462,6 +451,20 @@ kni_free_fifo(struct rte_kni_fifo *fifo) } while (ret); } +static void +kni_free_fifo_phy(struct rte_kni_fifo *fifo) +{ + void *mbuf_phys; + int ret; + + do { + ret = kni_fifo_get(fifo, &mbuf_phys, 1); + /* + * TODO: free mbufs + */ + } while (ret); +} + int rte_kni_release(struct rte_kni *kni) { @@ -479,8 +482,8 @@ rte_kni_release(struct rte_kni *kni) /* mbufs in all fifo should be released, except request/response */ kni_free_fifo(kni->tx_q); - kni_free_fifo(kni->rx_q); - kni_free_fifo(kni->alloc_q); + kni_free_fifo_phy(kni->rx_q); + kni_free_fifo_phy(kni->alloc_q); kni_free_fifo(kni->free_q); slot_id = kni->slot_id; @@ -490,8 +493,9 @@ rte_kni_release(struct rte_kni *kni) /* Release memzone */ if (slot_id > kni_memzone_pool.max_ifaces) { - rte_panic("KNI pool: corrupted slot ID: %d, max: %d\n", + RTE_LOG(ERR, KNI, "KNI pool: corrupted slot ID: 
%d, max: %d\n", slot_id, kni_memzone_pool.max_ifaces); + return -1; } kni_memzone_pool_release(&kni_memzone_pool.slots[slot_id]); @@ -513,7 +517,8 @@ rte_kni_handle_request(struct rte_kni *kni) return 0; /* It is OK of can not getting the request mbuf */ if (req != kni->sync_addr) { - rte_panic("Wrong req pointer %p\n", req); + RTE_LOG(ERR, KNI, "Wrong req pointer %p\n", req); + return -1; } /* Analyze the request and call the relevant actions for it */ @@ -544,10 +549,25 @@ rte_kni_handle_request(struct rte_kni *kni) return 0; } +static void * +va2pa(struct rte_mbuf *m) +{ + return (void *)((unsigned long)m - + ((unsigned long)m->buf_addr - + (unsigned long)m->buf_physaddr)); +} + unsigned rte_kni_tx_burst(struct rte_kni *kni, struct rte_mbuf **mbufs, unsigned num) { - unsigned ret = kni_fifo_put(kni->rx_q, (void **)mbufs, num); + void *phy_mbufs[num]; + unsigned int ret; + unsigned int i; + + for (i = 0; i < num; i++) + phy_mbufs[i] = va2pa(mbufs[i]); + + ret = kni_fifo_put(kni->rx_q, phy_mbufs, num); /* Get mbufs from free_q and then free them */ kni_free_mbufs(kni); @@ -585,6 +605,7 @@ kni_allocate_mbufs(struct rte_kni *kni) { int i, ret; struct rte_mbuf *pkts[MAX_MBUF_BURST_NUM]; + void *phys[MAX_MBUF_BURST_NUM]; RTE_BUILD_BUG_ON(offsetof(struct rte_mbuf, pool) != offsetof(struct rte_kni_mbuf, pool)); @@ -614,13 +635,14 @@ kni_allocate_mbufs(struct rte_kni *kni) RTE_LOG(ERR, KNI, "Out of memory\n"); break; } + phys[i] = va2pa(pkts[i]); } /* No pkt mbuf alocated */ if (i <= 0) return; - ret = kni_fifo_put(kni->alloc_q, (void **)pkts, i); + ret = kni_fifo_put(kni->alloc_q, phys, i); /* Check if any mbufs not put into alloc_q, and then free them */ if (ret >= 0 && ret < i && ret < MAX_MBUF_BURST_NUM) { diff --git a/lib/librte_kni/rte_kni.h b/lib/librte_kni/rte_kni.h index 7363e6cf..37deb472 100644 --- a/lib/librte_kni/rte_kni.h +++ b/lib/librte_kni/rte_kni.h @@ -42,7 +42,7 @@ * interfaces that may be used by the RTE application to receive/transmit * packets from/to Linux kernel net interfaces. * - * This library provide two APIs to burst receive packets from KNI interfaces, + * This library provides two APIs to burst receive packets from KNI interfaces, * and burst transmit packets to KNI interfaces. */ @@ -88,6 +88,7 @@ struct rte_kni_conf { struct rte_pci_addr addr; struct rte_pci_id id; + __extension__ uint8_t force_bind : 1; /* Flag to bind kernel thread */ }; diff --git a/lib/librte_lpm/Makefile b/lib/librte_lpm/Makefile index 656ade27..3dc549dc 100644 --- a/lib/librte_lpm/Makefile +++ b/lib/librte_lpm/Makefile @@ -51,6 +51,8 @@ ifneq ($(filter y,$(CONFIG_RTE_ARCH_ARM) $(CONFIG_RTE_ARCH_ARM64)),) SYMLINK-$(CONFIG_RTE_LIBRTE_LPM)-include += rte_lpm_neon.h else ifeq ($(CONFIG_RTE_ARCH_X86),y) SYMLINK-$(CONFIG_RTE_LIBRTE_LPM)-include += rte_lpm_sse.h +else ifeq ($(CONFIG_RTE_ARCH_PPC_64),y) +SYMLINK-$(CONFIG_RTE_LIBRTE_LPM)-include += rte_lpm_altivec.h endif # this lib needs eal diff --git a/lib/librte_lpm/rte_lpm.c b/lib/librte_lpm/rte_lpm.c index e1b5d94a..8c15c4c9 100644 --- a/lib/librte_lpm/rte_lpm.c +++ b/lib/librte_lpm/rte_lpm.c @@ -942,14 +942,9 @@ add_depth_big_v20(struct rte_lpm_v20 *lpm, uint32_t ip_masked, uint8_t depth, /* Insert new rule into the tbl8 entry. 
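 * As a sketch of the addressing used in this loop (the standard DIR-24-8
 * layout, not specific to this change; variable names follow the
 * surrounding file):
 *
 *   uint32_t tbl24_index = ip_masked >> 8;
 *   uint32_t tbl8_index = tbl8_group_index *
 *           RTE_LPM_TBL8_GROUP_NUM_ENTRIES + (ip_masked & 0xFF);
 *
 * the top 24 bits of the IP select a tbl24 entry, and the low 8 bits
 * select a slot inside that entry's 256-entry tbl8 group.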
*/ for (i = tbl8_index; i < tbl8_index + tbl8_range; i++) { - if (!lpm->tbl8[i].valid || - lpm->tbl8[i].depth <= depth) { - lpm->tbl8[i].valid = VALID; - lpm->tbl8[i].depth = depth; - lpm->tbl8[i].next_hop = next_hop; - - continue; - } + lpm->tbl8[i].valid = VALID; + lpm->tbl8[i].depth = depth; + lpm->tbl8[i].next_hop = next_hop; } /* @@ -1073,14 +1068,9 @@ add_depth_big_v1604(struct rte_lpm *lpm, uint32_t ip_masked, uint8_t depth, /* Insert new rule into the tbl8 entry. */ for (i = tbl8_index; i < tbl8_index + tbl8_range; i++) { - if (!lpm->tbl8[i].valid || - lpm->tbl8[i].depth <= depth) { - lpm->tbl8[i].valid = VALID; - lpm->tbl8[i].depth = depth; - lpm->tbl8[i].next_hop = next_hop; - - continue; - } + lpm->tbl8[i].valid = VALID; + lpm->tbl8[i].depth = depth; + lpm->tbl8[i].next_hop = next_hop; } /* diff --git a/lib/librte_lpm/rte_lpm.h b/lib/librte_lpm/rte_lpm.h index 2df1d672..682865e4 100644 --- a/lib/librte_lpm/rte_lpm.h +++ b/lib/librte_lpm/rte_lpm.h @@ -93,12 +93,14 @@ extern "C" { #if RTE_BYTE_ORDER == RTE_LITTLE_ENDIAN /** @internal Tbl24 entry structure. */ +__extension__ struct rte_lpm_tbl_entry_v20 { /** * Stores Next hop (tbl8 or tbl24 when valid_group is not set) or * a group index pointing to a tbl8 structure (tbl24 only, when * valid_group is set) */ + RTE_STD_C11 union { uint8_t next_hop; uint8_t group_idx; @@ -116,6 +118,7 @@ struct rte_lpm_tbl_entry_v20 { uint8_t depth :6; /**< Rule depth. */ }; +__extension__ struct rte_lpm_tbl_entry { /** * Stores Next hop (tbl8 or tbl24 when valid_group is not set) or @@ -137,6 +140,7 @@ struct rte_lpm_tbl_entry { }; #else +__extension__ struct rte_lpm_tbl_entry_v20 { uint8_t depth :6; uint8_t valid_group :1; @@ -147,6 +151,7 @@ struct rte_lpm_tbl_entry_v20 { }; }; +__extension__ struct rte_lpm_tbl_entry { uint32_t depth :6; uint32_t valid_group :1; @@ -193,7 +198,7 @@ struct rte_lpm_v20 { __rte_cache_aligned; /**< LPM tbl24 table. */ struct rte_lpm_tbl_entry_v20 tbl8[RTE_LPM_TBL8_NUM_ENTRIES] __rte_cache_aligned; /**< LPM tbl8 table. */ - struct rte_lpm_rule_v20 rules_tbl[0] \ + struct rte_lpm_rule_v20 rules_tbl[] __rte_cache_aligned; /**< LPM rules. */ }; @@ -480,6 +485,8 @@ rte_lpm_lookupx4(const struct rte_lpm *lpm, xmm_t ip, uint32_t hop[4], #if defined(RTE_ARCH_ARM) || defined(RTE_ARCH_ARM64) #include "rte_lpm_neon.h" +#elif defined(RTE_ARCH_PPC_64) +#include "rte_lpm_altivec.h" #else #include "rte_lpm_sse.h" #endif diff --git a/lib/librte_lpm/rte_lpm_altivec.h b/lib/librte_lpm/rte_lpm_altivec.h new file mode 100644 index 00000000..e26e0875 --- /dev/null +++ b/lib/librte_lpm/rte_lpm_altivec.h @@ -0,0 +1,154 @@ +/* + * BSD LICENSE + * + * Copyright (C) IBM Corporation 2016. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of IBM Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*/ + +#ifndef _RTE_LPM_ALTIVEC_H_ +#define _RTE_LPM_ALTIVEC_H_ + +#include +#include +#include +#include + +#ifdef __cplusplus +extern "C" { +#endif + +static inline void +rte_lpm_lookupx4(const struct rte_lpm *lpm, xmm_t ip, uint32_t hop[4], + uint32_t defv) +{ + vector signed int i24; + rte_xmm_t i8; + uint32_t tbl[4]; + uint64_t idx, pt, pt2; + const uint32_t *ptbl; + + const uint32_t mask = UINT8_MAX; + const vector signed int mask8 = (xmm_t){mask, mask, mask, mask}; + + /* + * RTE_LPM_VALID_EXT_ENTRY_BITMASK for 2 LPM entries + * as one 64-bit value (0x0300000003000000). + */ + const uint64_t mask_xv = + ((uint64_t)RTE_LPM_VALID_EXT_ENTRY_BITMASK | + (uint64_t)RTE_LPM_VALID_EXT_ENTRY_BITMASK << 32); + + /* + * RTE_LPM_LOOKUP_SUCCESS for 2 LPM entries + * as one 64-bit value (0x0100000001000000). + */ + const uint64_t mask_v = + ((uint64_t)RTE_LPM_LOOKUP_SUCCESS | + (uint64_t)RTE_LPM_LOOKUP_SUCCESS << 32); + + /* get 4 indexes for tbl24[]. */ + i24 = vec_sr((xmm_t) ip, + (vector unsigned int){CHAR_BIT, CHAR_BIT, CHAR_BIT, CHAR_BIT}); + + /* extract values from tbl24[] */ + idx = (uint32_t)i24[0]; + idx = idx < (1<<24) ? idx : (1<<24)-1; + ptbl = (const uint32_t *)&lpm->tbl24[idx]; + tbl[0] = *ptbl; + + idx = (uint32_t) i24[1]; + idx = idx < (1<<24) ? idx : (1<<24)-1; + ptbl = (const uint32_t *)&lpm->tbl24[idx]; + tbl[1] = *ptbl; + + idx = (uint32_t) i24[2]; + idx = idx < (1<<24) ? idx : (1<<24)-1; + ptbl = (const uint32_t *)&lpm->tbl24[idx]; + tbl[2] = *ptbl; + + idx = (uint32_t) i24[3]; + idx = idx < (1<<24) ? idx : (1<<24)-1; + ptbl = (const uint32_t *)&lpm->tbl24[idx]; + tbl[3] = *ptbl; + + /* get 4 indexes for tbl8[]. */ + i8.x = vec_and(ip, mask8); + + pt = (uint64_t)tbl[0] | + (uint64_t)tbl[1] << 32; + pt2 = (uint64_t)tbl[2] | + (uint64_t)tbl[3] << 32; + + /* search successfully finished for all 4 IP addresses. 
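 * To see why the packed test below is equivalent to four scalar checks
 * (bit values taken from the constants built above): a valid,
 * non-extended pair of entries packed into one 64-bit word looks like
 *
 *   pt      = 0x01xxxxxx01xxxxxx   // LOOKUP_SUCCESS set in both halves
 *   mask_xv = 0x0300000003000000   // VALID | EXT bits of both halves
 *   mask_v  = 0x0100000001000000   // VALID bits only
 *
 * so (pt & mask_xv) == mask_v holds exactly when both entries are valid
 * and neither needs a tbl8 extension; pt2 is tested the same way.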
*/ + if (likely((pt & mask_xv) == mask_v) && + likely((pt2 & mask_xv) == mask_v)) { + *(uint64_t *)hop = pt & RTE_LPM_MASKX4_RES; + *(uint64_t *)(hop + 2) = pt2 & RTE_LPM_MASKX4_RES; + return; + } + + if (unlikely((pt & RTE_LPM_VALID_EXT_ENTRY_BITMASK) == + RTE_LPM_VALID_EXT_ENTRY_BITMASK)) { + i8.u32[0] = i8.u32[0] + + (uint8_t)tbl[0] * RTE_LPM_TBL8_GROUP_NUM_ENTRIES; + ptbl = (const uint32_t *)&lpm->tbl8[i8.u32[0]]; + tbl[0] = *ptbl; + } + if (unlikely((pt >> 32 & RTE_LPM_VALID_EXT_ENTRY_BITMASK) == + RTE_LPM_VALID_EXT_ENTRY_BITMASK)) { + i8.u32[1] = i8.u32[1] + + (uint8_t)tbl[1] * RTE_LPM_TBL8_GROUP_NUM_ENTRIES; + ptbl = (const uint32_t *)&lpm->tbl8[i8.u32[1]]; + tbl[1] = *ptbl; + } + if (unlikely((pt2 & RTE_LPM_VALID_EXT_ENTRY_BITMASK) == + RTE_LPM_VALID_EXT_ENTRY_BITMASK)) { + i8.u32[2] = i8.u32[2] + + (uint8_t)tbl[2] * RTE_LPM_TBL8_GROUP_NUM_ENTRIES; + ptbl = (const uint32_t *)&lpm->tbl8[i8.u32[2]]; + tbl[2] = *ptbl; + } + if (unlikely((pt2 >> 32 & RTE_LPM_VALID_EXT_ENTRY_BITMASK) == + RTE_LPM_VALID_EXT_ENTRY_BITMASK)) { + i8.u32[3] = i8.u32[3] + + (uint8_t)tbl[3] * RTE_LPM_TBL8_GROUP_NUM_ENTRIES; + ptbl = (const uint32_t *)&lpm->tbl8[i8.u32[3]]; + tbl[3] = *ptbl; + } + + hop[0] = (tbl[0] & RTE_LPM_LOOKUP_SUCCESS) ? tbl[0] & 0x00FFFFFF : defv; + hop[1] = (tbl[1] & RTE_LPM_LOOKUP_SUCCESS) ? tbl[1] & 0x00FFFFFF : defv; + hop[2] = (tbl[2] & RTE_LPM_LOOKUP_SUCCESS) ? tbl[2] & 0x00FFFFFF : defv; + hop[3] = (tbl[3] & RTE_LPM_LOOKUP_SUCCESS) ? tbl[3] & 0x00FFFFFF : defv; +} + +#ifdef __cplusplus +} +#endif + +#endif /* _RTE_LPM_ALTIVEC_H_ */ diff --git a/lib/librte_lpm/rte_lpm_neon.h b/lib/librte_lpm/rte_lpm_neon.h index 7c643159..7efd9a0d 100644 --- a/lib/librte_lpm/rte_lpm_neon.h +++ b/lib/librte_lpm/rte_lpm_neon.h @@ -43,6 +43,7 @@ #include #include #include +#include #ifdef __cplusplus extern "C" { diff --git a/lib/librte_lpm/rte_lpm_sse.h b/lib/librte_lpm/rte_lpm_sse.h index da830995..ef33c6a1 100644 --- a/lib/librte_lpm/rte_lpm_sse.h +++ b/lib/librte_lpm/rte_lpm_sse.h @@ -38,6 +38,7 @@ #include #include #include +#include #ifdef __cplusplus extern "C" { diff --git a/lib/librte_mbuf/Makefile b/lib/librte_mbuf/Makefile index 8d62b0d3..4ae2e8c8 100644 --- a/lib/librte_mbuf/Makefile +++ b/lib/librte_mbuf/Makefile @@ -41,10 +41,10 @@ EXPORT_MAP := rte_mbuf_version.map LIBABIVER := 2 # all source are stored in SRCS-y -SRCS-$(CONFIG_RTE_LIBRTE_MBUF) := rte_mbuf.c +SRCS-$(CONFIG_RTE_LIBRTE_MBUF) := rte_mbuf.c rte_mbuf_ptype.c # install includes -SYMLINK-$(CONFIG_RTE_LIBRTE_MBUF)-include := rte_mbuf.h +SYMLINK-$(CONFIG_RTE_LIBRTE_MBUF)-include := rte_mbuf.h rte_mbuf_ptype.h # this lib needs eal DEPDIRS-$(CONFIG_RTE_LIBRTE_MBUF) += lib/librte_eal lib/librte_mempool diff --git a/lib/librte_mbuf/rte_mbuf.c b/lib/librte_mbuf/rte_mbuf.c index d2c87526..63f43c89 100644 --- a/lib/librte_mbuf/rte_mbuf.c +++ b/lib/librte_mbuf/rte_mbuf.c @@ -53,12 +53,12 @@ #include #include #include -#include #include #include #include #include #include +#include /* * ctrlmbuf constructor, given as a callback function to @@ -264,6 +264,40 @@ rte_pktmbuf_dump(FILE *f, const struct rte_mbuf *m, unsigned dump_len) } } +/* read len data bytes in a mbuf at specified offset (internal) */ +const void *__rte_pktmbuf_read(const struct rte_mbuf *m, uint32_t off, + uint32_t len, void *buf) +{ + const struct rte_mbuf *seg = m; + uint32_t buf_off = 0, copy_len; + + if (off + len > rte_pktmbuf_pkt_len(m)) + return NULL; + + while (off >= rte_pktmbuf_data_len(seg)) { + off -= rte_pktmbuf_data_len(seg); + seg = seg->next; + } + + if 
(off + len <= rte_pktmbuf_data_len(seg)) + return rte_pktmbuf_mtod_offset(seg, char *, off); + + /* rare case: header is split among several segments */ + while (len > 0) { + copy_len = rte_pktmbuf_data_len(seg) - off; + if (copy_len > len) + copy_len = len; + rte_memcpy((char *)buf + buf_off, + rte_pktmbuf_mtod_offset(seg, char *, off), copy_len); + off = 0; + buf_off += copy_len; + len -= copy_len; + seg = seg->next; + } + + return buf; +} + /* * Get the name of a RX offload flag. Must be kept synchronized with flag * definitions in rte_mbuf.h. @@ -275,16 +309,78 @@ const char *rte_get_rx_ol_flag_name(uint64_t mask) case PKT_RX_RSS_HASH: return "PKT_RX_RSS_HASH"; case PKT_RX_FDIR: return "PKT_RX_FDIR"; case PKT_RX_L4_CKSUM_BAD: return "PKT_RX_L4_CKSUM_BAD"; + case PKT_RX_L4_CKSUM_GOOD: return "PKT_RX_L4_CKSUM_GOOD"; + case PKT_RX_L4_CKSUM_NONE: return "PKT_RX_L4_CKSUM_NONE"; case PKT_RX_IP_CKSUM_BAD: return "PKT_RX_IP_CKSUM_BAD"; + case PKT_RX_IP_CKSUM_GOOD: return "PKT_RX_IP_CKSUM_GOOD"; + case PKT_RX_IP_CKSUM_NONE: return "PKT_RX_IP_CKSUM_NONE"; case PKT_RX_EIP_CKSUM_BAD: return "PKT_RX_EIP_CKSUM_BAD"; case PKT_RX_VLAN_STRIPPED: return "PKT_RX_VLAN_STRIPPED"; case PKT_RX_IEEE1588_PTP: return "PKT_RX_IEEE1588_PTP"; case PKT_RX_IEEE1588_TMST: return "PKT_RX_IEEE1588_TMST"; case PKT_RX_QINQ_STRIPPED: return "PKT_RX_QINQ_STRIPPED"; + case PKT_RX_LRO: return "PKT_RX_LRO"; default: return NULL; } } +struct flag_mask { + uint64_t flag; + uint64_t mask; + const char *default_name; +}; + +/* write the list of rx ol flags in buffer buf */ +int +rte_get_rx_ol_flag_list(uint64_t mask, char *buf, size_t buflen) +{ + const struct flag_mask rx_flags[] = { + { PKT_RX_VLAN_PKT, PKT_RX_VLAN_PKT, NULL }, + { PKT_RX_RSS_HASH, PKT_RX_RSS_HASH, NULL }, + { PKT_RX_FDIR, PKT_RX_FDIR, NULL }, + { PKT_RX_L4_CKSUM_BAD, PKT_RX_L4_CKSUM_MASK, NULL }, + { PKT_RX_L4_CKSUM_GOOD, PKT_RX_L4_CKSUM_MASK, NULL }, + { PKT_RX_L4_CKSUM_NONE, PKT_RX_L4_CKSUM_MASK, NULL }, + { PKT_RX_L4_CKSUM_UNKNOWN, PKT_RX_L4_CKSUM_MASK, + "PKT_RX_L4_CKSUM_UNKNOWN" }, + { PKT_RX_IP_CKSUM_BAD, PKT_RX_IP_CKSUM_MASK, NULL }, + { PKT_RX_IP_CKSUM_GOOD, PKT_RX_IP_CKSUM_MASK, NULL }, + { PKT_RX_IP_CKSUM_NONE, PKT_RX_IP_CKSUM_MASK, NULL }, + { PKT_RX_IP_CKSUM_UNKNOWN, PKT_RX_IP_CKSUM_MASK, + "PKT_RX_IP_CKSUM_UNKNOWN" }, + { PKT_RX_EIP_CKSUM_BAD, PKT_RX_EIP_CKSUM_BAD, NULL }, + { PKT_RX_VLAN_STRIPPED, PKT_RX_VLAN_STRIPPED, NULL }, + { PKT_RX_IEEE1588_PTP, PKT_RX_IEEE1588_PTP, NULL }, + { PKT_RX_IEEE1588_TMST, PKT_RX_IEEE1588_TMST, NULL }, + { PKT_RX_QINQ_STRIPPED, PKT_RX_QINQ_STRIPPED, NULL }, + { PKT_RX_LRO, PKT_RX_LRO, NULL }, + }; + const char *name; + unsigned int i; + int ret; + + if (buflen == 0) + return -1; + + buf[0] = '\0'; + for (i = 0; i < RTE_DIM(rx_flags); i++) { + if ((mask & rx_flags[i].mask) != rx_flags[i].flag) + continue; + name = rte_get_rx_ol_flag_name(rx_flags[i].flag); + if (name == NULL) + name = rx_flags[i].default_name; + ret = snprintf(buf, buflen, "%s ", name); + if (ret < 0) + return -1; + if ((size_t)ret >= buflen) + return -1; + buf += ret; + buflen -= ret; + } + + return 0; +} + /* * Get the name of a TX offload flag. Must be kept synchronized with flag * definitions in rte_mbuf.h. 
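 * A short usage sketch for the rte_get_rx_ol_flag_list() helper defined
 * above (a TX counterpart follows below); the buffer size is an
 * assumption, any reasonably sized buffer works:
 *
 *   char flags[256];
 *
 *   if (rte_get_rx_ol_flag_list(m->ol_flags, flags, sizeof(flags)) == 0)
 *       printf("rx ol_flags: %s\n", flags);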
@@ -304,6 +400,63 @@ const char *rte_get_tx_ol_flag_name(uint64_t mask) case PKT_TX_OUTER_IP_CKSUM: return "PKT_TX_OUTER_IP_CKSUM"; case PKT_TX_OUTER_IPV4: return "PKT_TX_OUTER_IPV4"; case PKT_TX_OUTER_IPV6: return "PKT_TX_OUTER_IPV6"; + case PKT_TX_TUNNEL_VXLAN: return "PKT_TX_TUNNEL_VXLAN"; + case PKT_TX_TUNNEL_GRE: return "PKT_TX_TUNNEL_GRE"; + case PKT_TX_TUNNEL_IPIP: return "PKT_TX_TUNNEL_IPIP"; + case PKT_TX_TUNNEL_GENEVE: return "PKT_TX_TUNNEL_GENEVE"; default: return NULL; } } + +/* write the list of tx ol flags in buffer buf */ +int +rte_get_tx_ol_flag_list(uint64_t mask, char *buf, size_t buflen) +{ + const struct flag_mask tx_flags[] = { + { PKT_TX_VLAN_PKT, PKT_TX_VLAN_PKT, NULL }, + { PKT_TX_IP_CKSUM, PKT_TX_IP_CKSUM, NULL }, + { PKT_TX_TCP_CKSUM, PKT_TX_L4_MASK, NULL }, + { PKT_TX_SCTP_CKSUM, PKT_TX_L4_MASK, NULL }, + { PKT_TX_UDP_CKSUM, PKT_TX_L4_MASK, NULL }, + { PKT_TX_L4_NO_CKSUM, PKT_TX_L4_MASK, "PKT_TX_L4_NO_CKSUM" }, + { PKT_TX_IEEE1588_TMST, PKT_TX_IEEE1588_TMST, NULL }, + { PKT_TX_TCP_SEG, PKT_TX_TCP_SEG, NULL }, + { PKT_TX_IPV4, PKT_TX_IPV4, NULL }, + { PKT_TX_IPV6, PKT_TX_IPV6, NULL }, + { PKT_TX_OUTER_IP_CKSUM, PKT_TX_OUTER_IP_CKSUM, NULL }, + { PKT_TX_OUTER_IPV4, PKT_TX_OUTER_IPV4, NULL }, + { PKT_TX_OUTER_IPV6, PKT_TX_OUTER_IPV6, NULL }, + { PKT_TX_TUNNEL_VXLAN, PKT_TX_TUNNEL_MASK, + "PKT_TX_TUNNEL_NONE" }, + { PKT_TX_TUNNEL_GRE, PKT_TX_TUNNEL_MASK, + "PKT_TX_TUNNEL_NONE" }, + { PKT_TX_TUNNEL_IPIP, PKT_TX_TUNNEL_MASK, + "PKT_TX_TUNNEL_NONE" }, + { PKT_TX_TUNNEL_GENEVE, PKT_TX_TUNNEL_MASK, + "PKT_TX_TUNNEL_NONE" }, + }; + const char *name; + unsigned int i; + int ret; + + if (buflen == 0) + return -1; + + buf[0] = '\0'; + for (i = 0; i < RTE_DIM(tx_flags); i++) { + if ((mask & tx_flags[i].mask) != tx_flags[i].flag) + continue; + name = rte_get_tx_ol_flag_name(tx_flags[i].flag); + if (name == NULL) + name = tx_flags[i].default_name; + ret = snprintf(buf, buflen, "%s ", name); + if (ret < 0) + return -1; + if ((size_t)ret >= buflen) + return -1; + buf += ret; + buflen -= ret; + } + + return 0; +} diff --git a/lib/librte_mbuf/rte_mbuf.h b/lib/librte_mbuf/rte_mbuf.h index 101485fb..ead7c6ea 100644 --- a/lib/librte_mbuf/rte_mbuf.h +++ b/lib/librte_mbuf/rte_mbuf.h @@ -44,7 +44,7 @@ * buffers. The message buffers are stored in a mempool, using the * RTE mempool library. * - * This library provide an API to allocate/free packet mbufs, which are + * This library provides an API to allocate/free packet mbufs, which are * used to carry network packets. * * To understand the concepts of packet buffers or mbufs, you @@ -60,6 +60,7 @@ #include #include #include +#include #ifdef __cplusplus extern "C" { @@ -90,8 +91,25 @@ extern "C" { #define PKT_RX_RSS_HASH (1ULL << 1) /**< RX packet with RSS hash result. */ #define PKT_RX_FDIR (1ULL << 2) /**< RX packet with FDIR match indicate. */ -#define PKT_RX_L4_CKSUM_BAD (1ULL << 3) /**< L4 cksum of RX pkt. is not OK. */ -#define PKT_RX_IP_CKSUM_BAD (1ULL << 4) /**< IP cksum of RX pkt. is not OK. */ + +/** + * Deprecated. + * Checking this flag alone is deprecated: check the 2 bits of + * PKT_RX_L4_CKSUM_MASK. + * This flag was set when the L4 checksum of a packet was detected as + * wrong by the hardware. + */ +#define PKT_RX_L4_CKSUM_BAD (1ULL << 3) + +/** + * Deprecated. + * Checking this flag alone is deprecated: check the 2 bits of + * PKT_RX_IP_CKSUM_MASK. + * This flag was set when the IP checksum of a packet was detected as + * wrong by the hardware. 
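 *
 * A sketch of the recommended check (the mask and status values are
 * defined just below; drop() and the software fallback are hypothetical
 * application handlers):
 *
 *   switch (m->ol_flags & PKT_RX_IP_CKSUM_MASK) {
 *   case PKT_RX_IP_CKSUM_GOOD:
 *       break;                       // already validated by hardware
 *   case PKT_RX_IP_CKSUM_BAD:
 *       drop(m);
 *       break;
 *   default:                         // UNKNOWN or NONE
 *       verify_cksum_in_sw(m);       // if the application requires it
 *   }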
+ */ +#define PKT_RX_IP_CKSUM_BAD (1ULL << 4) + #define PKT_RX_EIP_CKSUM_BAD (1ULL << 5) /**< External IP header checksum error. */ /** @@ -101,7 +119,35 @@ extern "C" { */ #define PKT_RX_VLAN_STRIPPED (1ULL << 6) -/* hole, some bits can be reused here */ +/** + * Mask of bits used to determine the status of RX IP checksum. + * - PKT_RX_IP_CKSUM_UNKNOWN: no information about the RX IP checksum + * - PKT_RX_IP_CKSUM_BAD: the IP checksum in the packet is wrong + * - PKT_RX_IP_CKSUM_GOOD: the IP checksum in the packet is valid + * - PKT_RX_IP_CKSUM_NONE: the IP checksum is not correct in the packet + * data, but the integrity of the IP header is verified. + */ +#define PKT_RX_IP_CKSUM_MASK ((1ULL << 4) | (1ULL << 7)) + +#define PKT_RX_IP_CKSUM_UNKNOWN 0 +#define PKT_RX_IP_CKSUM_BAD (1ULL << 4) +#define PKT_RX_IP_CKSUM_GOOD (1ULL << 7) +#define PKT_RX_IP_CKSUM_NONE ((1ULL << 4) | (1ULL << 7)) + +/** + * Mask of bits used to determine the status of RX L4 checksum. + * - PKT_RX_L4_CKSUM_UNKNOWN: no information about the RX L4 checksum + * - PKT_RX_L4_CKSUM_BAD: the L4 checksum in the packet is wrong + * - PKT_RX_L4_CKSUM_GOOD: the L4 checksum in the packet is valid + * - PKT_RX_L4_CKSUM_NONE: the L4 checksum is not correct in the packet + * data, but the integrity of the L4 data is verified. + */ +#define PKT_RX_L4_CKSUM_MASK ((1ULL << 3) | (1ULL << 8)) + +#define PKT_RX_L4_CKSUM_UNKNOWN 0 +#define PKT_RX_L4_CKSUM_BAD (1ULL << 3) +#define PKT_RX_L4_CKSUM_GOOD (1ULL << 8) +#define PKT_RX_L4_CKSUM_NONE ((1ULL << 3) | (1ULL << 8)) #define PKT_RX_IEEE1588_PTP (1ULL << 9) /**< RX IEEE1588 L2 Ethernet PT Packet. */ #define PKT_RX_IEEE1588_TMST (1ULL << 10) /**< RX IEEE1588 L2/L4 timestamped packet.*/ @@ -124,10 +170,29 @@ extern "C" { */ #define PKT_RX_QINQ_PKT PKT_RX_QINQ_STRIPPED +/** + * When packets are coalesced by a hardware or virtual driver, this flag + * can be set in the RX mbuf, meaning that the m->tso_segsz field is + * valid and is set to the segment size of original packets. + */ +#define PKT_RX_LRO (1ULL << 16) + /* add new RX flags here */ /* add new TX flags here */ +/** + * Bits 45:48 used for the tunnel type. + * When doing Tx offload like TSO or checksum, the HW needs to configure the + * tunnel type into the HW descriptors. + */ +#define PKT_TX_TUNNEL_VXLAN (0x1ULL << 45) +#define PKT_TX_TUNNEL_GRE (0x2ULL << 45) +#define PKT_TX_TUNNEL_IPIP (0x3ULL << 45) +#define PKT_TX_TUNNEL_GENEVE (0x4ULL << 45) +/* add new TX TUNNEL type here */ +#define PKT_TX_TUNNEL_MASK (0xFULL << 45) + /** * Second VLAN insertion (QinQ) flag. */ @@ -225,500 +290,6 @@ extern "C" { /* Use final bit of flags to indicate a control mbuf */ #define CTRL_MBUF_FLAG (1ULL << 63) /**< Mbuf contains control data */ -/* - * 32 bits are divided into several fields to mark packet types. Note that - * each field is indexical. - * - Bit 3:0 is for L2 types. - * - Bit 7:4 is for L3 or outer L3 (for tunneling case) types. - * - Bit 11:8 is for L4 or outer L4 (for tunneling case) types. - * - Bit 15:12 is for tunnel types. - * - Bit 19:16 is for inner L2 types. - * - Bit 23:20 is for inner L3 types. - * - Bit 27:24 is for inner L4 types. - * - Bit 31:28 is reserved. - * - * To be compatible with Vector PMD, RTE_PTYPE_L3_IPV4, RTE_PTYPE_L3_IPV4_EXT, - * RTE_PTYPE_L3_IPV6, RTE_PTYPE_L3_IPV6_EXT, RTE_PTYPE_L4_TCP, RTE_PTYPE_L4_UDP - * and RTE_PTYPE_L4_SCTP should be kept as below in a contiguous 7 bits. - * - * Note that L3 types values are selected for checking IPV4/IPV6 header from - * performance point of view. 
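 *
 * (As an illustrative sketch of how the fields combine: a
 * VXLAN-encapsulated TCP/IPv4 packet could be reported as
 *
 *   ptype = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN |
 *           RTE_PTYPE_L4_UDP | RTE_PTYPE_TUNNEL_VXLAN |
 *           RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV4 |
 *           RTE_PTYPE_INNER_L4_TCP;
 *
 * and each layer is recovered with its mask, e.g.
 * (ptype & RTE_PTYPE_TUNNEL_MASK) == RTE_PTYPE_TUNNEL_VXLAN.)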
Reading annotations of RTE_ETH_IS_IPV4_HDR and - * RTE_ETH_IS_IPV6_HDR is needed for any future changes of L3 type values. - * - * Note that the packet types of the same packet recognized by different - * hardware may be different, as different hardware may have different - * capability of packet type recognition. - * - * examples: - * <'ether type'=0x0800 - * | 'version'=4, 'protocol'=0x29 - * | 'version'=6, 'next header'=0x3A - * | 'ICMPv6 header'> - * will be recognized on i40e hardware as packet type combination of, - * RTE_PTYPE_L2_ETHER | - * RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | - * RTE_PTYPE_TUNNEL_IP | - * RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN | - * RTE_PTYPE_INNER_L4_ICMP. - * - * <'ether type'=0x86DD - * | 'version'=6, 'next header'=0x2F - * | 'GRE header' - * | 'version'=6, 'next header'=0x11 - * | 'UDP header'> - * will be recognized on i40e hardware as packet type combination of, - * RTE_PTYPE_L2_ETHER | - * RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | - * RTE_PTYPE_TUNNEL_GRENAT | - * RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN | - * RTE_PTYPE_INNER_L4_UDP. - */ -#define RTE_PTYPE_UNKNOWN 0x00000000 -/** - * Ethernet packet type. - * It is used for outer packet for tunneling cases. - * - * Packet format: - * <'ether type'=[0x0800|0x86DD]> - */ -#define RTE_PTYPE_L2_ETHER 0x00000001 -/** - * Ethernet packet type for time sync. - * - * Packet format: - * <'ether type'=0x88F7> - */ -#define RTE_PTYPE_L2_ETHER_TIMESYNC 0x00000002 -/** - * ARP (Address Resolution Protocol) packet type. - * - * Packet format: - * <'ether type'=0x0806> - */ -#define RTE_PTYPE_L2_ETHER_ARP 0x00000003 -/** - * LLDP (Link Layer Discovery Protocol) packet type. - * - * Packet format: - * <'ether type'=0x88CC> - */ -#define RTE_PTYPE_L2_ETHER_LLDP 0x00000004 -/** - * NSH (Network Service Header) packet type. - * - * Packet format: - * <'ether type'=0x894F> - */ -#define RTE_PTYPE_L2_ETHER_NSH 0x00000005 -/** - * Mask of layer 2 packet types. - * It is used for outer packet for tunneling cases. - */ -#define RTE_PTYPE_L2_MASK 0x0000000f -/** - * IP (Internet Protocol) version 4 packet type. - * It is used for outer packet for tunneling cases, and does not contain any - * header option. - * - * Packet format: - * <'ether type'=0x0800 - * | 'version'=4, 'ihl'=5> - */ -#define RTE_PTYPE_L3_IPV4 0x00000010 -/** - * IP (Internet Protocol) version 4 packet type. - * It is used for outer packet for tunneling cases, and contains header - * options. - * - * Packet format: - * <'ether type'=0x0800 - * | 'version'=4, 'ihl'=[6-15], 'options'> - */ -#define RTE_PTYPE_L3_IPV4_EXT 0x00000030 -/** - * IP (Internet Protocol) version 6 packet type. - * It is used for outer packet for tunneling cases, and does not contain any - * extension header. - * - * Packet format: - * <'ether type'=0x86DD - * | 'version'=6, 'next header'=0x3B> - */ -#define RTE_PTYPE_L3_IPV6 0x00000040 -/** - * IP (Internet Protocol) version 4 packet type. - * It is used for outer packet for tunneling cases, and may or maynot contain - * header options. - * - * Packet format: - * <'ether type'=0x0800 - * | 'version'=4, 'ihl'=[5-15], <'options'>> - */ -#define RTE_PTYPE_L3_IPV4_EXT_UNKNOWN 0x00000090 -/** - * IP (Internet Protocol) version 6 packet type. - * It is used for outer packet for tunneling cases, and contains extension - * headers. 
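 *
 * (A parsing sketch for the IPv4 variants documented above: a software
 * classifier could derive the L3 field from the IHL nibble, e.g.
 *
 *   ptype |= ((hdr->version_ihl & 0x0f) == 5) ?
 *           RTE_PTYPE_L3_IPV4 : RTE_PTYPE_L3_IPV4_EXT;
 *
 * where hdr is a struct ipv4_hdr pointer and version_ihl is the first
 * header byte.)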
- * - * Packet format: - * <'ether type'=0x86DD - * | 'version'=6, 'next header'=[0x0|0x2B|0x2C|0x32|0x33|0x3C|0x87], - * 'extension headers'> - */ -#define RTE_PTYPE_L3_IPV6_EXT 0x000000c0 -/** - * IP (Internet Protocol) version 6 packet type. - * It is used for outer packet for tunneling cases, and may or maynot contain - * extension headers. - * - * Packet format: - * <'ether type'=0x86DD - * | 'version'=6, 'next header'=[0x3B|0x0|0x2B|0x2C|0x32|0x33|0x3C|0x87], - * <'extension headers'>> - */ -#define RTE_PTYPE_L3_IPV6_EXT_UNKNOWN 0x000000e0 -/** - * Mask of layer 3 packet types. - * It is used for outer packet for tunneling cases. - */ -#define RTE_PTYPE_L3_MASK 0x000000f0 -/** - * TCP (Transmission Control Protocol) packet type. - * It is used for outer packet for tunneling cases. - * - * Packet format: - * <'ether type'=0x0800 - * | 'version'=4, 'protocol'=6, 'MF'=0> - * or, - * <'ether type'=0x86DD - * | 'version'=6, 'next header'=6> - */ -#define RTE_PTYPE_L4_TCP 0x00000100 -/** - * UDP (User Datagram Protocol) packet type. - * It is used for outer packet for tunneling cases. - * - * Packet format: - * <'ether type'=0x0800 - * | 'version'=4, 'protocol'=17, 'MF'=0> - * or, - * <'ether type'=0x86DD - * | 'version'=6, 'next header'=17> - */ -#define RTE_PTYPE_L4_UDP 0x00000200 -/** - * Fragmented IP (Internet Protocol) packet type. - * It is used for outer packet for tunneling cases. - * - * It refers to those packets of any IP types, which can be recognized as - * fragmented. A fragmented packet cannot be recognized as any other L4 types - * (RTE_PTYPE_L4_TCP, RTE_PTYPE_L4_UDP, RTE_PTYPE_L4_SCTP, RTE_PTYPE_L4_ICMP, - * RTE_PTYPE_L4_NONFRAG). - * - * Packet format: - * <'ether type'=0x0800 - * | 'version'=4, 'MF'=1> - * or, - * <'ether type'=0x86DD - * | 'version'=6, 'next header'=44> - */ -#define RTE_PTYPE_L4_FRAG 0x00000300 -/** - * SCTP (Stream Control Transmission Protocol) packet type. - * It is used for outer packet for tunneling cases. - * - * Packet format: - * <'ether type'=0x0800 - * | 'version'=4, 'protocol'=132, 'MF'=0> - * or, - * <'ether type'=0x86DD - * | 'version'=6, 'next header'=132> - */ -#define RTE_PTYPE_L4_SCTP 0x00000400 -/** - * ICMP (Internet Control Message Protocol) packet type. - * It is used for outer packet for tunneling cases. - * - * Packet format: - * <'ether type'=0x0800 - * | 'version'=4, 'protocol'=1, 'MF'=0> - * or, - * <'ether type'=0x86DD - * | 'version'=6, 'next header'=1> - */ -#define RTE_PTYPE_L4_ICMP 0x00000500 -/** - * Non-fragmented IP (Internet Protocol) packet type. - * It is used for outer packet for tunneling cases. - * - * It refers to those packets of any IP types, while cannot be recognized as - * any of above L4 types (RTE_PTYPE_L4_TCP, RTE_PTYPE_L4_UDP, - * RTE_PTYPE_L4_FRAG, RTE_PTYPE_L4_SCTP, RTE_PTYPE_L4_ICMP). - * - * Packet format: - * <'ether type'=0x0800 - * | 'version'=4, 'protocol'!=[6|17|132|1], 'MF'=0> - * or, - * <'ether type'=0x86DD - * | 'version'=6, 'next header'!=[6|17|44|132|1]> - */ -#define RTE_PTYPE_L4_NONFRAG 0x00000600 -/** - * Mask of layer 4 packet types. - * It is used for outer packet for tunneling cases. - */ -#define RTE_PTYPE_L4_MASK 0x00000f00 -/** - * IP (Internet Protocol) in IP (Internet Protocol) tunneling packet type. - * - * Packet format: - * <'ether type'=0x0800 - * | 'version'=4, 'protocol'=[4|41]> - * or, - * <'ether type'=0x86DD - * | 'version'=6, 'next header'=[4|41]> - */ -#define RTE_PTYPE_TUNNEL_IP 0x00001000 -/** - * GRE (Generic Routing Encapsulation) tunneling packet type. 
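 *
 * (For the L4 values above, a fragment check is a sketch like
 *
 *   if ((ptype & RTE_PTYPE_L4_MASK) == RTE_PTYPE_L4_FRAG)
 *       reassemble(m);   // reassemble() is hypothetical
 *
 * since a fragmented packet is never reported as TCP, UDP or SCTP.)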
- * - * Packet format: - * <'ether type'=0x0800 - * | 'version'=4, 'protocol'=47> - * or, - * <'ether type'=0x86DD - * | 'version'=6, 'next header'=47> - */ -#define RTE_PTYPE_TUNNEL_GRE 0x00002000 -/** - * VXLAN (Virtual eXtensible Local Area Network) tunneling packet type. - * - * Packet format: - * <'ether type'=0x0800 - * | 'version'=4, 'protocol'=17 - * | 'destination port'=4798> - * or, - * <'ether type'=0x86DD - * | 'version'=6, 'next header'=17 - * | 'destination port'=4798> - */ -#define RTE_PTYPE_TUNNEL_VXLAN 0x00003000 -/** - * NVGRE (Network Virtualization using Generic Routing Encapsulation) tunneling - * packet type. - * - * Packet format: - * <'ether type'=0x0800 - * | 'version'=4, 'protocol'=47 - * | 'protocol type'=0x6558> - * or, - * <'ether type'=0x86DD - * | 'version'=6, 'next header'=47 - * | 'protocol type'=0x6558'> - */ -#define RTE_PTYPE_TUNNEL_NVGRE 0x00004000 -/** - * GENEVE (Generic Network Virtualization Encapsulation) tunneling packet type. - * - * Packet format: - * <'ether type'=0x0800 - * | 'version'=4, 'protocol'=17 - * | 'destination port'=6081> - * or, - * <'ether type'=0x86DD - * | 'version'=6, 'next header'=17 - * | 'destination port'=6081> - */ -#define RTE_PTYPE_TUNNEL_GENEVE 0x00005000 -/** - * Tunneling packet type of Teredo, VXLAN (Virtual eXtensible Local Area - * Network) or GRE (Generic Routing Encapsulation) could be recognized as this - * packet type, if they can not be recognized independently as of hardware - * capability. - */ -#define RTE_PTYPE_TUNNEL_GRENAT 0x00006000 -/** - * Mask of tunneling packet types. - */ -#define RTE_PTYPE_TUNNEL_MASK 0x0000f000 -/** - * Ethernet packet type. - * It is used for inner packet type only. - * - * Packet format (inner only): - * <'ether type'=[0x800|0x86DD]> - */ -#define RTE_PTYPE_INNER_L2_ETHER 0x00010000 -/** - * Ethernet packet type with VLAN (Virtual Local Area Network) tag. - * - * Packet format (inner only): - * <'ether type'=[0x800|0x86DD], vlan=[1-4095]> - */ -#define RTE_PTYPE_INNER_L2_ETHER_VLAN 0x00020000 -/** - * Mask of inner layer 2 packet types. - */ -#define RTE_PTYPE_INNER_L2_MASK 0x000f0000 -/** - * IP (Internet Protocol) version 4 packet type. - * It is used for inner packet only, and does not contain any header option. - * - * Packet format (inner only): - * <'ether type'=0x0800 - * | 'version'=4, 'ihl'=5> - */ -#define RTE_PTYPE_INNER_L3_IPV4 0x00100000 -/** - * IP (Internet Protocol) version 4 packet type. - * It is used for inner packet only, and contains header options. - * - * Packet format (inner only): - * <'ether type'=0x0800 - * | 'version'=4, 'ihl'=[6-15], 'options'> - */ -#define RTE_PTYPE_INNER_L3_IPV4_EXT 0x00200000 -/** - * IP (Internet Protocol) version 6 packet type. - * It is used for inner packet only, and does not contain any extension header. - * - * Packet format (inner only): - * <'ether type'=0x86DD - * | 'version'=6, 'next header'=0x3B> - */ -#define RTE_PTYPE_INNER_L3_IPV6 0x00300000 -/** - * IP (Internet Protocol) version 4 packet type. - * It is used for inner packet only, and may or maynot contain header options. - * - * Packet format (inner only): - * <'ether type'=0x0800 - * | 'version'=4, 'ihl'=[5-15], <'options'>> - */ -#define RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN 0x00400000 -/** - * IP (Internet Protocol) version 6 packet type. - * It is used for inner packet only, and contains extension headers. 
- * - * Packet format (inner only): - * <'ether type'=0x86DD - * | 'version'=6, 'next header'=[0x0|0x2B|0x2C|0x32|0x33|0x3C|0x87], - * 'extension headers'> - */ -#define RTE_PTYPE_INNER_L3_IPV6_EXT 0x00500000 -/** - * IP (Internet Protocol) version 6 packet type. - * It is used for inner packet only, and may or maynot contain extension - * headers. - * - * Packet format (inner only): - * <'ether type'=0x86DD - * | 'version'=6, 'next header'=[0x3B|0x0|0x2B|0x2C|0x32|0x33|0x3C|0x87], - * <'extension headers'>> - */ -#define RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN 0x00600000 -/** - * Mask of inner layer 3 packet types. - */ -#define RTE_PTYPE_INNER_L3_MASK 0x00f00000 -/** - * TCP (Transmission Control Protocol) packet type. - * It is used for inner packet only. - * - * Packet format (inner only): - * <'ether type'=0x0800 - * | 'version'=4, 'protocol'=6, 'MF'=0> - * or, - * <'ether type'=0x86DD - * | 'version'=6, 'next header'=6> - */ -#define RTE_PTYPE_INNER_L4_TCP 0x01000000 -/** - * UDP (User Datagram Protocol) packet type. - * It is used for inner packet only. - * - * Packet format (inner only): - * <'ether type'=0x0800 - * | 'version'=4, 'protocol'=17, 'MF'=0> - * or, - * <'ether type'=0x86DD - * | 'version'=6, 'next header'=17> - */ -#define RTE_PTYPE_INNER_L4_UDP 0x02000000 -/** - * Fragmented IP (Internet Protocol) packet type. - * It is used for inner packet only, and may or maynot have layer 4 packet. - * - * Packet format (inner only): - * <'ether type'=0x0800 - * | 'version'=4, 'MF'=1> - * or, - * <'ether type'=0x86DD - * | 'version'=6, 'next header'=44> - */ -#define RTE_PTYPE_INNER_L4_FRAG 0x03000000 -/** - * SCTP (Stream Control Transmission Protocol) packet type. - * It is used for inner packet only. - * - * Packet format (inner only): - * <'ether type'=0x0800 - * | 'version'=4, 'protocol'=132, 'MF'=0> - * or, - * <'ether type'=0x86DD - * | 'version'=6, 'next header'=132> - */ -#define RTE_PTYPE_INNER_L4_SCTP 0x04000000 -/** - * ICMP (Internet Control Message Protocol) packet type. - * It is used for inner packet only. - * - * Packet format (inner only): - * <'ether type'=0x0800 - * | 'version'=4, 'protocol'=1, 'MF'=0> - * or, - * <'ether type'=0x86DD - * | 'version'=6, 'next header'=1> - */ -#define RTE_PTYPE_INNER_L4_ICMP 0x05000000 -/** - * Non-fragmented IP (Internet Protocol) packet type. - * It is used for inner packet only, and may or maynot have other unknown layer - * 4 packet types. - * - * Packet format (inner only): - * <'ether type'=0x0800 - * | 'version'=4, 'protocol'!=[6|17|132|1], 'MF'=0> - * or, - * <'ether type'=0x86DD - * | 'version'=6, 'next header'!=[6|17|44|132|1]> - */ -#define RTE_PTYPE_INNER_L4_NONFRAG 0x06000000 -/** - * Mask of inner layer 4 packet types. - */ -#define RTE_PTYPE_INNER_L4_MASK 0x0f000000 - -/** - * Check if the (outer) L3 header is IPv4. To avoid comparing IPv4 types one by - * one, bit 4 is selected to be used for IPv4 only. Then checking bit 4 can - * determine if it is an IPV4 packet. - */ -#define RTE_ETH_IS_IPV4_HDR(ptype) ((ptype) & RTE_PTYPE_L3_IPV4) - -/** - * Check if the (outer) L3 header is IPv4. To avoid comparing IPv4 types one by - * one, bit 6 is selected to be used for IPv4 only. Then checking bit 6 can - * determine if it is an IPV4 packet. 
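 *
 * (Typical dispatch on these two helpers, as a sketch; the handlers are
 * hypothetical:
 *
 *   if (RTE_ETH_IS_IPV4_HDR(m->packet_type))
 *       handle_ipv4(m);
 *   else if (RTE_ETH_IS_IPV6_HDR(m->packet_type))
 *       handle_ipv6(m);)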
- */ -#define RTE_ETH_IS_IPV6_HDR(ptype) ((ptype) & RTE_PTYPE_L3_IPV6) - -/* Check if it is a tunneling packet */ -#define RTE_ETH_IS_TUNNEL_PKT(ptype) ((ptype) & (RTE_PTYPE_TUNNEL_MASK | \ - RTE_PTYPE_INNER_L2_MASK | \ - RTE_PTYPE_INNER_L3_MASK | \ - RTE_PTYPE_INNER_L4_MASK)) - /** Alignment constraint of mbuf private area. */ #define RTE_MBUF_PRIV_ALIGN 8 @@ -732,6 +303,20 @@ extern "C" { */ const char *rte_get_rx_ol_flag_name(uint64_t mask); +/** + * Dump the list of RX offload flags in a buffer + * + * @param mask + * The mask describing the RX flags. + * @param buf + * The output buffer. + * @param buflen + * The length of the buffer. + * @return + * 0 on success, (-1) on error. + */ +int rte_get_rx_ol_flag_list(uint64_t mask, char *buf, size_t buflen); + /** * Get the name of a TX offload flag * @@ -744,6 +329,20 @@ const char *rte_get_rx_ol_flag_name(uint64_t mask); */ const char *rte_get_tx_ol_flag_name(uint64_t mask); +/** + * Dump the list of TX offload flags in a buffer + * + * @param mask + * The mask describing the TX flags. + * @param buf + * The output buffer. + * @param buflen + * The length of the buffer. + * @return + * 0 on success, (-1) on error. + */ +int rte_get_tx_ol_flag_list(uint64_t mask, char *buf, size_t buflen); + /** * Some NICs need at least 2KB buffer to RX standard Ethernet frame without * splitting it into multiple segments. @@ -756,8 +355,11 @@ const char *rte_get_tx_ol_flag_name(uint64_t mask); /* define a set of marker types that can be used to refer to set points in the * mbuf */ +__extension__ typedef void *MARKER[0]; /**< generic marker for a point in a structure */ +__extension__ typedef uint8_t MARKER8[0]; /**< generic marker with 1B alignment */ +__extension__ typedef uint64_t MARKER64[0]; /**< marker that allows us to overwrite 8 bytes * with a single assignment */ @@ -784,6 +386,7 @@ struct rte_mbuf { * or non-atomic) is controlled by the CONFIG_RTE_MBUF_REFCNT_ATOMIC * config option. */ + RTE_STD_C11 union { rte_atomic16_t refcnt_atomic; /**< Atomically accessed refcnt */ uint16_t refcnt; /**< Non-atomically accessed refcnt */ @@ -803,6 +406,7 @@ struct rte_mbuf { * would have RTE_PTYPE_L2_ETHER and not RTE_PTYPE_L2_VLAN because the * vlan is stripped from the data. */ + RTE_STD_C11 union { uint32_t packet_type; /**< L2/L3/L4 and tunnel information. */ struct { @@ -824,6 +428,7 @@ struct rte_mbuf { union { uint32_t rss; /**< RSS hash result if RSS enabled */ struct { + RTE_STD_C11 union { struct { uint16_t hash; @@ -851,6 +456,7 @@ struct rte_mbuf { /* second cache line - fields only used in slow path or on TX */ MARKER cacheline1 __rte_cache_min_aligned; + RTE_STD_C11 union { void *userdata; /**< Can be used for external metadata */ uint64_t udata64; /**< Allow 8-byte userdata on 32-bit */ @@ -860,10 +466,15 @@ struct rte_mbuf { struct rte_mbuf *next; /**< Next segment of scattered packet. */ /* fields to support TX offloads */ + RTE_STD_C11 union { uint64_t tx_offload; /**< combined for easy fetch */ + __extension__ struct { - uint64_t l2_len:7; /**< L2 (MAC) Header Length. */ + uint64_t l2_len:7; + /**< L2 (MAC) Header Length for non-tunneling pkt. + * Outer_L4_len + ... + Inner_L2_len for tunneling pkt. + */ uint64_t l3_len:9; /**< L3 (IP) Header Length. */ uint64_t l4_len:8; /**< L4 (TCP/UDP) Header Length. 
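 *
 * (A sketch of filling these fields for IPv4/TCP checksum offload; the
 * header structs come from rte_ether.h and rte_ip.h:
 *
 *   m->l2_len = sizeof(struct ether_hdr);
 *   m->l3_len = sizeof(struct ipv4_hdr);
 *   m->ol_flags |= PKT_TX_IPV4 | PKT_TX_IP_CKSUM | PKT_TX_TCP_CKSUM;)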
 */
 	uint64_t tso_segsz:16; /**< TCP TSO segment size */
@@ -1157,13 +768,6 @@ static inline struct rte_mbuf *rte_mbuf_raw_alloc(struct rte_mempool *mp)
 	return m;
 }
 
-/* compat with older versions */
-__rte_deprecated static inline struct rte_mbuf *
-__rte_mbuf_raw_alloc(struct rte_mempool *mp)
-{
-	return rte_mbuf_raw_alloc(mp);
-}
-
 /**
  * @internal Put mbuf back into its original mempool.
  * The use of that function is reserved for RTE internal needs.
@@ -1384,6 +988,19 @@ rte_pktmbuf_priv_size(struct rte_mempool *mp)
 	return mbp_priv->mbuf_priv_size;
 }
 
+/**
+ * Reset the data_off field of a packet mbuf to its default value.
+ *
+ * The given mbuf must have only one segment, which should be empty.
+ *
+ * @param m
+ *   The packet mbuf whose data_off field is to be reset.
+ */
+static inline void rte_pktmbuf_reset_headroom(struct rte_mbuf *m)
+{
+	m->data_off = RTE_MIN(RTE_PKTMBUF_HEADROOM, (uint16_t)m->buf_len);
+}
+
 /**
  * Reset the fields of a packet mbuf to their default values.
  *
@@ -1404,8 +1021,7 @@ static inline void rte_pktmbuf_reset(struct rte_mbuf *m)
 	m->ol_flags = 0;
 	m->packet_type = 0;
-	m->data_off = (RTE_PKTMBUF_HEADROOM <= m->buf_len) ?
-			RTE_PKTMBUF_HEADROOM : m->buf_len;
+	rte_pktmbuf_reset_headroom(m);
 
 	m->data_len = 0;
 	__rte_mbuf_sanity_check(m, 1);
@@ -1569,7 +1185,7 @@ static inline void rte_pktmbuf_detach(struct rte_mbuf *m)
 	m->buf_addr = (char *)m + mbuf_size;
 	m->buf_physaddr = rte_mempool_virt2phy(mp, m) + mbuf_size;
 	m->buf_len = (uint16_t)buf_len;
-	m->data_off = RTE_MIN(RTE_PKTMBUF_HEADROOM, (uint16_t)m->buf_len);
+	rte_pktmbuf_reset_headroom(m);
 	m->data_len = 0;
 	m->ol_flags = 0;
 
@@ -1957,6 +1573,41 @@ static inline int rte_pktmbuf_is_contiguous(const struct rte_mbuf *m)
 	return !!(m->nb_segs == 1);
 }
 
+/**
+ * @internal used by rte_pktmbuf_read().
+ */
+const void *__rte_pktmbuf_read(const struct rte_mbuf *m, uint32_t off,
+	uint32_t len, void *buf);
+
+/**
+ * Read len data bytes in a mbuf at specified offset.
+ *
+ * If the data is contiguous, return a pointer to the data in the mbuf;
+ * otherwise copy the data into the buffer provided by the user and return
+ * a pointer to that buffer.
+ *
+ * @param m
+ *   The pointer to the mbuf.
+ * @param off
+ *   The offset of the data in the mbuf.
+ * @param len
+ *   The number of bytes to read.
+ * @param buf
+ *   The buffer where data is copied if it is not contiguous in the mbuf
+ *   data. Its length should be at least equal to the len parameter.
+ * @return
+ *   The pointer to the data, either in the mbuf if it is contiguous,
+ *   or in the user buffer. If the mbuf is too small, NULL is returned.
+ */
+static inline const void *rte_pktmbuf_read(const struct rte_mbuf *m,
+	uint32_t off, uint32_t len, void *buf)
+{
+	if (likely(off + len <= rte_pktmbuf_data_len(m)))
+		return rte_pktmbuf_mtod_offset(m, char *, off);
+	else
+		return __rte_pktmbuf_read(m, off, len, buf);
+}
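As a usage sketch (illustrative only; example_ipv4_ttl is an invented name and the caller is assumed to know the L2 header length), rte_pktmbuf_read() yields a contiguous view of a header even when it straddles mbuf segments:

#include <rte_mbuf.h>
#include <rte_ip.h>

/* Fetch the TTL of a possibly non-contiguous IPv4 header starting at
 * byte offset l2_len in the mbuf chain. hdr_copy is only written to
 * when the header spans more than one segment. */
static inline int
example_ipv4_ttl(const struct rte_mbuf *m, uint32_t l2_len, uint8_t *ttl)
{
	struct ipv4_hdr hdr_copy;
	const struct ipv4_hdr *hdr;

	hdr = rte_pktmbuf_read(m, l2_len, sizeof(*hdr), &hdr_copy);
	if (hdr == NULL)	/* offset + length exceeds the packet length */
		return -1;
	*ttl = hdr->time_to_live;
	return 0;
}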
+
 /**
  * Chain an mbuf to another, thereby creating a segmented packet.
  *
@@ -1996,7 +1647,7 @@ static inline int rte_pktmbuf_chain(struct rte_mbuf *head, struct rte_mbuf *tail
 }
 
 /**
- * Dump an mbuf structure to the console.
+ * Dump an mbuf structure to a file.
  *
  * Dump all fields for the given packet mbuf and all its associated
  * segments (in the case of a chained buffer).
diff --git a/lib/librte_mbuf/rte_mbuf_ptype.c b/lib/librte_mbuf/rte_mbuf_ptype.c
new file mode 100644
index 00000000..e5c4fae3
--- /dev/null
+++ b/lib/librte_mbuf/rte_mbuf_ptype.c
@@ -0,0 +1,227 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright 2016 6WIND S.A.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ *   * Redistributions of source code must retain the above copyright
+ *     notice, this list of conditions and the following disclaimer.
+ *   * Redistributions in binary form must reproduce the above copyright
+ *     notice, this list of conditions and the following disclaimer in
+ *     the documentation and/or other materials provided with the
+ *     distribution.
+ *   * Neither the name of 6WIND S.A. nor the names of its
+ *     contributors may be used to endorse or promote products derived
+ *     from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <stdint.h>
+
+#include <rte_mbuf.h>
+#include <rte_mbuf_ptype.h>
+
+/* get the name of the l2 packet type */
+const char *rte_get_ptype_l2_name(uint32_t ptype)
+{
+	switch (ptype & RTE_PTYPE_L2_MASK) {
+	case RTE_PTYPE_L2_ETHER: return "L2_ETHER";
+	case RTE_PTYPE_L2_ETHER_TIMESYNC: return "L2_ETHER_TIMESYNC";
+	case RTE_PTYPE_L2_ETHER_ARP: return "L2_ETHER_ARP";
+	case RTE_PTYPE_L2_ETHER_LLDP: return "L2_ETHER_LLDP";
+	case RTE_PTYPE_L2_ETHER_NSH: return "L2_ETHER_NSH";
+	case RTE_PTYPE_L2_ETHER_VLAN: return "L2_ETHER_VLAN";
+	case RTE_PTYPE_L2_ETHER_QINQ: return "L2_ETHER_QINQ";
+	default: return "L2_UNKNOWN";
+	}
+}
+
+/* get the name of the l3 packet type */
+const char *rte_get_ptype_l3_name(uint32_t ptype)
+{
+	switch (ptype & RTE_PTYPE_L3_MASK) {
+	case RTE_PTYPE_L3_IPV4: return "L3_IPV4";
+	case RTE_PTYPE_L3_IPV4_EXT: return "L3_IPV4_EXT";
+	case RTE_PTYPE_L3_IPV6: return "L3_IPV6";
+	case RTE_PTYPE_L3_IPV4_EXT_UNKNOWN: return "L3_IPV4_EXT_UNKNOWN";
+	case RTE_PTYPE_L3_IPV6_EXT: return "L3_IPV6_EXT";
+	case RTE_PTYPE_L3_IPV6_EXT_UNKNOWN: return "L3_IPV6_EXT_UNKNOWN";
+	default: return "L3_UNKNOWN";
+	}
+}
+
+/* get the name of the l4 packet type */
+const char *rte_get_ptype_l4_name(uint32_t ptype)
+{
+	switch (ptype & RTE_PTYPE_L4_MASK) {
+	case RTE_PTYPE_L4_TCP: return "L4_TCP";
+	case RTE_PTYPE_L4_UDP: return "L4_UDP";
+	case RTE_PTYPE_L4_FRAG: return "L4_FRAG";
+	case RTE_PTYPE_L4_SCTP: return "L4_SCTP";
+	case RTE_PTYPE_L4_ICMP: return "L4_ICMP";
+	case RTE_PTYPE_L4_NONFRAG: return "L4_NONFRAG";
+	default: return "L4_UNKNOWN";
+	}
+}
+
+/* get the name of the tunnel packet type */
+const char *rte_get_ptype_tunnel_name(uint32_t ptype)
+{
+	switch (ptype & RTE_PTYPE_TUNNEL_MASK) {
+	case RTE_PTYPE_TUNNEL_IP: return "TUNNEL_IP";
+	case RTE_PTYPE_TUNNEL_GRE: return "TUNNEL_GRE";
+	case RTE_PTYPE_TUNNEL_VXLAN: return "TUNNEL_VXLAN";
+	case RTE_PTYPE_TUNNEL_NVGRE: return "TUNNEL_NVGRE";
+	case RTE_PTYPE_TUNNEL_GENEVE: return "TUNNEL_GENEVE";
+	case RTE_PTYPE_TUNNEL_GRENAT: return "TUNNEL_GRENAT";
+	default: return
"TUNNEL_UNKNOWN"; + } +} + +/* get the name of the inner_l2 packet type */ +const char *rte_get_ptype_inner_l2_name(uint32_t ptype) +{ + switch (ptype & RTE_PTYPE_INNER_L2_MASK) { + case RTE_PTYPE_INNER_L2_ETHER: return "INNER_L2_ETHER"; + case RTE_PTYPE_INNER_L2_ETHER_VLAN: return "INNER_L2_ETHER_VLAN"; + case RTE_PTYPE_INNER_L2_ETHER_QINQ: return "INNER_L2_ETHER_QINQ"; + default: return "INNER_L2_UNKNOWN"; + } +} + +/* get the name of the inner_l3 packet type */ +const char *rte_get_ptype_inner_l3_name(uint32_t ptype) +{ + switch (ptype & RTE_PTYPE_INNER_L3_MASK) { + case RTE_PTYPE_INNER_L3_IPV4: return "INNER_L3_IPV4"; + case RTE_PTYPE_INNER_L3_IPV4_EXT: return "INNER_L3_IPV4_EXT"; + case RTE_PTYPE_INNER_L3_IPV6: return "INNER_L3_IPV6"; + case RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN: + return "INNER_L3_IPV4_EXT_UNKNOWN"; + case RTE_PTYPE_INNER_L3_IPV6_EXT: return "INNER_L3_IPV6_EXT"; + case RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN: + return "INNER_L3_IPV6_EXT_UNKNOWN"; + default: return "INNER_L3_UNKNOWN"; + } +} + +/* get the name of the inner_l4 packet type */ +const char *rte_get_ptype_inner_l4_name(uint32_t ptype) +{ + switch (ptype & RTE_PTYPE_INNER_L4_MASK) { + case RTE_PTYPE_INNER_L4_TCP: return "INNER_L4_TCP"; + case RTE_PTYPE_INNER_L4_UDP: return "INNER_L4_UDP"; + case RTE_PTYPE_INNER_L4_FRAG: return "INNER_L4_FRAG"; + case RTE_PTYPE_INNER_L4_SCTP: return "INNER_L4_SCTP"; + case RTE_PTYPE_INNER_L4_ICMP: return "INNER_L4_ICMP"; + case RTE_PTYPE_INNER_L4_NONFRAG: return "INNER_L4_NONFRAG"; + default: return "INNER_L4_UNKNOWN"; + } +} + +/* write the packet type name into the buffer */ +int rte_get_ptype_name(uint32_t ptype, char *buf, size_t buflen) +{ + int ret; + + if (buflen == 0) + return -1; + + buf[0] = '\0'; + if ((ptype & RTE_PTYPE_ALL_MASK) == RTE_PTYPE_UNKNOWN) { + ret = snprintf(buf, buflen, "UNKNOWN"); + if (ret < 0) + return -1; + if ((size_t)ret >= buflen) + return -1; + return 0; + } + + if ((ptype & RTE_PTYPE_L2_MASK) != 0) { + ret = snprintf(buf, buflen, "%s ", + rte_get_ptype_l2_name(ptype)); + if (ret < 0) + return -1; + if ((size_t)ret >= buflen) + return -1; + buf += ret; + buflen -= ret; + } + if ((ptype & RTE_PTYPE_L3_MASK) != 0) { + ret = snprintf(buf, buflen, "%s ", + rte_get_ptype_l3_name(ptype)); + if (ret < 0) + return -1; + if ((size_t)ret >= buflen) + return -1; + buf += ret; + buflen -= ret; + } + if ((ptype & RTE_PTYPE_L4_MASK) != 0) { + ret = snprintf(buf, buflen, "%s ", + rte_get_ptype_l4_name(ptype)); + if (ret < 0) + return -1; + if ((size_t)ret >= buflen) + return -1; + buf += ret; + buflen -= ret; + } + if ((ptype & RTE_PTYPE_TUNNEL_MASK) != 0) { + ret = snprintf(buf, buflen, "%s ", + rte_get_ptype_tunnel_name(ptype)); + if (ret < 0) + return -1; + if ((size_t)ret >= buflen) + return -1; + buf += ret; + buflen -= ret; + } + if ((ptype & RTE_PTYPE_INNER_L2_MASK) != 0) { + ret = snprintf(buf, buflen, "%s ", + rte_get_ptype_inner_l2_name(ptype)); + if (ret < 0) + return -1; + if ((size_t)ret >= buflen) + return -1; + buf += ret; + buflen -= ret; + } + if ((ptype & RTE_PTYPE_INNER_L3_MASK) != 0) { + ret = snprintf(buf, buflen, "%s ", + rte_get_ptype_inner_l3_name(ptype)); + if (ret < 0) + return -1; + if ((size_t)ret >= buflen) + return -1; + buf += ret; + buflen -= ret; + } + if ((ptype & RTE_PTYPE_INNER_L4_MASK) != 0) { + ret = snprintf(buf, buflen, "%s ", + rte_get_ptype_inner_l4_name(ptype)); + if (ret < 0) + return -1; + if ((size_t)ret >= buflen) + return -1; + buf += ret; + buflen -= ret; + } + + return 0; +} diff --git 
a/lib/librte_mbuf/rte_mbuf_ptype.h b/lib/librte_mbuf/rte_mbuf_ptype.h
new file mode 100644
index 00000000..ff6de9d1
--- /dev/null
+++ b/lib/librte_mbuf/rte_mbuf_ptype.h
@@ -0,0 +1,668 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright(c) 2010-2016 Intel Corporation.
+ * Copyright 2014-2016 6WIND S.A.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ *   * Redistributions of source code must retain the above copyright
+ *     notice, this list of conditions and the following disclaimer.
+ *   * Redistributions in binary form must reproduce the above copyright
+ *     notice, this list of conditions and the following disclaimer in
+ *     the documentation and/or other materials provided with the
+ *     distribution.
+ *   * Neither the name of Intel Corporation nor the names of its
+ *     contributors may be used to endorse or promote products derived
+ *     from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef _RTE_MBUF_PTYPE_H_
+#define _RTE_MBUF_PTYPE_H_
+
+/**
+ * @file
+ * RTE Mbuf Packet Types
+ *
+ * This file contains declarations for features related to mbuf packet
+ * types. The packet type gives information about the data carried by the
+ * mbuf, and is stored in the mbuf in a 32-bit field.
+ *
+ * The 32 bits are divided into several fields to mark packet types. Note
+ * that each field holds an enumerated index within its mask, not a set of
+ * independent flag bits.
+ * - Bit 3:0 is for L2 types.
+ * - Bit 7:4 is for L3 or outer L3 (for tunneling case) types.
+ * - Bit 11:8 is for L4 or outer L4 (for tunneling case) types.
+ * - Bit 15:12 is for tunnel types.
+ * - Bit 19:16 is for inner L2 types.
+ * - Bit 23:20 is for inner L3 types.
+ * - Bit 27:24 is for inner L4 types.
+ * - Bit 31:28 is reserved.
+ *
+ * To be compatible with Vector PMD, RTE_PTYPE_L3_IPV4, RTE_PTYPE_L3_IPV4_EXT,
+ * RTE_PTYPE_L3_IPV6, RTE_PTYPE_L3_IPV6_EXT, RTE_PTYPE_L4_TCP, RTE_PTYPE_L4_UDP
+ * and RTE_PTYPE_L4_SCTP must keep the values defined below, which occupy a
+ * contiguous range of 7 bits.
+ *
+ * Note that the L3 type values are chosen to make checking for IPv4/IPv6
+ * headers cheap; read the annotations of RTE_ETH_IS_IPV4_HDR and
+ * RTE_ETH_IS_IPV6_HDR before changing any L3 type value.
+ *
+ * Note that the packet types of the same packet recognized by different
+ * hardware may be different, as different hardware may have different
+ * capabilities of packet type recognition.
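A minimal sketch of how these fields are meant to be consumed (illustrative only; example_dump_ptype is an invented name). Because each field is an index within its mask rather than a set of flag bits, a layer is tested by masking and then comparing for equality; rte_get_ptype_name(), declared later in this file, renders the whole value as text:

#include <stdio.h>
#include <rte_mbuf.h>

static void
example_dump_ptype(const struct rte_mbuf *m)
{
	char name[256];
	uint32_t ptype = m->packet_type;

	/* fields are enumerated values: mask first, then compare with '==' */
	if ((ptype & RTE_PTYPE_L4_MASK) == RTE_PTYPE_L4_UDP)
		printf("outer L4 is UDP\n");
	if (rte_get_ptype_name(ptype, name, sizeof(name)) == 0)
		printf("ptype 0x%08x: %s\n", (unsigned)ptype, name);
}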
+ *
+ * Examples:
+ *   <'ether type'=0x0800
+ *    | 'version'=4, 'protocol'=0x29
+ *    | 'version'=6, 'next header'=0x3A
+ *    | 'ICMPv6 header'>
+ * will be recognized on i40e hardware as the packet type combination
+ *   RTE_PTYPE_L2_ETHER |
+ *   RTE_PTYPE_L3_IPV4_EXT_UNKNOWN |
+ *   RTE_PTYPE_TUNNEL_IP |
+ *   RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN |
+ *   RTE_PTYPE_INNER_L4_ICMP.
+ *
+ *   <'ether type'=0x86DD
+ *    | 'version'=6, 'next header'=0x2F
+ *    | 'GRE header'
+ *    | 'version'=6, 'next header'=0x11
+ *    | 'UDP header'>
+ * will be recognized on i40e hardware as the packet type combination
+ *   RTE_PTYPE_L2_ETHER |
+ *   RTE_PTYPE_L3_IPV6_EXT_UNKNOWN |
+ *   RTE_PTYPE_TUNNEL_GRENAT |
+ *   RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN |
+ *   RTE_PTYPE_INNER_L4_UDP.
+ */
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/**
+ * No packet type information.
+ */
+#define RTE_PTYPE_UNKNOWN 0x00000000
+/**
+ * Ethernet packet type.
+ * It is used for outer packet for tunneling cases.
+ *
+ * Packet format:
+ * <'ether type'=[0x0800|0x86DD]>
+ */
+#define RTE_PTYPE_L2_ETHER 0x00000001
+/**
+ * Ethernet packet type for time sync.
+ *
+ * Packet format:
+ * <'ether type'=0x88F7>
+ */
+#define RTE_PTYPE_L2_ETHER_TIMESYNC 0x00000002
+/**
+ * ARP (Address Resolution Protocol) packet type.
+ *
+ * Packet format:
+ * <'ether type'=0x0806>
+ */
+#define RTE_PTYPE_L2_ETHER_ARP 0x00000003
+/**
+ * LLDP (Link Layer Discovery Protocol) packet type.
+ *
+ * Packet format:
+ * <'ether type'=0x88CC>
+ */
+#define RTE_PTYPE_L2_ETHER_LLDP 0x00000004
+/**
+ * NSH (Network Service Header) packet type.
+ *
+ * Packet format:
+ * <'ether type'=0x894F>
+ */
+#define RTE_PTYPE_L2_ETHER_NSH 0x00000005
+/**
+ * VLAN packet type.
+ *
+ * Packet format:
+ * <'ether type'=[0x8100]>
+ */
+#define RTE_PTYPE_L2_ETHER_VLAN 0x00000006
+/**
+ * QinQ packet type.
+ *
+ * Packet format:
+ * <'ether type'=[0x88A8]>
+ */
+#define RTE_PTYPE_L2_ETHER_QINQ 0x00000007
+/**
+ * Mask of layer 2 packet types.
+ * It is used for outer packet for tunneling cases.
+ */
+#define RTE_PTYPE_L2_MASK 0x0000000f
+/**
+ * IP (Internet Protocol) version 4 packet type.
+ * It is used for outer packet for tunneling cases, and does not contain any
+ * header option.
+ *
+ * Packet format:
+ * <'ether type'=0x0800
+ *  | 'version'=4, 'ihl'=5>
+ */
+#define RTE_PTYPE_L3_IPV4 0x00000010
+/**
+ * IP (Internet Protocol) version 4 packet type.
+ * It is used for outer packet for tunneling cases, and contains header
+ * options.
+ *
+ * Packet format:
+ * <'ether type'=0x0800
+ *  | 'version'=4, 'ihl'=[6-15], 'options'>
+ */
+#define RTE_PTYPE_L3_IPV4_EXT 0x00000030
+/**
+ * IP (Internet Protocol) version 6 packet type.
+ * It is used for outer packet for tunneling cases, and does not contain any
+ * extension header.
+ *
+ * Packet format:
+ * <'ether type'=0x86DD
+ *  | 'version'=6, 'next header'=0x3B>
+ */
+#define RTE_PTYPE_L3_IPV6 0x00000040
+/**
+ * IP (Internet Protocol) version 4 packet type.
+ * It is used for outer packet for tunneling cases, and may or may not
+ * contain header options.
+ *
+ * Packet format:
+ * <'ether type'=0x0800
+ *  | 'version'=4, 'ihl'=[5-15], <'options'>>
+ */
+#define RTE_PTYPE_L3_IPV4_EXT_UNKNOWN 0x00000090
+/**
+ * IP (Internet Protocol) version 6 packet type.
+ * It is used for outer packet for tunneling cases, and contains extension
+ * headers.
+ *
+ * Packet format:
+ * <'ether type'=0x86DD
+ *  | 'version'=6, 'next header'=[0x0|0x2B|0x2C|0x32|0x33|0x3C|0x87],
+ *    'extension headers'>
+ */
+#define RTE_PTYPE_L3_IPV6_EXT 0x000000c0
+/**
+ * IP (Internet Protocol) version 6 packet type.
+ * It is used for outer packet for tunneling cases, and may or may not
+ * contain extension headers.
+ *
+ * Packet format:
+ * <'ether type'=0x86DD
+ *  | 'version'=6, 'next header'=[0x3B|0x0|0x2B|0x2C|0x32|0x33|0x3C|0x87],
+ *    <'extension headers'>>
+ */
+#define RTE_PTYPE_L3_IPV6_EXT_UNKNOWN 0x000000e0
+/**
+ * Mask of layer 3 packet types.
+ * It is used for outer packet for tunneling cases.
+ */
+#define RTE_PTYPE_L3_MASK 0x000000f0
+/**
+ * TCP (Transmission Control Protocol) packet type.
+ * It is used for outer packet for tunneling cases.
+ *
+ * Packet format:
+ * <'ether type'=0x0800
+ *  | 'version'=4, 'protocol'=6, 'MF'=0, 'frag_offset'=0>
+ * or,
+ * <'ether type'=0x86DD
+ *  | 'version'=6, 'next header'=6>
+ */
+#define RTE_PTYPE_L4_TCP 0x00000100
+/**
+ * UDP (User Datagram Protocol) packet type.
+ * It is used for outer packet for tunneling cases.
+ *
+ * Packet format:
+ * <'ether type'=0x0800
+ *  | 'version'=4, 'protocol'=17, 'MF'=0, 'frag_offset'=0>
+ * or,
+ * <'ether type'=0x86DD
+ *  | 'version'=6, 'next header'=17>
+ */
+#define RTE_PTYPE_L4_UDP 0x00000200
+/**
+ * Fragmented IP (Internet Protocol) packet type.
+ * It is used for outer packet for tunneling cases.
+ *
+ * It refers to those packets of any IP types, which can be recognized as
+ * fragmented. A fragmented packet cannot be recognized as any other L4 types
+ * (RTE_PTYPE_L4_TCP, RTE_PTYPE_L4_UDP, RTE_PTYPE_L4_SCTP, RTE_PTYPE_L4_ICMP,
+ * RTE_PTYPE_L4_NONFRAG).
+ *
+ * Packet format:
+ * <'ether type'=0x0800
+ *  | 'version'=4, 'MF'=1>
+ * or,
+ * <'ether type'=0x0800
+ *  | 'version'=4, 'frag_offset'!=0>
+ * or,
+ * <'ether type'=0x86DD
+ *  | 'version'=6, 'next header'=44>
+ */
+#define RTE_PTYPE_L4_FRAG 0x00000300
+/**
+ * SCTP (Stream Control Transmission Protocol) packet type.
+ * It is used for outer packet for tunneling cases.
+ *
+ * Packet format:
+ * <'ether type'=0x0800
+ *  | 'version'=4, 'protocol'=132, 'MF'=0, 'frag_offset'=0>
+ * or,
+ * <'ether type'=0x86DD
+ *  | 'version'=6, 'next header'=132>
+ */
+#define RTE_PTYPE_L4_SCTP 0x00000400
+/**
+ * ICMP (Internet Control Message Protocol) packet type.
+ * It is used for outer packet for tunneling cases.
+ *
+ * Packet format:
+ * <'ether type'=0x0800
+ *  | 'version'=4, 'protocol'=1, 'MF'=0, 'frag_offset'=0>
+ * or,
+ * <'ether type'=0x86DD
+ *  | 'version'=6, 'next header'=1>
+ */
+#define RTE_PTYPE_L4_ICMP 0x00000500
+/**
+ * Non-fragmented IP (Internet Protocol) packet type.
+ * It is used for outer packet for tunneling cases.
+ *
+ * It refers to those packets of any IP types, which cannot be recognized as
+ * any of the above L4 types (RTE_PTYPE_L4_TCP, RTE_PTYPE_L4_UDP,
+ * RTE_PTYPE_L4_FRAG, RTE_PTYPE_L4_SCTP, RTE_PTYPE_L4_ICMP).
+ *
+ * Packet format:
+ * <'ether type'=0x0800
+ *  | 'version'=4, 'protocol'!=[6|17|132|1], 'MF'=0, 'frag_offset'=0>
+ * or,
+ * <'ether type'=0x86DD
+ *  | 'version'=6, 'next header'!=[6|17|44|132|1]>
+ */
+#define RTE_PTYPE_L4_NONFRAG 0x00000600
+/**
+ * Mask of layer 4 packet types.
+ * It is used for outer packet for tunneling cases.
+ */
+#define RTE_PTYPE_L4_MASK 0x00000f00
+/**
+ * IP (Internet Protocol) in IP (Internet Protocol) tunneling packet type.
+ *
+ * Packet format:
+ * <'ether type'=0x0800
+ *  | 'version'=4, 'protocol'=[4|41]>
+ * or,
+ * <'ether type'=0x86DD
+ *  | 'version'=6, 'next header'=[4|41]>
+ */
+#define RTE_PTYPE_TUNNEL_IP 0x00001000
+/**
+ * GRE (Generic Routing Encapsulation) tunneling packet type.
+ *
+ * Packet format:
+ * <'ether type'=0x0800
+ *  | 'version'=4, 'protocol'=47>
+ * or,
+ * <'ether type'=0x86DD
+ *  | 'version'=6, 'next header'=47>
+ */
+#define RTE_PTYPE_TUNNEL_GRE 0x00002000
+/**
+ * VXLAN (Virtual eXtensible Local Area Network) tunneling packet type.
+ *
+ * Packet format:
+ * <'ether type'=0x0800
+ *  | 'version'=4, 'protocol'=17
+ *  | 'destination port'=4789>
+ * or,
+ * <'ether type'=0x86DD
+ *  | 'version'=6, 'next header'=17
+ *  | 'destination port'=4789>
+ */
+#define RTE_PTYPE_TUNNEL_VXLAN 0x00003000
+/**
+ * NVGRE (Network Virtualization using Generic Routing Encapsulation) tunneling
+ * packet type.
+ *
+ * Packet format:
+ * <'ether type'=0x0800
+ *  | 'version'=4, 'protocol'=47
+ *  | 'protocol type'=0x6558>
+ * or,
+ * <'ether type'=0x86DD
+ *  | 'version'=6, 'next header'=47
+ *  | 'protocol type'=0x6558>
+ */
+#define RTE_PTYPE_TUNNEL_NVGRE 0x00004000
+/**
+ * GENEVE (Generic Network Virtualization Encapsulation) tunneling packet type.
+ *
+ * Packet format:
+ * <'ether type'=0x0800
+ *  | 'version'=4, 'protocol'=17
+ *  | 'destination port'=6081>
+ * or,
+ * <'ether type'=0x86DD
+ *  | 'version'=6, 'next header'=17
+ *  | 'destination port'=6081>
+ */
+#define RTE_PTYPE_TUNNEL_GENEVE 0x00005000
+/**
+ * Tunneling packet type for Teredo, VXLAN (Virtual eXtensible Local Area
+ * Network) or GRE (Generic Routing Encapsulation) packets: they are reported
+ * as this packet type when the hardware is not capable of recognizing them
+ * individually.
+ */
+#define RTE_PTYPE_TUNNEL_GRENAT 0x00006000
+/**
+ * Mask of tunneling packet types.
+ */
+#define RTE_PTYPE_TUNNEL_MASK 0x0000f000
+/**
+ * Ethernet packet type.
+ * It is used for inner packet type only.
+ *
+ * Packet format (inner only):
+ * <'ether type'=[0x800|0x86DD]>
+ */
+#define RTE_PTYPE_INNER_L2_ETHER 0x00010000
+/**
+ * Ethernet packet type with VLAN (Virtual Local Area Network) tag.
+ *
+ * Packet format (inner only):
+ * <'ether type'=[0x800|0x86DD], vlan=[1-4095]>
+ */
+#define RTE_PTYPE_INNER_L2_ETHER_VLAN 0x00020000
+/**
+ * QinQ packet type.
+ *
+ * Packet format (inner only):
+ * <'ether type'=[0x88A8]>
+ */
+#define RTE_PTYPE_INNER_L2_ETHER_QINQ 0x00030000
+/**
+ * Mask of inner layer 2 packet types.
+ */
+#define RTE_PTYPE_INNER_L2_MASK 0x000f0000
+/**
+ * IP (Internet Protocol) version 4 packet type.
+ * It is used for inner packet only, and does not contain any header option.
+ *
+ * Packet format (inner only):
+ * <'ether type'=0x0800
+ *  | 'version'=4, 'ihl'=5>
+ */
+#define RTE_PTYPE_INNER_L3_IPV4 0x00100000
+/**
+ * IP (Internet Protocol) version 4 packet type.
+ * It is used for inner packet only, and contains header options.
+ *
+ * Packet format (inner only):
+ * <'ether type'=0x0800
+ *  | 'version'=4, 'ihl'=[6-15], 'options'>
+ */
+#define RTE_PTYPE_INNER_L3_IPV4_EXT 0x00200000
+/**
+ * IP (Internet Protocol) version 6 packet type.
+ * It is used for inner packet only, and does not contain any extension header.
+ *
+ * Packet format (inner only):
+ * <'ether type'=0x86DD
+ *  | 'version'=6, 'next header'=0x3B>
+ */
+#define RTE_PTYPE_INNER_L3_IPV6 0x00300000
+/**
+ * IP (Internet Protocol) version 4 packet type.
+ * It is used for inner packet only, and may or may not contain header
+ * options.
+ *
+ * Packet format (inner only):
+ * <'ether type'=0x0800
+ *  | 'version'=4, 'ihl'=[5-15], <'options'>>
+ */
+#define RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN 0x00400000
+/**
+ * IP (Internet Protocol) version 6 packet type.
+ * It is used for inner packet only, and contains extension headers.
+ *
+ * Packet format (inner only):
+ * <'ether type'=0x86DD
+ *  | 'version'=6, 'next header'=[0x0|0x2B|0x2C|0x32|0x33|0x3C|0x87],
+ *    'extension headers'>
+ */
+#define RTE_PTYPE_INNER_L3_IPV6_EXT 0x00500000
+/**
+ * IP (Internet Protocol) version 6 packet type.
+ * It is used for inner packet only, and may or may not contain extension
+ * headers.
+ *
+ * Packet format (inner only):
+ * <'ether type'=0x86DD
+ *  | 'version'=6, 'next header'=[0x3B|0x0|0x2B|0x2C|0x32|0x33|0x3C|0x87],
+ *    <'extension headers'>>
+ */
+#define RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN 0x00600000
+/**
+ * Mask of inner layer 3 packet types.
+ */
+#define RTE_PTYPE_INNER_L3_MASK 0x00f00000
+/**
+ * TCP (Transmission Control Protocol) packet type.
+ * It is used for inner packet only.
+ *
+ * Packet format (inner only):
+ * <'ether type'=0x0800
+ *  | 'version'=4, 'protocol'=6, 'MF'=0, 'frag_offset'=0>
+ * or,
+ * <'ether type'=0x86DD
+ *  | 'version'=6, 'next header'=6>
+ */
+#define RTE_PTYPE_INNER_L4_TCP 0x01000000
+/**
+ * UDP (User Datagram Protocol) packet type.
+ * It is used for inner packet only.
+ *
+ * Packet format (inner only):
+ * <'ether type'=0x0800
+ *  | 'version'=4, 'protocol'=17, 'MF'=0, 'frag_offset'=0>
+ * or,
+ * <'ether type'=0x86DD
+ *  | 'version'=6, 'next header'=17>
+ */
+#define RTE_PTYPE_INNER_L4_UDP 0x02000000
+/**
+ * Fragmented IP (Internet Protocol) packet type.
+ * It is used for inner packet only, and may or may not carry a layer 4
+ * packet.
+ *
+ * Packet format (inner only):
+ * <'ether type'=0x0800
+ *  | 'version'=4, 'MF'=1>
+ * or,
+ * <'ether type'=0x0800
+ *  | 'version'=4, 'frag_offset'!=0>
+ * or,
+ * <'ether type'=0x86DD
+ *  | 'version'=6, 'next header'=44>
+ */
+#define RTE_PTYPE_INNER_L4_FRAG 0x03000000
+/**
+ * SCTP (Stream Control Transmission Protocol) packet type.
+ * It is used for inner packet only.
+ *
+ * Packet format (inner only):
+ * <'ether type'=0x0800
+ *  | 'version'=4, 'protocol'=132, 'MF'=0, 'frag_offset'=0>
+ * or,
+ * <'ether type'=0x86DD
+ *  | 'version'=6, 'next header'=132>
+ */
+#define RTE_PTYPE_INNER_L4_SCTP 0x04000000
+/**
+ * ICMP (Internet Control Message Protocol) packet type.
+ * It is used for inner packet only.
+ *
+ * Packet format (inner only):
+ * <'ether type'=0x0800
+ *  | 'version'=4, 'protocol'=1, 'MF'=0, 'frag_offset'=0>
+ * or,
+ * <'ether type'=0x86DD
+ *  | 'version'=6, 'next header'=1>
+ */
+#define RTE_PTYPE_INNER_L4_ICMP 0x05000000
+/**
+ * Non-fragmented IP (Internet Protocol) packet type.
+ * It is used for inner packet only, and may or may not carry other unknown
+ * layer 4 packet types.
+ *
+ * Packet format (inner only):
+ * <'ether type'=0x0800
+ *  | 'version'=4, 'protocol'!=[6|17|132|1], 'MF'=0, 'frag_offset'=0>
+ * or,
+ * <'ether type'=0x86DD
+ *  | 'version'=6, 'next header'!=[6|17|44|132|1]>
+ */
+#define RTE_PTYPE_INNER_L4_NONFRAG 0x06000000
+/**
+ * Mask of inner layer 4 packet types.
+ */
+#define RTE_PTYPE_INNER_L4_MASK 0x0f000000
+/**
+ * All valid layer masks.
+ */
+#define RTE_PTYPE_ALL_MASK 0x0fffffff
+
+/**
+ * Check if the (outer) L3 header is IPv4. To avoid comparing IPv4 types one by
+ * one, bit 4 is selected to be used for IPv4 only. Then checking bit 4 can
+ * determine if it is an IPv4 packet.
+ */
+#define RTE_ETH_IS_IPV4_HDR(ptype) ((ptype) & RTE_PTYPE_L3_IPV4)
+
+/**
+ * Check if the (outer) L3 header is IPv6. To avoid comparing IPv6 types one by
+ * one, bit 6 is selected to be used for IPv6 only. Then checking bit 6 can
+ * determine if it is an IPv6 packet.
+ */
+#define RTE_ETH_IS_IPV6_HDR(ptype) ((ptype) & RTE_PTYPE_L3_IPV6)
+
+/* Check if it is a tunneling packet */
+#define RTE_ETH_IS_TUNNEL_PKT(ptype) ((ptype) & \
+	(RTE_PTYPE_TUNNEL_MASK | \
+	 RTE_PTYPE_INNER_L2_MASK | \
+	 RTE_PTYPE_INNER_L3_MASK | \
+	 RTE_PTYPE_INNER_L4_MASK))
+
+/**
+ * Get the name of the l2 packet type
+ *
+ * @param ptype
+ *   The packet type value.
+ * @return
+ *   A non-null string describing the packet type.
+ */
+const char *rte_get_ptype_l2_name(uint32_t ptype);
+
+/**
+ * Get the name of the l3 packet type
+ *
+ * @param ptype
+ *   The packet type value.
+ * @return
+ *   A non-null string describing the packet type.
+ */
+const char *rte_get_ptype_l3_name(uint32_t ptype);
+
+/**
+ * Get the name of the l4 packet type
+ *
+ * @param ptype
+ *   The packet type value.
+ * @return
+ *   A non-null string describing the packet type.
+ */
+const char *rte_get_ptype_l4_name(uint32_t ptype);
+
+/**
+ * Get the name of the tunnel packet type
+ *
+ * @param ptype
+ *   The packet type value.
+ * @return
+ *   A non-null string describing the packet type.
+ */
+const char *rte_get_ptype_tunnel_name(uint32_t ptype);
+
+/**
+ * Get the name of the inner_l2 packet type
+ *
+ * @param ptype
+ *   The packet type value.
+ * @return
+ *   A non-null string describing the packet type.
+ */
+const char *rte_get_ptype_inner_l2_name(uint32_t ptype);
+
+/**
+ * Get the name of the inner_l3 packet type
+ *
+ * @param ptype
+ *   The packet type value.
+ * @return
+ *   A non-null string describing the packet type.
+ */
+const char *rte_get_ptype_inner_l3_name(uint32_t ptype);
+
+/**
+ * Get the name of the inner_l4 packet type
+ *
+ * @param ptype
+ *   The packet type value.
+ * @return
+ *   A non-null string describing the packet type.
+ */
+const char *rte_get_ptype_inner_l4_name(uint32_t ptype);
+
+/**
+ * Write the packet type name into the buffer
+ *
+ * @param ptype
+ *   The packet type value.
+ * @param buf
+ *   The buffer where the string is written.
+ * @param buflen
+ *   The length of the buffer.
+ * @return
+ *   - 0 on success
+ *   - (-1) if the buffer is too small
+ */
+int rte_get_ptype_name(uint32_t ptype, char *buf, size_t buflen);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _RTE_MBUF_PTYPE_H_ */
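A short sketch of the helper macros above (illustrative; example_is_ipv4_tunnel is an invented name): because bit 4 appears only in IPv4 L3 values, bit 6 only in IPv6 L3 values, and any non-zero tunnel or inner field marks an encapsulation, each test reduces to a single mask operation:

#include <rte_mbuf.h>

/* Return non-zero when the outer L3 header is IPv4 and the packet
 * carries an encapsulated (inner) packet. */
static inline int
example_is_ipv4_tunnel(const struct rte_mbuf *m)
{
	uint32_t ptype = m->packet_type;

	return RTE_ETH_IS_IPV4_HDR(ptype) && RTE_ETH_IS_TUNNEL_PKT(ptype);
}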
diff --git a/lib/librte_mbuf/rte_mbuf_version.map b/lib/librte_mbuf/rte_mbuf_version.map
index e10f6bdc..6e2ea845 100644
--- a/lib/librte_mbuf/rte_mbuf_version.map
+++ b/lib/librte_mbuf/rte_mbuf_version.map
@@ -18,3 +18,20 @@ DPDK_2.1 {
 	rte_pktmbuf_pool_create;
 
 } DPDK_2.0;
+
+DPDK_16.11 {
+	global:
+
+	__rte_pktmbuf_read;
+	rte_get_ptype_inner_l2_name;
+	rte_get_ptype_inner_l3_name;
+	rte_get_ptype_inner_l4_name;
+	rte_get_ptype_l2_name;
+	rte_get_ptype_l3_name;
+	rte_get_ptype_l4_name;
+	rte_get_ptype_name;
+	rte_get_ptype_tunnel_name;
+	rte_get_rx_ol_flag_list;
+	rte_get_tx_ol_flag_list;
+
+} DPDK_2.1;
diff --git a/lib/librte_mempool/rte_mempool.c b/lib/librte_mempool/rte_mempool.c
index ad7c470e..aa513b97 100644
--- a/lib/librte_mempool/rte_mempool.c
+++ b/lib/librte_mempool/rte_mempool.c
@@ -55,7 +55,6 @@
 #include
 #include
 #include
-#include
 #include
 #include
 #include
@@ -911,9 +910,8 @@ rte_mempool_create(const char *name, unsigned n, unsigned elt_size,
 
 /*
  * Create the mempool over already allocated chunk of memory.
  * That external memory buffer can consist of physically disjoint pages.
- * Setting vaddr to NULL, makes mempool to fallback to original behaviour
- * and allocate space for mempool and it's elements as one big chunk of
- * physically continuos memory.
+ * Setting vaddr to NULL makes the mempool fall back to rte_mempool_create()
+ * behavior.
  */
 struct rte_mempool *
 rte_mempool_xmem_create(const char *name, unsigned n, unsigned elt_size,
diff --git a/lib/librte_mempool/rte_mempool.h b/lib/librte_mempool/rte_mempool.h
index 059ad9e5..440f3b1b 100644
--- a/lib/librte_mempool/rte_mempool.h
+++ b/lib/librte_mempool/rte_mempool.h
@@ -75,6 +75,7 @@
 #include
 #include
 #include
+#include
 
 #ifdef __cplusplus
 extern "C" {
@@ -216,6 +217,7 @@ struct rte_mempool {
 	 * RTE_MEMPOOL_NAMESIZE next time the ABI changes
 	 */
 	char name[RTE_MEMZONE_NAMESIZE]; /**< Name of mempool. */
+	RTE_STD_C11
 	union {
 		void *pool_data; /**< Ring or pool to store objects. */
 		uint64_t pool_id; /**< External mempool identifier. */
@@ -587,10 +589,8 @@ typedef void (rte_mempool_ctor_t)(struct rte_mempool *, void *);
 /**
  * Create a new mempool named *name* in memory.
  *
- * This function uses ``memzone_reserve()`` to allocate memory. The
+ * This function uses ``rte_memzone_reserve()`` to allocate memory. The
  * pool contains n elements of elt_size. Its size is set to n.
- * All elements of the mempool are allocated together with the mempool header,
- * in one physically continuous chunk of memory.
  *
  * @param name
  *   The name of the mempool.
@@ -746,7 +746,7 @@ rte_mempool_xmem_create(const char *name, unsigned n, unsigned elt_size,
 *
 * The mempool is allocated and initialized, but it is not populated: no
 * memory is allocated for the mempool elements. The user has to call
- * rte_mempool_populate_*() or to add memory chunks to the pool. Once
+ * rte_mempool_populate_*() to add memory chunks to the pool. Once
 * populated, the user may also want to initialize each object with
 * rte_mempool_obj_iter().
 *
@@ -798,6 +798,10 @@ rte_mempool_free(struct rte_mempool *mp);
 * Add a virtually and physically contiguous memory chunk in the pool
 * where objects can be instantiated.
 *
+ * If the given physical address is unknown (paddr = RTE_BAD_PHYS_ADDR),
+ * the chunk doesn't need to be physically contiguous (only virtually),
+ * and allocated objects may span two pages.
+ *
 * @param mp
 *   A pointer to the mempool structure.
 * @param vaddr
@@ -946,7 +950,7 @@ uint32_t rte_mempool_mem_iter(struct rte_mempool *mp,
 	rte_mempool_mem_cb_t *mem_cb, void *mem_cb_arg);
 
 /**
- * Dump the status of the mempool to the console.
+ * Dump the status of the mempool to a file.
* * @param f * A pointer to a file for output diff --git a/lib/librte_meter/rte_meter.h b/lib/librte_meter/rte_meter.h index 2cd8d814..2ab71849 100644 --- a/lib/librte_meter/rte_meter.h +++ b/lib/librte_meter/rte_meter.h @@ -232,13 +232,15 @@ rte_meter_srtcm_color_blind_check(struct rte_meter_srtcm *m, n_periods = time_diff / m->cir_period; m->time += n_periods * m->cir_period; + /* Put the tokens overflowing from tc into te bucket */ tc = m->tc + n_periods * m->cir_bytes_per_period; - if (tc > m->cbs) + te = m->te; + if (tc > m->cbs) { + te += (tc - m->cbs); + if (te > m->ebs) + te = m->ebs; tc = m->cbs; - - te = m->te + n_periods * m->cir_bytes_per_period; - if (te > m->ebs) - te = m->ebs; + } /* Color logic */ if (tc >= pkt_len) { @@ -271,13 +273,15 @@ rte_meter_srtcm_color_aware_check(struct rte_meter_srtcm *m, n_periods = time_diff / m->cir_period; m->time += n_periods * m->cir_period; + /* Put the tokens overflowing from tc into te bucket */ tc = m->tc + n_periods * m->cir_bytes_per_period; - if (tc > m->cbs) + te = m->te; + if (tc > m->cbs) { + te += (tc - m->cbs); + if (te > m->ebs) + te = m->ebs; tc = m->cbs; - - te = m->te + n_periods * m->cir_bytes_per_period; - if (te > m->ebs) - te = m->ebs; + } /* Color logic */ if ((pkt_color == e_RTE_METER_GREEN) && (tc >= pkt_len)) { diff --git a/lib/librte_net/Makefile b/lib/librte_net/Makefile index ad2e482d..20cf6644 100644 --- a/lib/librte_net/Makefile +++ b/lib/librte_net/Makefile @@ -31,10 +31,20 @@ include $(RTE_SDK)/mk/rte.vars.mk +LIB = librte_net.a + CFLAGS += $(WERROR_FLAGS) -I$(SRCDIR) -O3 +EXPORT_MAP := rte_net_version.map +LIBABIVER := 1 + +SRCS-$(CONFIG_RTE_LIBRTE_NET) := rte_net.c + # install includes -SYMLINK-$(CONFIG_RTE_LIBRTE_NET)-include := rte_ip.h rte_tcp.h rte_udp.h rte_sctp.h rte_icmp.h rte_arp.h +SYMLINK-$(CONFIG_RTE_LIBRTE_NET)-include := rte_ip.h rte_tcp.h rte_udp.h +SYMLINK-$(CONFIG_RTE_LIBRTE_NET)-include += rte_sctp.h rte_icmp.h rte_arp.h +SYMLINK-$(CONFIG_RTE_LIBRTE_NET)-include += rte_ether.h rte_gre.h rte_net.h +DEPDIRS-$(CONFIG_RTE_LIBRTE_NET) += lib/librte_eal lib/librte_mbuf -include $(RTE_SDK)/mk/rte.install.mk +include $(RTE_SDK)/mk/rte.lib.mk diff --git a/lib/librte_net/rte_ether.h b/lib/librte_net/rte_ether.h new file mode 100644 index 00000000..ff3d0654 --- /dev/null +++ b/lib/librte_net/rte_ether.h @@ -0,0 +1,417 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef _RTE_ETHER_H_ +#define _RTE_ETHER_H_ + +/** + * @file + * + * Ethernet Helpers in RTE + */ + +#ifdef __cplusplus +extern "C" { +#endif + +#include +#include + +#include +#include +#include +#include + +#define ETHER_ADDR_LEN 6 /**< Length of Ethernet address. */ +#define ETHER_TYPE_LEN 2 /**< Length of Ethernet type field. */ +#define ETHER_CRC_LEN 4 /**< Length of Ethernet CRC. */ +#define ETHER_HDR_LEN \ + (ETHER_ADDR_LEN * 2 + ETHER_TYPE_LEN) /**< Length of Ethernet header. */ +#define ETHER_MIN_LEN 64 /**< Minimum frame len, including CRC. */ +#define ETHER_MAX_LEN 1518 /**< Maximum frame len, including CRC. */ +#define ETHER_MTU \ + (ETHER_MAX_LEN - ETHER_HDR_LEN - ETHER_CRC_LEN) /**< Ethernet MTU. */ + +#define ETHER_MAX_VLAN_FRAME_LEN \ + (ETHER_MAX_LEN + 4) /**< Maximum VLAN frame length, including CRC. */ + +#define ETHER_MAX_JUMBO_FRAME_LEN \ + 0x3F00 /**< Maximum Jumbo frame length, including CRC. */ + +#define ETHER_MAX_VLAN_ID 4095 /**< Maximum VLAN ID. */ + +#define ETHER_MIN_MTU 68 /**< Minimum MTU for IPv4 packets, see RFC 791. */ + +/** + * Ethernet address: + * A universally administered address is uniquely assigned to a device by its + * manufacturer. The first three octets (in transmission order) contain the + * Organizationally Unique Identifier (OUI). The following three (MAC-48 and + * EUI-48) octets are assigned by that organization with the only constraint + * of uniqueness. + * A locally administered address is assigned to a device by a network + * administrator and does not contain OUIs. + * See http://standards.ieee.org/regauth/groupmac/tutorial.html + */ +struct ether_addr { + uint8_t addr_bytes[ETHER_ADDR_LEN]; /**< Addr bytes in tx order */ +} __attribute__((__packed__)); + +#define ETHER_LOCAL_ADMIN_ADDR 0x02 /**< Locally assigned Eth. address. */ +#define ETHER_GROUP_ADDR 0x01 /**< Multicast or broadcast Eth. address. */ + +/** + * Check if two Ethernet addresses are the same. + * + * @param ea1 + * A pointer to the first ether_addr structure containing + * the ethernet address. + * @param ea2 + * A pointer to the second ether_addr structure containing + * the ethernet address. + * + * @return + * True (1) if the given two ethernet address are the same; + * False (0) otherwise. + */ +static inline int is_same_ether_addr(const struct ether_addr *ea1, + const struct ether_addr *ea2) +{ + int i; + for (i = 0; i < ETHER_ADDR_LEN; i++) + if (ea1->addr_bytes[i] != ea2->addr_bytes[i]) + return 0; + return 1; +} + +/** + * Check if an Ethernet address is filled with zeros. + * + * @param ea + * A pointer to a ether_addr structure containing the ethernet address + * to check. + * @return + * True (1) if the given ethernet address is filled with zeros; + * false (0) otherwise. + */ +static inline int is_zero_ether_addr(const struct ether_addr *ea) +{ + int i; + for (i = 0; i < ETHER_ADDR_LEN; i++) + if (ea->addr_bytes[i] != 0x00) + return 0; + return 1; +} + +/** + * Check if an Ethernet address is a unicast address. 
+ *
+ * @param ea
+ *   A pointer to an ether_addr structure containing the ethernet address
+ *   to check.
+ * @return
+ *   True (1) if the given ethernet address is a unicast address;
+ *   false (0) otherwise.
+ */
+static inline int is_unicast_ether_addr(const struct ether_addr *ea)
+{
+	return (ea->addr_bytes[0] & ETHER_GROUP_ADDR) == 0;
+}
+
+/**
+ * Check if an Ethernet address is a multicast address.
+ *
+ * @param ea
+ *   A pointer to an ether_addr structure containing the ethernet address
+ *   to check.
+ * @return
+ *   True (1) if the given ethernet address is a multicast address;
+ *   false (0) otherwise.
+ */
+static inline int is_multicast_ether_addr(const struct ether_addr *ea)
+{
+	return ea->addr_bytes[0] & ETHER_GROUP_ADDR;
+}
+
+/**
+ * Check if an Ethernet address is a broadcast address.
+ *
+ * @param ea
+ *   A pointer to an ether_addr structure containing the ethernet address
+ *   to check.
+ * @return
+ *   True (1) if the given ethernet address is a broadcast address;
+ *   false (0) otherwise.
+ */
+static inline int is_broadcast_ether_addr(const struct ether_addr *ea)
+{
+	const unaligned_uint16_t *ea_words = (const unaligned_uint16_t *)ea;
+
+	return (ea_words[0] == 0xFFFF && ea_words[1] == 0xFFFF &&
+		ea_words[2] == 0xFFFF);
+}
+
+/**
+ * Check if an Ethernet address is a universally assigned address.
+ *
+ * @param ea
+ *   A pointer to an ether_addr structure containing the ethernet address
+ *   to check.
+ * @return
+ *   True (1) if the given ethernet address is a universally assigned address;
+ *   false (0) otherwise.
+ */
+static inline int is_universal_ether_addr(const struct ether_addr *ea)
+{
+	return (ea->addr_bytes[0] & ETHER_LOCAL_ADMIN_ADDR) == 0;
+}
+
+/**
+ * Check if an Ethernet address is a locally assigned address.
+ *
+ * @param ea
+ *   A pointer to an ether_addr structure containing the ethernet address
+ *   to check.
+ * @return
+ *   True (1) if the given ethernet address is a locally assigned address;
+ *   false (0) otherwise.
+ */
+static inline int is_local_admin_ether_addr(const struct ether_addr *ea)
+{
+	return (ea->addr_bytes[0] & ETHER_LOCAL_ADMIN_ADDR) != 0;
+}
+
+/**
+ * Check if an Ethernet address is a valid address: a unicast address that
+ * is not filled with zeros.
+ *
+ * @param ea
+ *   A pointer to an ether_addr structure containing the ethernet address
+ *   to check.
+ * @return
+ *   True (1) if the given ethernet address is valid;
+ *   false (0) otherwise.
+ */
+static inline int is_valid_assigned_ether_addr(const struct ether_addr *ea)
+{
+	return is_unicast_ether_addr(ea) && (!is_zero_ether_addr(ea));
+}
+
+/**
+ * Generate a random Ethernet address that is locally administered
+ * and not multicast.
+ * @param addr
+ *   A pointer to Ethernet address.
+ */
+static inline void eth_random_addr(uint8_t *addr)
+{
+	uint64_t rand = rte_rand();
+	uint8_t *p = (uint8_t *)&rand;
+
+	rte_memcpy(addr, p, ETHER_ADDR_LEN);
+	addr[0] &= ~ETHER_GROUP_ADDR;      /* clear multicast bit */
+	addr[0] |= ETHER_LOCAL_ADMIN_ADDR; /* set local assignment bit */
+}
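As a sketch of the predicates above (illustrative; example_make_mac is an invented name, and RTE_ASSERT from rte_debug.h is assumed to be available): a randomly generated address is, by construction, unicast, non-zero and locally administered:

#include <rte_ether.h>
#include <rte_debug.h>

static inline void
example_make_mac(struct ether_addr *addr)
{
	eth_random_addr(addr->addr_bytes);
	/* both assertions hold by construction of eth_random_addr() */
	RTE_ASSERT(is_valid_assigned_ether_addr(addr));
	RTE_ASSERT(is_local_admin_ether_addr(addr));
}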
+
+/**
+ * Fast copy an Ethernet address.
+ *
+ * @param ea_from
+ *   A pointer to an ether_addr structure holding the Ethernet address to copy.
+ * @param ea_to
+ *   A pointer to an ether_addr structure where the Ethernet address is to be
+ *   copied.
+ */
+static inline void ether_addr_copy(const struct ether_addr *ea_from,
+				   struct ether_addr *ea_to)
+{
+#ifdef __INTEL_COMPILER
+	uint16_t *from_words = (uint16_t *)(ea_from->addr_bytes);
+	uint16_t *to_words = (uint16_t *)(ea_to->addr_bytes);
+
+	to_words[0] = from_words[0];
+	to_words[1] = from_words[1];
+	to_words[2] = from_words[2];
+#else
+	/*
+	 * Use the common way, because of a strange gcc warning.
+	 */
+	*ea_to = *ea_from;
+#endif
+}
+
+#define ETHER_ADDR_FMT_SIZE 18
+/**
+ * Format a 48-bit Ethernet address in the pattern xx:xx:xx:xx:xx:xx.
+ *
+ * @param buf
+ *   A pointer to the buffer where the formatted MAC address is written.
+ * @param size
+ *   The format buffer size.
+ * @param eth_addr
+ *   A pointer to an ether_addr structure.
+ */
+static inline void
+ether_format_addr(char *buf, uint16_t size,
+		  const struct ether_addr *eth_addr)
+{
+	snprintf(buf, size, "%02X:%02X:%02X:%02X:%02X:%02X",
+		 eth_addr->addr_bytes[0],
+		 eth_addr->addr_bytes[1],
+		 eth_addr->addr_bytes[2],
+		 eth_addr->addr_bytes[3],
+		 eth_addr->addr_bytes[4],
+		 eth_addr->addr_bytes[5]);
+}
+
+/**
+ * Ethernet header: Contains the destination address, source address
+ * and frame type.
+ */
+struct ether_hdr {
+	struct ether_addr d_addr; /**< Destination address. */
+	struct ether_addr s_addr; /**< Source address. */
+	uint16_t ether_type; /**< Frame type. */
+} __attribute__((__packed__));
+
+/**
+ * Ethernet VLAN Header.
+ * Contains the 16-bit VLAN Tag Control Identifier and the Ethernet type
+ * of the encapsulated frame.
+ */
+struct vlan_hdr {
+	uint16_t vlan_tci; /**< Priority (3) + CFI (1) + Identifier Code (12) */
+	uint16_t eth_proto; /**< Ethernet type of encapsulated frame. */
+} __attribute__((__packed__));
+
+/**
+ * VXLAN protocol header.
+ * Contains the 8-bit flag, 24-bit VXLAN Network Identifier and
+ * Reserved fields (24 bits and 8 bits)
+ */
+struct vxlan_hdr {
+	uint32_t vx_flags; /**< flag (8) + Reserved (24). */
+	uint32_t vx_vni;   /**< VNI (24) + Reserved (8). */
+} __attribute__((__packed__));
+
+/* Ethernet frame types */
+#define ETHER_TYPE_IPv4 0x0800 /**< IPv4 Protocol. */
+#define ETHER_TYPE_IPv6 0x86DD /**< IPv6 Protocol. */
+#define ETHER_TYPE_ARP  0x0806 /**< Arp Protocol. */
+#define ETHER_TYPE_RARP 0x8035 /**< Reverse Arp Protocol. */
+#define ETHER_TYPE_VLAN 0x8100 /**< IEEE 802.1Q VLAN tagging. */
+#define ETHER_TYPE_QINQ 0x88A8 /**< IEEE 802.1ad QinQ tagging. */
+#define ETHER_TYPE_1588 0x88F7 /**< IEEE 802.1AS 1588 Precise Time Protocol. */
+#define ETHER_TYPE_SLOW 0x8809 /**< Slow protocols (LACP and Marker). */
+#define ETHER_TYPE_TEB  0x6558 /**< Transparent Ethernet Bridging. */
+
+#define ETHER_VXLAN_HLEN (sizeof(struct udp_hdr) + sizeof(struct vxlan_hdr))
+/**< VXLAN tunnel header length. */
+
+/**
+ * Extract VLAN tag information into mbuf
+ *
+ * Software version of VLAN stripping
+ *
+ * @param m
+ *   The packet mbuf.
+ * @return
+ *   - 0: Success
+ *   - -1: not a VLAN packet
+ */
+static inline int rte_vlan_strip(struct rte_mbuf *m)
+{
+	struct ether_hdr *eh
+		= rte_pktmbuf_mtod(m, struct ether_hdr *);
+
+	if (eh->ether_type != rte_cpu_to_be_16(ETHER_TYPE_VLAN))
+		return -1;
+
+	struct vlan_hdr *vh = (struct vlan_hdr *)(eh + 1);
+	m->ol_flags |= PKT_RX_VLAN_PKT;
+	m->vlan_tci = rte_be_to_cpu_16(vh->vlan_tci);
+
+	/* Copy ether header over rather than moving whole packet */
+	memmove(rte_pktmbuf_adj(m, sizeof(struct vlan_hdr)),
+		eh, 2 * ETHER_ADDR_LEN);
+
+	return 0;
+}
+
+/**
+ * Insert VLAN tag into mbuf.
+ *
+ * Software version of VLAN unstripping
+ *
+ * @param m
+ *   The packet mbuf.
+ * @return
+ *   - 0: On success
+ *   - -EPERM: mbuf is shared; overwriting it would be unsafe
+ *   - -ENOMEM: cannot allocate a private copy of the shared mbuf
+ *   - -ENOSPC: not enough headroom in mbuf
+ */
+static inline int rte_vlan_insert(struct rte_mbuf **m)
+{
+	struct ether_hdr *oh, *nh;
+	struct vlan_hdr *vh;
+
+	/* Can't insert header if mbuf is shared */
+	if (rte_mbuf_refcnt_read(*m) > 1) {
+		struct rte_mbuf *copy;
+
+		copy = rte_pktmbuf_clone(*m, (*m)->pool);
+		if (unlikely(copy == NULL))
+			return -ENOMEM;
+		rte_pktmbuf_free(*m);
+		*m = copy;
+	}
+
+	oh = rte_pktmbuf_mtod(*m, struct ether_hdr *);
+	nh = (struct ether_hdr *)
+		rte_pktmbuf_prepend(*m, sizeof(struct vlan_hdr));
+	if (nh == NULL)
+		return -ENOSPC;
+
+	memmove(nh, oh, 2 * ETHER_ADDR_LEN);
+	nh->ether_type = rte_cpu_to_be_16(ETHER_TYPE_VLAN);
+
+	vh = (struct vlan_hdr *) (nh + 1);
+	vh->vlan_tci = rte_cpu_to_be_16((*m)->vlan_tci);
+
+	return 0;
+}
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _RTE_ETHER_H_ */
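A usage sketch for the two VLAN helpers above (illustrative; example_vlan_roundtrip is an invented name). rte_vlan_insert() takes a struct rte_mbuf ** because it may substitute a private copy for a shared mbuf:

#include <rte_mbuf.h>
#include <rte_ether.h>

/* Strip a VLAN tag on receive, then re-insert it before transmit.
 * rte_vlan_strip() saves the stripped TCI in (*m)->vlan_tci. */
static int
example_vlan_roundtrip(struct rte_mbuf **m)
{
	if (rte_vlan_strip(*m) != 0)
		return 0;	/* frame was not VLAN-tagged; nothing to do */
	/* ... work on the untagged frame ... */
	return rte_vlan_insert(m);	/* 0 on success, negative errno on failure */
}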
diff --git a/lib/librte_net/rte_gre.h b/lib/librte_net/rte_gre.h
new file mode 100644
index 00000000..46568ff5
--- /dev/null
+++ b/lib/librte_net/rte_gre.h
@@ -0,0 +1,71 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright 2016 6WIND S.A.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ *   * Redistributions of source code must retain the above copyright
+ *     notice, this list of conditions and the following disclaimer.
+ *   * Redistributions in binary form must reproduce the above copyright
+ *     notice, this list of conditions and the following disclaimer in
+ *     the documentation and/or other materials provided with the
+ *     distribution.
+ *   * Neither the name of Intel Corporation nor the names of its
+ *     contributors may be used to endorse or promote products derived
+ *     from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef _RTE_GRE_H_
+#define _RTE_GRE_H_
+
+#include <stdint.h>
+#include <rte_byteorder.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/**
+ * GRE Header
+ */
+struct gre_hdr {
+#if RTE_BYTE_ORDER == RTE_LITTLE_ENDIAN
+	uint16_t res2:4; /**< Reserved */
+	uint16_t s:1;    /**< Sequence Number Present bit */
+	uint16_t k:1;    /**< Key Present bit */
+	uint16_t res1:1; /**< Reserved */
+	uint16_t c:1;    /**< Checksum Present bit */
+	uint16_t ver:3;  /**< Version Number */
+	uint16_t res3:5; /**< Reserved */
+#elif RTE_BYTE_ORDER == RTE_BIG_ENDIAN
+	uint16_t c:1;    /**< Checksum Present bit */
+	uint16_t res1:1; /**< Reserved */
+	uint16_t k:1;    /**< Key Present bit */
+	uint16_t s:1;    /**< Sequence Number Present bit */
+	uint16_t res2:4; /**< Reserved */
+	uint16_t res3:5; /**< Reserved */
+	uint16_t ver:3;  /**< Version Number */
+#endif
+	uint16_t proto;  /**< Protocol Type */
+} __attribute__((__packed__));
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _RTE_GRE_H_ */
diff --git a/lib/librte_net/rte_ip.h b/lib/librte_net/rte_ip.h
index 5b7554ab..4491b86e 100644
--- a/lib/librte_net/rte_ip.h
+++ b/lib/librte_net/rte_ip.h
@@ -229,6 +229,77 @@ rte_raw_cksum(const void *buf, size_t len)
 	return __rte_raw_cksum_reduce(sum);
 }
 
+/**
+ * Compute the raw (non-complemented) checksum of a packet.
+ *
+ * @param m
+ *   The pointer to the mbuf.
+ * @param off
+ *   The offset in bytes to start the checksum.
+ * @param len
+ *   The length in bytes of the data to checksum.
+ * @param cksum
+ *   A pointer to the checksum, filled on success.
+ * @return
+ *   0 on success, -1 on error (bad length or offset).
+ */
+static inline int
+rte_raw_cksum_mbuf(const struct rte_mbuf *m, uint32_t off, uint32_t len,
+	uint16_t *cksum)
+{
+	const struct rte_mbuf *seg;
+	const char *buf;
+	uint32_t sum, tmp;
+	uint32_t seglen, done;
+
+	/* easy case: all data in the first segment */
+	if (off + len <= rte_pktmbuf_data_len(m)) {
+		*cksum = rte_raw_cksum(rte_pktmbuf_mtod_offset(m,
+				const char *, off), len);
+		return 0;
+	}
+
+	if (unlikely(off + len > rte_pktmbuf_pkt_len(m)))
+		return -1; /* invalid params */
+
+	/* otherwise browse the segments to find the offset */
+	seglen = 0;
+	for (seg = m; seg != NULL; seg = seg->next) {
+		seglen = rte_pktmbuf_data_len(seg);
+		if (off < seglen)
+			break;
+		off -= seglen;
+	}
+	seglen -= off;
+	buf = rte_pktmbuf_mtod_offset(seg, const char *, off);
+	if (seglen >= len) {
+		/* all in one segment */
+		*cksum = rte_raw_cksum(buf, len);
+		return 0;
+	}
+
+	/* hard case: process checksum of several segments */
+	sum = 0;
+	done = 0;
+	for (;;) {
+		tmp = __rte_raw_cksum(buf, seglen, 0);
+		if (done & 1)
+			tmp = rte_bswap16(tmp);
+		sum += tmp;
+		done += seglen;
+		if (done == len)
+			break;
+		seg = seg->next;
+		buf = rte_pktmbuf_mtod(seg, const char *);
+		seglen = rte_pktmbuf_data_len(seg);
+		if (seglen > len - done)
+			seglen = len - done;
+	}
+
+	*cksum = __rte_raw_cksum_reduce(sum);
+	return 0;
+}
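A sketch of checksumming a byte range that may cross segment boundaries with the new helper (illustrative; example_payload_cksum is an invented name):

#include <rte_mbuf.h>
#include <rte_ip.h>

static int
example_payload_cksum(const struct rte_mbuf *m, uint32_t off, uint32_t len,
	uint16_t *out)
{
	uint16_t raw;

	if (rte_raw_cksum_mbuf(m, off, len, &raw) < 0)
		return -1;	/* off/len outside the packet */
	*out = (uint16_t)~raw;	/* complement to obtain the final checksum */
	return 0;
}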
+
 /**
  * Process the IPv4 checksum of an IPv4 header.
  *
diff --git a/lib/librte_net/rte_net.c b/lib/librte_net/rte_net.c
new file mode 100644
index 00000000..a8c7aff9
--- /dev/null
+++ b/lib/librte_net/rte_net.c
@@ -0,0 +1,517 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright 2016 6WIND S.A.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ *   * Redistributions of source code must retain the above copyright
+ *     notice, this list of conditions and the following disclaimer.
+ *   * Redistributions in binary form must reproduce the above copyright
+ *     notice, this list of conditions and the following disclaimer in
+ *     the documentation and/or other materials provided with the
+ *     distribution.
+ *   * Neither the name of 6WIND S.A. nor the names of its
+ *     contributors may be used to endorse or promote products derived
+ *     from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <stdint.h>
+
+#include <rte_mbuf.h>
+#include <rte_mbuf_ptype.h>
+#include <rte_byteorder.h>
+#include <rte_ether.h>
+#include <rte_ip.h>
+#include <rte_tcp.h>
+#include <rte_udp.h>
+#include <rte_sctp.h>
+#include <rte_gre.h>
+#include <rte_net.h>
+
+/* get l3 packet type from ip6 next protocol */
+static uint32_t
+ptype_l3_ip6(uint8_t ip6_proto)
+{
+	static const uint32_t ip6_ext_proto_map[256] = {
+		[IPPROTO_HOPOPTS] = RTE_PTYPE_L3_IPV6_EXT - RTE_PTYPE_L3_IPV6,
+		[IPPROTO_ROUTING] = RTE_PTYPE_L3_IPV6_EXT - RTE_PTYPE_L3_IPV6,
+		[IPPROTO_FRAGMENT] = RTE_PTYPE_L3_IPV6_EXT - RTE_PTYPE_L3_IPV6,
+		[IPPROTO_ESP] = RTE_PTYPE_L3_IPV6_EXT - RTE_PTYPE_L3_IPV6,
+		[IPPROTO_AH] = RTE_PTYPE_L3_IPV6_EXT - RTE_PTYPE_L3_IPV6,
+		[IPPROTO_DSTOPTS] = RTE_PTYPE_L3_IPV6_EXT - RTE_PTYPE_L3_IPV6,
+	};
+
+	return RTE_PTYPE_L3_IPV6 + ip6_ext_proto_map[ip6_proto];
+}
+
+/* get l3 packet type from ip version and header length */
+static uint32_t
+ptype_l3_ip(uint8_t ipv_ihl)
+{
+	static const uint32_t ptype_l3_ip_proto_map[256] = {
+		[0x45] = RTE_PTYPE_L3_IPV4,
+		[0x46] = RTE_PTYPE_L3_IPV4_EXT,
+		[0x47] = RTE_PTYPE_L3_IPV4_EXT,
+		[0x48] = RTE_PTYPE_L3_IPV4_EXT,
+		[0x49] = RTE_PTYPE_L3_IPV4_EXT,
+		[0x4A] = RTE_PTYPE_L3_IPV4_EXT,
+		[0x4B] = RTE_PTYPE_L3_IPV4_EXT,
+		[0x4C] = RTE_PTYPE_L3_IPV4_EXT,
+		[0x4D] = RTE_PTYPE_L3_IPV4_EXT,
+		[0x4E] = RTE_PTYPE_L3_IPV4_EXT,
+		[0x4F] = RTE_PTYPE_L3_IPV4_EXT,
+	};
+
+	return ptype_l3_ip_proto_map[ipv_ihl];
+}
+
+/* get l4 packet type from proto */
+static uint32_t
+ptype_l4(uint8_t proto)
+{
+	static const uint32_t ptype_l4_proto[256] = {
+		[IPPROTO_UDP] = RTE_PTYPE_L4_UDP,
+		[IPPROTO_TCP] = RTE_PTYPE_L4_TCP,
+		[IPPROTO_SCTP] = RTE_PTYPE_L4_SCTP,
+	};
+
+	return ptype_l4_proto[proto];
+}
+
+/* get inner l3 packet type from ip6 next protocol */
+static uint32_t
+ptype_inner_l3_ip6(uint8_t ip6_proto)
+{
+	static const uint32_t ptype_inner_ip6_ext_proto_map[256] = {
+		[IPPROTO_HOPOPTS] = RTE_PTYPE_INNER_L3_IPV6_EXT -
+			RTE_PTYPE_INNER_L3_IPV6,
+		[IPPROTO_ROUTING] = RTE_PTYPE_INNER_L3_IPV6_EXT -
+			RTE_PTYPE_INNER_L3_IPV6,
+		[IPPROTO_FRAGMENT] = RTE_PTYPE_INNER_L3_IPV6_EXT -
+			RTE_PTYPE_INNER_L3_IPV6,
+		[IPPROTO_ESP] = RTE_PTYPE_INNER_L3_IPV6_EXT -
+			RTE_PTYPE_INNER_L3_IPV6,
+		[IPPROTO_AH] = RTE_PTYPE_INNER_L3_IPV6_EXT -
+			RTE_PTYPE_INNER_L3_IPV6,
+		[IPPROTO_DSTOPTS] = RTE_PTYPE_INNER_L3_IPV6_EXT -
+			RTE_PTYPE_INNER_L3_IPV6,
+	};
+
+	return RTE_PTYPE_INNER_L3_IPV6 +
+		ptype_inner_ip6_ext_proto_map[ip6_proto];
+}
+
+/* 
get inner l3 packet type from ip version and header length */ +static uint32_t +ptype_inner_l3_ip(uint8_t ipv_ihl) +{ + static const uint32_t ptype_inner_l3_ip_proto_map[256] = { + [0x45] = RTE_PTYPE_INNER_L3_IPV4, + [0x46] = RTE_PTYPE_INNER_L3_IPV4_EXT, + [0x47] = RTE_PTYPE_INNER_L3_IPV4_EXT, + [0x48] = RTE_PTYPE_INNER_L3_IPV4_EXT, + [0x49] = RTE_PTYPE_INNER_L3_IPV4_EXT, + [0x4A] = RTE_PTYPE_INNER_L3_IPV4_EXT, + [0x4B] = RTE_PTYPE_INNER_L3_IPV4_EXT, + [0x4C] = RTE_PTYPE_INNER_L3_IPV4_EXT, + [0x4D] = RTE_PTYPE_INNER_L3_IPV4_EXT, + [0x4E] = RTE_PTYPE_INNER_L3_IPV4_EXT, + [0x4F] = RTE_PTYPE_INNER_L3_IPV4_EXT, + }; + + return ptype_inner_l3_ip_proto_map[ipv_ihl]; +} + +/* get inner l4 packet type from proto */ +static uint32_t +ptype_inner_l4(uint8_t proto) +{ + static const uint32_t ptype_inner_l4_proto[256] = { + [IPPROTO_UDP] = RTE_PTYPE_INNER_L4_UDP, + [IPPROTO_TCP] = RTE_PTYPE_INNER_L4_TCP, + [IPPROTO_SCTP] = RTE_PTYPE_INNER_L4_SCTP, + }; + + return ptype_inner_l4_proto[proto]; +} + +/* get the tunnel packet type if any, update proto and off. */ +static uint32_t +ptype_tunnel(uint16_t *proto, const struct rte_mbuf *m, + uint32_t *off) +{ + switch (*proto) { + case IPPROTO_GRE: { + static const uint8_t opt_len[16] = { + [0x0] = 4, + [0x1] = 8, + [0x2] = 8, + [0x8] = 8, + [0x3] = 12, + [0x9] = 12, + [0xa] = 12, + [0xb] = 16, + }; + const struct gre_hdr *gh; + struct gre_hdr gh_copy; + uint16_t flags; + + gh = rte_pktmbuf_read(m, *off, sizeof(*gh), &gh_copy); + if (unlikely(gh == NULL)) + return 0; + + flags = rte_be_to_cpu_16(*(const uint16_t *)gh); + flags >>= 12; + if (opt_len[flags] == 0) + return 0; + + *off += opt_len[flags]; + *proto = gh->proto; + if (*proto == rte_cpu_to_be_16(ETHER_TYPE_TEB)) + return RTE_PTYPE_TUNNEL_NVGRE; + else + return RTE_PTYPE_TUNNEL_GRE; + } + case IPPROTO_IPIP: + *proto = rte_cpu_to_be_16(ETHER_TYPE_IPv4); + return RTE_PTYPE_TUNNEL_IP; + case IPPROTO_IPV6: + *proto = rte_cpu_to_be_16(ETHER_TYPE_IPv6); + return RTE_PTYPE_TUNNEL_IP; /* IP is also valid for IPv6 */ + default: + return 0; + } +} + +/* get the ipv4 header length */ +static uint8_t +ip4_hlen(const struct ipv4_hdr *hdr) +{ + return (hdr->version_ihl & 0xf) * 4; +} + +/* parse ipv6 extended headers, update offset and return next proto */ +static uint16_t +skip_ip6_ext(uint16_t proto, const struct rte_mbuf *m, uint32_t *off, + int *frag) +{ + struct ext_hdr { + uint8_t next_hdr; + uint8_t len; + }; + const struct ext_hdr *xh; + struct ext_hdr xh_copy; + unsigned int i; + + *frag = 0; + +#define MAX_EXT_HDRS 5 + for (i = 0; i < MAX_EXT_HDRS; i++) { + switch (proto) { + case IPPROTO_HOPOPTS: + case IPPROTO_ROUTING: + case IPPROTO_DSTOPTS: + xh = rte_pktmbuf_read(m, *off, sizeof(*xh), + &xh_copy); + if (xh == NULL) + return 0; + *off += (xh->len + 1) * 8; + proto = xh->next_hdr; + break; + case IPPROTO_FRAGMENT: + xh = rte_pktmbuf_read(m, *off, sizeof(*xh), + &xh_copy); + if (xh == NULL) + return 0; + *off += 8; + proto = xh->next_hdr; + *frag = 1; + return proto; /* this is always the last ext hdr */ + case IPPROTO_NONE: + return 0; + default: + return proto; + } + } + return 0; +} + +/* parse mbuf data to get packet type */ +uint32_t rte_net_get_ptype(const struct rte_mbuf *m, + struct rte_net_hdr_lens *hdr_lens, uint32_t layers) +{ + struct rte_net_hdr_lens local_hdr_lens; + const struct ether_hdr *eh; + struct ether_hdr eh_copy; + uint32_t pkt_type = RTE_PTYPE_L2_ETHER; + uint32_t off = 0; + uint16_t proto; + + if (hdr_lens == NULL) + hdr_lens = &local_hdr_lens; + + eh = rte_pktmbuf_read(m, off, 
sizeof(*eh), &eh_copy); + if (unlikely(eh == NULL)) + return 0; + proto = eh->ether_type; + off = sizeof(*eh); + hdr_lens->l2_len = off; + + if ((layers & RTE_PTYPE_L2_MASK) == 0) + return 0; + + if (proto == rte_cpu_to_be_16(ETHER_TYPE_IPv4)) + goto l3; /* fast path if packet is IPv4 */ + + if (proto == rte_cpu_to_be_16(ETHER_TYPE_VLAN)) { + const struct vlan_hdr *vh; + struct vlan_hdr vh_copy; + + pkt_type = RTE_PTYPE_L2_ETHER_VLAN; + vh = rte_pktmbuf_read(m, off, sizeof(*vh), &vh_copy); + if (unlikely(vh == NULL)) + return pkt_type; + off += sizeof(*vh); + hdr_lens->l2_len += sizeof(*vh); + proto = vh->eth_proto; + } else if (proto == rte_cpu_to_be_16(ETHER_TYPE_QINQ)) { + const struct vlan_hdr *vh; + struct vlan_hdr vh_copy; + + pkt_type = RTE_PTYPE_L2_ETHER_QINQ; + vh = rte_pktmbuf_read(m, off + sizeof(*vh), sizeof(*vh), + &vh_copy); + if (unlikely(vh == NULL)) + return pkt_type; + off += 2 * sizeof(*vh); + hdr_lens->l2_len += 2 * sizeof(*vh); + proto = vh->eth_proto; + } + + l3: + if ((layers & RTE_PTYPE_L3_MASK) == 0) + return pkt_type; + + if (proto == rte_cpu_to_be_16(ETHER_TYPE_IPv4)) { + const struct ipv4_hdr *ip4h; + struct ipv4_hdr ip4h_copy; + + ip4h = rte_pktmbuf_read(m, off, sizeof(*ip4h), &ip4h_copy); + if (unlikely(ip4h == NULL)) + return pkt_type; + + pkt_type |= ptype_l3_ip(ip4h->version_ihl); + hdr_lens->l3_len = ip4_hlen(ip4h); + off += hdr_lens->l3_len; + + if ((layers & RTE_PTYPE_L4_MASK) == 0) + return pkt_type; + + if (ip4h->fragment_offset & rte_cpu_to_be_16( + IPV4_HDR_OFFSET_MASK | IPV4_HDR_MF_FLAG)) { + pkt_type |= RTE_PTYPE_L4_FRAG; + hdr_lens->l4_len = 0; + return pkt_type; + } + proto = ip4h->next_proto_id; + pkt_type |= ptype_l4(proto); + } else if (proto == rte_cpu_to_be_16(ETHER_TYPE_IPv6)) { + const struct ipv6_hdr *ip6h; + struct ipv6_hdr ip6h_copy; + int frag = 0; + + ip6h = rte_pktmbuf_read(m, off, sizeof(*ip6h), &ip6h_copy); + if (unlikely(ip6h == NULL)) + return pkt_type; + + proto = ip6h->proto; + hdr_lens->l3_len = sizeof(*ip6h); + off += hdr_lens->l3_len; + pkt_type |= ptype_l3_ip6(proto); + if ((pkt_type & RTE_PTYPE_L3_MASK) == RTE_PTYPE_L3_IPV6_EXT) { + proto = skip_ip6_ext(proto, m, &off, &frag); + hdr_lens->l3_len = off - hdr_lens->l2_len; + } + if (proto == 0) + return pkt_type; + + if ((layers & RTE_PTYPE_L4_MASK) == 0) + return pkt_type; + + if (frag) { + pkt_type |= RTE_PTYPE_L4_FRAG; + hdr_lens->l4_len = 0; + return pkt_type; + } + pkt_type |= ptype_l4(proto); + } + + if ((pkt_type & RTE_PTYPE_L4_MASK) == RTE_PTYPE_L4_UDP) { + hdr_lens->l4_len = sizeof(struct udp_hdr); + return pkt_type; + } else if ((pkt_type & RTE_PTYPE_L4_MASK) == RTE_PTYPE_L4_TCP) { + const struct tcp_hdr *th; + struct tcp_hdr th_copy; + + th = rte_pktmbuf_read(m, off, sizeof(*th), &th_copy); + if (unlikely(th == NULL)) + return pkt_type & (RTE_PTYPE_L2_MASK | + RTE_PTYPE_L3_MASK); + hdr_lens->l4_len = (th->data_off & 0xf0) >> 2; + return pkt_type; + } else if ((pkt_type & RTE_PTYPE_L4_MASK) == RTE_PTYPE_L4_SCTP) { + hdr_lens->l4_len = sizeof(struct sctp_hdr); + return pkt_type; + } else { + uint32_t prev_off = off; + + hdr_lens->l4_len = 0; + + if ((layers & RTE_PTYPE_TUNNEL_MASK) == 0) + return pkt_type; + + pkt_type |= ptype_tunnel(&proto, m, &off); + hdr_lens->tunnel_len = off - prev_off; + } + + /* same job for inner header: we need to duplicate the code + * because the packet types do not have the same value. 
+ */ + if ((layers & RTE_PTYPE_INNER_L2_MASK) == 0) + return pkt_type; + + if (proto == rte_cpu_to_be_16(ETHER_TYPE_TEB)) { + eh = rte_pktmbuf_read(m, off, sizeof(*eh), &eh_copy); + if (unlikely(eh == NULL)) + return pkt_type; + pkt_type |= RTE_PTYPE_INNER_L2_ETHER; + proto = eh->ether_type; + off += sizeof(*eh); + hdr_lens->inner_l2_len = sizeof(*eh); + } + + if (proto == rte_cpu_to_be_16(ETHER_TYPE_VLAN)) { + const struct vlan_hdr *vh; + struct vlan_hdr vh_copy; + + pkt_type &= ~RTE_PTYPE_INNER_L2_MASK; + pkt_type |= RTE_PTYPE_INNER_L2_ETHER_VLAN; + vh = rte_pktmbuf_read(m, off, sizeof(*vh), &vh_copy); + if (unlikely(vh == NULL)) + return pkt_type; + off += sizeof(*vh); + hdr_lens->inner_l2_len += sizeof(*vh); + proto = vh->eth_proto; + } else if (proto == rte_cpu_to_be_16(ETHER_TYPE_QINQ)) { + const struct vlan_hdr *vh; + struct vlan_hdr vh_copy; + + pkt_type &= ~RTE_PTYPE_INNER_L2_MASK; + pkt_type |= RTE_PTYPE_INNER_L2_ETHER_QINQ; + vh = rte_pktmbuf_read(m, off + sizeof(*vh), sizeof(*vh), + &vh_copy); + if (unlikely(vh == NULL)) + return pkt_type; + off += 2 * sizeof(*vh); + hdr_lens->inner_l2_len += 2 * sizeof(*vh); + proto = vh->eth_proto; + } + + if ((layers & RTE_PTYPE_INNER_L3_MASK) == 0) + return pkt_type; + + if (proto == rte_cpu_to_be_16(ETHER_TYPE_IPv4)) { + const struct ipv4_hdr *ip4h; + struct ipv4_hdr ip4h_copy; + + ip4h = rte_pktmbuf_read(m, off, sizeof(*ip4h), &ip4h_copy); + if (unlikely(ip4h == NULL)) + return pkt_type; + + pkt_type |= ptype_inner_l3_ip(ip4h->version_ihl); + hdr_lens->inner_l3_len = ip4_hlen(ip4h); + off += hdr_lens->inner_l3_len; + + if ((layers & RTE_PTYPE_INNER_L4_MASK) == 0) + return pkt_type; + if (ip4h->fragment_offset & + rte_cpu_to_be_16(IPV4_HDR_OFFSET_MASK | + IPV4_HDR_MF_FLAG)) { + pkt_type |= RTE_PTYPE_INNER_L4_FRAG; + hdr_lens->inner_l4_len = 0; + return pkt_type; + } + proto = ip4h->next_proto_id; + pkt_type |= ptype_inner_l4(proto); + } else if (proto == rte_cpu_to_be_16(ETHER_TYPE_IPv6)) { + const struct ipv6_hdr *ip6h; + struct ipv6_hdr ip6h_copy; + int frag = 0; + + ip6h = rte_pktmbuf_read(m, off, sizeof(*ip6h), &ip6h_copy); + if (unlikely(ip6h == NULL)) + return pkt_type; + + proto = ip6h->proto; + hdr_lens->inner_l3_len = sizeof(*ip6h); + off += hdr_lens->inner_l3_len; + pkt_type |= ptype_inner_l3_ip6(proto); + if ((pkt_type & RTE_PTYPE_INNER_L3_MASK) == + RTE_PTYPE_INNER_L3_IPV6_EXT) { + uint32_t prev_off; + + prev_off = off; + proto = skip_ip6_ext(proto, m, &off, &frag); + hdr_lens->inner_l3_len += off - prev_off; + } + if (proto == 0) + return pkt_type; + + if ((layers & RTE_PTYPE_INNER_L4_MASK) == 0) + return pkt_type; + + if (frag) { + pkt_type |= RTE_PTYPE_INNER_L4_FRAG; + hdr_lens->inner_l4_len = 0; + return pkt_type; + } + pkt_type |= ptype_inner_l4(proto); + } + + if ((pkt_type & RTE_PTYPE_INNER_L4_MASK) == RTE_PTYPE_INNER_L4_UDP) { + hdr_lens->inner_l4_len = sizeof(struct udp_hdr); + } else if ((pkt_type & RTE_PTYPE_INNER_L4_MASK) == + RTE_PTYPE_INNER_L4_TCP) { + const struct tcp_hdr *th; + struct tcp_hdr th_copy; + + th = rte_pktmbuf_read(m, off, sizeof(*th), &th_copy); + if (unlikely(th == NULL)) + return pkt_type & (RTE_PTYPE_INNER_L2_MASK | + RTE_PTYPE_INNER_L3_MASK); + hdr_lens->inner_l4_len = (th->data_off & 0xf0) >> 2; + } else if ((pkt_type & RTE_PTYPE_INNER_L4_MASK) == + RTE_PTYPE_INNER_L4_SCTP) { + hdr_lens->inner_l4_len = sizeof(struct sctp_hdr); + } else { + hdr_lens->inner_l4_len = 0; + } + + return pkt_type; +} diff --git a/lib/librte_net/rte_net.h b/lib/librte_net/rte_net.h new file mode 100644 index 
00000000..d4156aea --- /dev/null +++ b/lib/librte_net/rte_net.h @@ -0,0 +1,94 @@ +/*- + * BSD LICENSE + * + * Copyright 2016 6WIND S.A. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef _RTE_NET_PTYPE_H_ +#define _RTE_NET_PTYPE_H_ + +#ifdef __cplusplus +extern "C" { +#endif + +/** + * Structure containing header lengths associated to a packet, filled + * by rte_net_get_ptype(). + */ +struct rte_net_hdr_lens { + uint8_t l2_len; + uint8_t l3_len; + uint8_t l4_len; + uint8_t tunnel_len; + uint8_t inner_l2_len; + uint8_t inner_l3_len; + uint8_t inner_l4_len; +}; + +/** + * Parse an Ethernet packet to get its packet type. + * + * This function parses the network headers in mbuf data and return its + * packet type. + * + * If it is provided by the user, it also fills a rte_net_hdr_lens + * structure that contains the lengths of the parsed network + * headers. Each length field is valid only if the associated packet + * type is set. For instance, hdr_lens->l2_len is valid only if + * (retval & RTE_PTYPE_L2_MASK) != RTE_PTYPE_UNKNOWN. + * + * Supported packet types are: + * L2: Ether, Vlan, QinQ + * L3: IPv4, IPv6 + * L4: TCP, UDP, SCTP + * Tunnels: IPv4, IPv6, Gre, Nvgre + * + * @param m + * The packet mbuf to be parsed. + * @param hdr_lens + * A pointer to a structure where the header lengths will be returned, + * or NULL. + * @param layers + * List of layers to parse. The function will stop at the first + * empty layer. Examples: + * - To parse all known layers, use RTE_PTYPE_ALL_MASK. + * - To parse only L2 and L3, use RTE_PTYPE_L2_MASK | RTE_PTYPE_L3_MASK + * @return + * The packet type of the packet. 
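rte_net_get_ptype(), declared just below, is the public entry point for the parser defined in rte_net.c above; the layers mask lets a caller stop the walk early. A usage sketch of ours, not from the patch, assuming the mbuf comes from a receive burst:

#include <rte_mbuf.h>
#include <rte_net.h>
#include <rte_tcp.h>

/* Sketch: classify one received mbuf and locate its TCP header. */
static void
classify_pkt(struct rte_mbuf *m)
{
	struct rte_net_hdr_lens hdr_lens;
	uint32_t ptype;

	/* Restricting the mask keeps the parser from descending into
	 * tunnels and inner headers. */
	ptype = rte_net_get_ptype(m, &hdr_lens,
		RTE_PTYPE_L2_MASK | RTE_PTYPE_L3_MASK | RTE_PTYPE_L4_MASK);

	if ((ptype & RTE_PTYPE_L4_MASK) == RTE_PTYPE_L4_TCP) {
		/* l2_len and l3_len are valid because their ptype bits
		 * are set in the return value. */
		uint32_t l4_off = hdr_lens.l2_len + hdr_lens.l3_len;
		struct tcp_hdr *th = rte_pktmbuf_mtod_offset(m,
			struct tcp_hdr *, l4_off);
		(void)th; /* ... inspect ports, flags, etc. ... */
	}
}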
+ */ +uint32_t rte_net_get_ptype(const struct rte_mbuf *m, + struct rte_net_hdr_lens *hdr_lens, uint32_t layers); + +#ifdef __cplusplus +} +#endif + + +#endif /* _RTE_NET_PTYPE_H_ */ diff --git a/lib/librte_net/rte_net_version.map b/lib/librte_net/rte_net_version.map new file mode 100644 index 00000000..3b15e651 --- /dev/null +++ b/lib/librte_net/rte_net_version.map @@ -0,0 +1,6 @@ +DPDK_16.11 { + global: + rte_net_get_ptype; + + local: *; +}; diff --git a/lib/librte_pdump/rte_pdump.c b/lib/librte_pdump/rte_pdump.c index ea5ccd98..59686837 100644 --- a/lib/librte_pdump/rte_pdump.c +++ b/lib/librte_pdump/rte_pdump.c @@ -225,29 +225,6 @@ pdump_tx(uint8_t port __rte_unused, uint16_t qidx __rte_unused, return nb_pkts; } -static int -pdump_get_dombdf(char *device_id, char *domBDF, size_t len) -{ - int ret; - struct rte_pci_addr dev_addr = {0}; - - /* identify if device_id is pci address or name */ - ret = eal_parse_pci_DomBDF(device_id, &dev_addr); - if (ret < 0) - return -1; - - if (dev_addr.domain) - ret = snprintf(domBDF, len, "%u:%u:%u.%u", dev_addr.domain, - dev_addr.bus, dev_addr.devid, - dev_addr.function); - else - ret = snprintf(domBDF, len, "%u:%u.%u", dev_addr.bus, - dev_addr.devid, - dev_addr.function); - - return ret; -} - static int pdump_regitser_rx_callbacks(uint16_t end_q, uint8_t port, uint16_t queue, struct rte_ring *ring, struct rte_mempool *mp, @@ -292,7 +269,7 @@ pdump_regitser_rx_callbacks(uint16_t end_q, uint8_t port, uint16_t queue, if (ret < 0) { RTE_LOG(ERR, PDUMP, "failed to remove rx callback, errno=%d\n", - rte_errno); + -ret); return ret; } cbs->cb = NULL; @@ -347,7 +324,7 @@ pdump_regitser_tx_callbacks(uint16_t end_q, uint8_t port, uint16_t queue, if (ret < 0) { RTE_LOG(ERR, PDUMP, "failed to remove tx callback, errno=%d\n", - rte_errno); + -ret); return ret; } cbs->cb = NULL; @@ -885,7 +862,6 @@ rte_pdump_enable_by_deviceid(char *device_id, uint16_t queue, void *filter) { int ret = 0; - char domBDF[DEVICE_ID_SIZE]; ret = pdump_validate_ring_mp(ring, mp); if (ret < 0) @@ -894,11 +870,7 @@ rte_pdump_enable_by_deviceid(char *device_id, uint16_t queue, if (ret < 0) return ret; - if (pdump_get_dombdf(device_id, domBDF, sizeof(domBDF)) > 0) - ret = pdump_prepare_client_request(domBDF, queue, flags, - ENABLE, ring, mp, filter); - else - ret = pdump_prepare_client_request(device_id, queue, flags, + ret = pdump_prepare_client_request(device_id, queue, flags, ENABLE, ring, mp, filter); return ret; @@ -928,17 +900,12 @@ rte_pdump_disable_by_deviceid(char *device_id, uint16_t queue, uint32_t flags) { int ret = 0; - char domBDF[DEVICE_ID_SIZE]; ret = pdump_validate_flags(flags); if (ret < 0) return ret; - if (pdump_get_dombdf(device_id, domBDF, sizeof(domBDF)) > 0) - ret = pdump_prepare_client_request(domBDF, queue, flags, - DISABLE, NULL, NULL, NULL); - else - ret = pdump_prepare_client_request(device_id, queue, flags, + ret = pdump_prepare_client_request(device_id, queue, flags, DISABLE, NULL, NULL, NULL); return ret; diff --git a/lib/librte_pdump/rte_pdump.h b/lib/librte_pdump/rte_pdump.h index b5f4e2f3..924b8043 100644 --- a/lib/librte_pdump/rte_pdump.h +++ b/lib/librte_pdump/rte_pdump.h @@ -41,6 +41,10 @@ * packet dump library to provide packet capturing support on dpdk. 
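With the pdump_get_dombdf() conversion removed above, rte_pdump_enable_by_deviceid() now forwards the device name string to the pdump server unchanged, so a secondary process passes exactly the name the primary registered. A hedged setup sketch; the handles are assumed to be created elsewhere and the flag choice is illustrative:

#include <rte_pdump.h>

/* Sketch: mirror both directions of queue 0 of a device into `ring`,
 * drawing packet copies from mempool `mp`.  Both handles are assumed
 * to come from rte_ring_create()/rte_pktmbuf_pool_create(). */
static int
start_capture(char *device_id, struct rte_ring *ring, struct rte_mempool *mp)
{
	return rte_pdump_enable_by_deviceid(device_id, 0, RTE_PDUMP_FLAG_RXTX,
			ring, mp, NULL);
}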
*/ +#include +#include +#include + #ifdef __cplusplus extern "C" { #endif diff --git a/lib/librte_pipeline/rte_pipeline.h b/lib/librte_pipeline/rte_pipeline.h index 84d18025..f3663483 100644 --- a/lib/librte_pipeline/rte_pipeline.h +++ b/lib/librte_pipeline/rte_pipeline.h @@ -87,6 +87,7 @@ extern "C" { #include #include +#include struct rte_mbuf; @@ -244,6 +245,7 @@ struct rte_pipeline_table_entry { /** Reserved action */ enum rte_pipeline_action action; + RTE_STD_C11 union { /** Output port ID (meta-data for "Send packet to output port" action) */ @@ -252,7 +254,7 @@ struct rte_pipeline_table_entry { uint32_t table_id; }; /** Start of table entry area for user defined actions and meta-data */ - uint8_t action_data[0]; + __extension__ uint8_t action_data[0]; }; /** diff --git a/lib/librte_port/Makefile b/lib/librte_port/Makefile index 3d84a0e4..44fa7352 100644 --- a/lib/librte_port/Makefile +++ b/lib/librte_port/Makefile @@ -56,6 +56,7 @@ SRCS-$(CONFIG_RTE_LIBRTE_PORT) += rte_port_frag.c SRCS-$(CONFIG_RTE_LIBRTE_PORT) += rte_port_ras.c endif SRCS-$(CONFIG_RTE_LIBRTE_PORT) += rte_port_sched.c +SRCS-$(CONFIG_RTE_LIBRTE_PORT) += rte_port_fd.c ifeq ($(CONFIG_RTE_LIBRTE_KNI),y) SRCS-$(CONFIG_RTE_LIBRTE_PORT) += rte_port_kni.c endif @@ -70,6 +71,7 @@ SYMLINK-$(CONFIG_RTE_LIBRTE_PORT)-include += rte_port_frag.h SYMLINK-$(CONFIG_RTE_LIBRTE_PORT)-include += rte_port_ras.h endif SYMLINK-$(CONFIG_RTE_LIBRTE_PORT)-include += rte_port_sched.h +SYMLINK-$(CONFIG_RTE_LIBRTE_PORT)-include += rte_port_fd.h ifeq ($(CONFIG_RTE_LIBRTE_KNI),y) SYMLINK-$(CONFIG_RTE_LIBRTE_PORT)-include += rte_port_kni.h endif diff --git a/lib/librte_port/rte_port_fd.c b/lib/librte_port/rte_port_fd.c new file mode 100644 index 00000000..0d640f34 --- /dev/null +++ b/lib/librte_port/rte_port_fd.c @@ -0,0 +1,552 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2016 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
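The RTE_STD_C11 tag and the __extension__ marker added to rte_pipeline_table_entry keep the anonymous union and the zero-size action_data[] array compiling under -std=c11. Application data still begins at action_data; a layout sketch under names of our choosing:

#include <rte_pipeline.h>

/* Sketch: application-defined table entry.  Embedding a struct whose
 * last member is a zero-size array relies on the same GNU extension
 * the header now spells out with __extension__. */
struct app_flow_entry {
	struct rte_pipeline_table_entry head; /* action + port/table id */
	/* user area, reachable through head.action_data[] */
	uint32_t flow_id;
	uint64_t pkt_count;
};

/* Assumption: the owning table is created with its per-entry data size
 * (action_data_size in the table creation parameters) set to
 * sizeof(struct app_flow_entry) - sizeof(struct rte_pipeline_table_entry),
 * so the user area is preserved by the pipeline. */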
+ */ +#include +#include +#include + +#include +#include + +#include "rte_port_fd.h" + +/* + * Port FD Reader + */ +#ifdef RTE_PORT_STATS_COLLECT + +#define RTE_PORT_FD_READER_STATS_PKTS_IN_ADD(port, val) \ + do { port->stats.n_pkts_in += val; } while (0) +#define RTE_PORT_FD_READER_STATS_PKTS_DROP_ADD(port, val) \ + do { port->stats.n_pkts_drop += val; } while (0) + +#else + +#define RTE_PORT_FD_READER_STATS_PKTS_IN_ADD(port, val) +#define RTE_PORT_FD_READER_STATS_PKTS_DROP_ADD(port, val) + +#endif + +struct rte_port_fd_reader { + struct rte_port_in_stats stats; + int fd; + uint32_t mtu; + struct rte_mempool *mempool; +}; + +static void * +rte_port_fd_reader_create(void *params, int socket_id) +{ + struct rte_port_fd_reader_params *conf = + (struct rte_port_fd_reader_params *) params; + struct rte_port_fd_reader *port; + + /* Check input parameters */ + if (conf == NULL) { + RTE_LOG(ERR, PORT, "%s: params is NULL\n", __func__); + return NULL; + } + if (conf->fd < 0) { + RTE_LOG(ERR, PORT, "%s: Invalid file descriptor\n", __func__); + return NULL; + } + if (conf->mtu == 0) { + RTE_LOG(ERR, PORT, "%s: Invalid MTU\n", __func__); + return NULL; + } + if (conf->mempool == NULL) { + RTE_LOG(ERR, PORT, "%s: Invalid mempool\n", __func__); + return NULL; + } + + /* Memory allocation */ + port = rte_zmalloc_socket("PORT", sizeof(*port), + RTE_CACHE_LINE_SIZE, socket_id); + if (port == NULL) { + RTE_LOG(ERR, PORT, "%s: Failed to allocate port\n", __func__); + return NULL; + } + + /* Initialization */ + port->fd = conf->fd; + port->mtu = conf->mtu; + port->mempool = conf->mempool; + + return port; +} + +static int +rte_port_fd_reader_rx(void *port, struct rte_mbuf **pkts, uint32_t n_pkts) +{ + struct rte_port_fd_reader *p = (struct rte_port_fd_reader *) port; + uint32_t i; + + if (rte_mempool_get_bulk(p->mempool, (void **) pkts, n_pkts) != 0) + return 0; + + for (i = 0; i < n_pkts; i++) { + rte_mbuf_refcnt_set(pkts[i], 1); + rte_pktmbuf_reset(pkts[i]); + } + + for (i = 0; i < n_pkts; i++) { + struct rte_mbuf *pkt = pkts[i]; + void *pkt_data = rte_pktmbuf_mtod(pkt, void *); + ssize_t n_bytes; + + n_bytes = read(p->fd, pkt_data, (size_t) p->mtu); + if (n_bytes <= 0) + break; + + pkt->data_len = n_bytes; + pkt->pkt_len = n_bytes; + } + + for ( ; i < n_pkts; i++) + rte_pktmbuf_free(pkts[i]); + + RTE_PORT_FD_READER_STATS_PKTS_IN_ADD(p, i); + + return n_pkts; +} + +static int +rte_port_fd_reader_free(void *port) +{ + if (port == NULL) { + RTE_LOG(ERR, PORT, "%s: port is NULL\n", __func__); + return -EINVAL; + } + + rte_free(port); + + return 0; +} + +static int rte_port_fd_reader_stats_read(void *port, + struct rte_port_in_stats *stats, int clear) +{ + struct rte_port_fd_reader *p = + (struct rte_port_fd_reader *) port; + + if (stats != NULL) + memcpy(stats, &p->stats, sizeof(p->stats)); + + if (clear) + memset(&p->stats, 0, sizeof(p->stats)); + + return 0; +} + +/* + * Port FD Writer + */ +#ifdef RTE_PORT_STATS_COLLECT + +#define RTE_PORT_FD_WRITER_STATS_PKTS_IN_ADD(port, val) \ + do { port->stats.n_pkts_in += val; } while (0) +#define RTE_PORT_FD_WRITER_STATS_PKTS_DROP_ADD(port, val) \ + do { port->stats.n_pkts_drop += val; } while (0) + +#else + +#define RTE_PORT_FD_WRITER_STATS_PKTS_IN_ADD(port, val) +#define RTE_PORT_FD_WRITER_STATS_PKTS_DROP_ADD(port, val) + +#endif + +struct rte_port_fd_writer { + struct rte_port_out_stats stats; + + struct rte_mbuf *tx_buf[2 * RTE_PORT_IN_BURST_SIZE_MAX]; + uint32_t tx_burst_sz; + uint16_t tx_buf_count; + uint32_t fd; +}; + +static void * 
+rte_port_fd_writer_create(void *params, int socket_id) +{ + struct rte_port_fd_writer_params *conf = + (struct rte_port_fd_writer_params *) params; + struct rte_port_fd_writer *port; + + /* Check input parameters */ + if ((conf == NULL) || + (conf->tx_burst_sz == 0) || + (conf->tx_burst_sz > RTE_PORT_IN_BURST_SIZE_MAX) || + (!rte_is_power_of_2(conf->tx_burst_sz))) { + RTE_LOG(ERR, PORT, "%s: Invalid input parameters\n", __func__); + return NULL; + } + + /* Memory allocation */ + port = rte_zmalloc_socket("PORT", sizeof(*port), + RTE_CACHE_LINE_SIZE, socket_id); + if (port == NULL) { + RTE_LOG(ERR, PORT, "%s: Failed to allocate port\n", __func__); + return NULL; + } + + /* Initialization */ + port->fd = conf->fd; + port->tx_burst_sz = conf->tx_burst_sz; + port->tx_buf_count = 0; + + return port; +} + +static inline void +send_burst(struct rte_port_fd_writer *p) +{ + uint32_t i; + + for (i = 0; i < p->tx_buf_count; i++) { + struct rte_mbuf *pkt = p->tx_buf[i]; + void *pkt_data = rte_pktmbuf_mtod(pkt, void*); + size_t n_bytes = rte_pktmbuf_data_len(pkt); + ssize_t ret; + + ret = write(p->fd, pkt_data, n_bytes); + if (ret < 0) + break; + } + + RTE_PORT_FD_WRITER_STATS_PKTS_DROP_ADD(p, p->tx_buf_count - i); + + for (i = 0; i < p->tx_buf_count; i++) + rte_pktmbuf_free(p->tx_buf[i]); + + p->tx_buf_count = 0; +} + +static int +rte_port_fd_writer_tx(void *port, struct rte_mbuf *pkt) +{ + struct rte_port_fd_writer *p = + (struct rte_port_fd_writer *) port; + + p->tx_buf[p->tx_buf_count++] = pkt; + RTE_PORT_FD_WRITER_STATS_PKTS_IN_ADD(p, 1); + if (p->tx_buf_count >= p->tx_burst_sz) + send_burst(p); + + return 0; +} + +static int +rte_port_fd_writer_tx_bulk(void *port, + struct rte_mbuf **pkts, + uint64_t pkts_mask) +{ + struct rte_port_fd_writer *p = + (struct rte_port_fd_writer *) port; + uint32_t tx_buf_count = p->tx_buf_count; + + if ((pkts_mask & (pkts_mask + 1)) == 0) { + uint64_t n_pkts = __builtin_popcountll(pkts_mask); + uint32_t i; + + for (i = 0; i < n_pkts; i++) + p->tx_buf[tx_buf_count++] = pkts[i]; + RTE_PORT_FD_WRITER_STATS_PKTS_IN_ADD(p, n_pkts); + } else + for ( ; pkts_mask; ) { + uint32_t pkt_index = __builtin_ctzll(pkts_mask); + uint64_t pkt_mask = 1LLU << pkt_index; + struct rte_mbuf *pkt = pkts[pkt_index]; + + p->tx_buf[tx_buf_count++] = pkt; + RTE_PORT_FD_WRITER_STATS_PKTS_IN_ADD(p, 1); + pkts_mask &= ~pkt_mask; + } + + p->tx_buf_count = tx_buf_count; + if (tx_buf_count >= p->tx_burst_sz) + send_burst(p); + + return 0; +} + +static int +rte_port_fd_writer_flush(void *port) +{ + struct rte_port_fd_writer *p = + (struct rte_port_fd_writer *) port; + + if (p->tx_buf_count > 0) + send_burst(p); + + return 0; +} + +static int +rte_port_fd_writer_free(void *port) +{ + if (port == NULL) { + RTE_LOG(ERR, PORT, "%s: Port is NULL\n", __func__); + return -EINVAL; + } + + rte_port_fd_writer_flush(port); + rte_free(port); + + return 0; +} + +static int rte_port_fd_writer_stats_read(void *port, + struct rte_port_out_stats *stats, int clear) +{ + struct rte_port_fd_writer *p = + (struct rte_port_fd_writer *) port; + + if (stats != NULL) + memcpy(stats, &p->stats, sizeof(p->stats)); + + if (clear) + memset(&p->stats, 0, sizeof(p->stats)); + + return 0; +} + +/* + * Port FD Writer Nodrop + */ +#ifdef RTE_PORT_STATS_COLLECT + +#define RTE_PORT_FD_WRITER_NODROP_STATS_PKTS_IN_ADD(port, val) \ + do { port->stats.n_pkts_in += val; } while (0) +#define RTE_PORT_FD_WRITER_NODROP_STATS_PKTS_DROP_ADD(port, val) \ + do { port->stats.n_pkts_drop += val; } while (0) + +#else + +#define 
RTE_PORT_FD_WRITER_NODROP_STATS_PKTS_IN_ADD(port, val) +#define RTE_PORT_FD_WRITER_NODROP_STATS_PKTS_DROP_ADD(port, val) + +#endif + +struct rte_port_fd_writer_nodrop { + struct rte_port_out_stats stats; + + struct rte_mbuf *tx_buf[2 * RTE_PORT_IN_BURST_SIZE_MAX]; + uint32_t tx_burst_sz; + uint16_t tx_buf_count; + uint64_t n_retries; + uint32_t fd; +}; + +static void * +rte_port_fd_writer_nodrop_create(void *params, int socket_id) +{ + struct rte_port_fd_writer_nodrop_params *conf = + (struct rte_port_fd_writer_nodrop_params *) params; + struct rte_port_fd_writer_nodrop *port; + + /* Check input parameters */ + if ((conf == NULL) || + (conf->fd < 0) || + (conf->tx_burst_sz == 0) || + (conf->tx_burst_sz > RTE_PORT_IN_BURST_SIZE_MAX) || + (!rte_is_power_of_2(conf->tx_burst_sz))) { + RTE_LOG(ERR, PORT, "%s: Invalid input parameters\n", __func__); + return NULL; + } + + /* Memory allocation */ + port = rte_zmalloc_socket("PORT", sizeof(*port), + RTE_CACHE_LINE_SIZE, socket_id); + if (port == NULL) { + RTE_LOG(ERR, PORT, "%s: Failed to allocate port\n", __func__); + return NULL; + } + + /* Initialization */ + port->fd = conf->fd; + port->tx_burst_sz = conf->tx_burst_sz; + port->tx_buf_count = 0; + + /* + * When n_retries is 0 it means that we should wait for every packet to + * send no matter how many retries should it take. To limit number of + * branches in fast path, we use UINT64_MAX instead of branching. + */ + port->n_retries = (conf->n_retries == 0) ? UINT64_MAX : conf->n_retries; + + return port; +} + +static inline void +send_burst_nodrop(struct rte_port_fd_writer_nodrop *p) +{ + uint64_t n_retries; + uint32_t i; + + n_retries = 0; + for (i = 0; (i < p->tx_buf_count) && (n_retries < p->n_retries); i++) { + struct rte_mbuf *pkt = p->tx_buf[i]; + void *pkt_data = rte_pktmbuf_mtod(pkt, void*); + size_t n_bytes = rte_pktmbuf_data_len(pkt); + + for ( ; n_retries < p->n_retries; n_retries++) { + ssize_t ret; + + ret = write(p->fd, pkt_data, n_bytes); + if (ret) + break; + } + } + + RTE_PORT_FD_WRITER_NODROP_STATS_PKTS_DROP_ADD(p, p->tx_buf_count - i); + + for (i = 0; i < p->tx_buf_count; i++) + rte_pktmbuf_free(p->tx_buf[i]); + + p->tx_buf_count = 0; +} + +static int +rte_port_fd_writer_nodrop_tx(void *port, struct rte_mbuf *pkt) +{ + struct rte_port_fd_writer_nodrop *p = + (struct rte_port_fd_writer_nodrop *) port; + + p->tx_buf[p->tx_buf_count++] = pkt; + RTE_PORT_FD_WRITER_NODROP_STATS_PKTS_IN_ADD(p, 1); + if (p->tx_buf_count >= p->tx_burst_sz) + send_burst_nodrop(p); + + return 0; +} + +static int +rte_port_fd_writer_nodrop_tx_bulk(void *port, + struct rte_mbuf **pkts, + uint64_t pkts_mask) +{ + struct rte_port_fd_writer_nodrop *p = + (struct rte_port_fd_writer_nodrop *) port; + uint32_t tx_buf_count = p->tx_buf_count; + + if ((pkts_mask & (pkts_mask + 1)) == 0) { + uint64_t n_pkts = __builtin_popcountll(pkts_mask); + uint32_t i; + + for (i = 0; i < n_pkts; i++) + p->tx_buf[tx_buf_count++] = pkts[i]; + RTE_PORT_FD_WRITER_NODROP_STATS_PKTS_IN_ADD(p, n_pkts); + } else + for ( ; pkts_mask; ) { + uint32_t pkt_index = __builtin_ctzll(pkts_mask); + uint64_t pkt_mask = 1LLU << pkt_index; + struct rte_mbuf *pkt = pkts[pkt_index]; + + p->tx_buf[tx_buf_count++] = pkt; + RTE_PORT_FD_WRITER_NODROP_STATS_PKTS_IN_ADD(p, 1); + pkts_mask &= ~pkt_mask; + } + + p->tx_buf_count = tx_buf_count; + if (tx_buf_count >= p->tx_burst_sz) + send_burst_nodrop(p); + + return 0; +} + +static int +rte_port_fd_writer_nodrop_flush(void *port) +{ + struct rte_port_fd_writer_nodrop *p = + (struct rte_port_fd_writer_nodrop 
*) port; + + if (p->tx_buf_count > 0) + send_burst_nodrop(p); + + return 0; +} + +static int +rte_port_fd_writer_nodrop_free(void *port) +{ + if (port == NULL) { + RTE_LOG(ERR, PORT, "%s: Port is NULL\n", __func__); + return -EINVAL; + } + + rte_port_fd_writer_nodrop_flush(port); + rte_free(port); + +return 0; +} + +static int rte_port_fd_writer_nodrop_stats_read(void *port, + struct rte_port_out_stats *stats, int clear) +{ + struct rte_port_fd_writer_nodrop *p = + (struct rte_port_fd_writer_nodrop *) port; + + if (stats != NULL) + memcpy(stats, &p->stats, sizeof(p->stats)); + + if (clear) + memset(&p->stats, 0, sizeof(p->stats)); + + return 0; +} + +/* + * Summary of port operations + */ +struct rte_port_in_ops rte_port_fd_reader_ops = { + .f_create = rte_port_fd_reader_create, + .f_free = rte_port_fd_reader_free, + .f_rx = rte_port_fd_reader_rx, + .f_stats = rte_port_fd_reader_stats_read, +}; + +struct rte_port_out_ops rte_port_fd_writer_ops = { + .f_create = rte_port_fd_writer_create, + .f_free = rte_port_fd_writer_free, + .f_tx = rte_port_fd_writer_tx, + .f_tx_bulk = rte_port_fd_writer_tx_bulk, + .f_flush = rte_port_fd_writer_flush, + .f_stats = rte_port_fd_writer_stats_read, +}; + +struct rte_port_out_ops rte_port_fd_writer_nodrop_ops = { + .f_create = rte_port_fd_writer_nodrop_create, + .f_free = rte_port_fd_writer_nodrop_free, + .f_tx = rte_port_fd_writer_nodrop_tx, + .f_tx_bulk = rte_port_fd_writer_nodrop_tx_bulk, + .f_flush = rte_port_fd_writer_nodrop_flush, + .f_stats = rte_port_fd_writer_nodrop_stats_read, +}; diff --git a/lib/librte_port/rte_port_fd.h b/lib/librte_port/rte_port_fd.h new file mode 100644 index 00000000..77a2d31b --- /dev/null +++ b/lib/librte_port/rte_port_fd.h @@ -0,0 +1,105 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2016 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
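The three port flavors above share one pattern: the reader fills mbufs from read(), the writer batches write() calls per burst, and the nodrop writer retries failed writes up to n_retries times. A creation sketch for the reader, using only the ops table exported above; the header below documents that the descriptor must be non-blocking, and `mp` is a hypothetical existing pktmbuf pool:

#include <fcntl.h>

#include <rte_lcore.h>
#include <rte_mempool.h>
#include <rte_port_fd.h>

/* Sketch: build an fd_reader port on an already-open descriptor,
 * e.g. one end of a pipe or a TAP device. */
static void *
make_fd_reader(int fd, struct rte_mempool *mp)
{
	struct rte_port_fd_reader_params params = {
		.fd = fd,
		.mtu = 1500, /* largest read() issued per packet */
		.mempool = mp,
	};

	/* A blocking fd would stall rte_port_fd_reader_rx(). */
	fcntl(fd, F_SETFL, O_NONBLOCK);

	/* f_create returns an opaque port handle, or NULL on bad params. */
	return rte_port_fd_reader_ops.f_create(&params, rte_socket_id());
}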
+ */ + +#ifndef __INCLUDE_RTE_PORT_FD_H__ +#define __INCLUDE_RTE_PORT_FD_H__ + +#ifdef __cplusplus +extern "C" { +#endif + +/** + * @file + * RTE Port FD Device + * + * fd_reader: input port built on top of valid non-blocking file descriptor + * fd_writer: output port built on top of valid non-blocking file descriptor + * + ***/ + +#include + +#include +#include "rte_port.h" + +/** fd_reader port parameters */ +struct rte_port_fd_reader_params { + /** File descriptor */ + int fd; + + /** Maximum Transfer Unit (MTU) */ + uint32_t mtu; + + /** Pre-initialized buffer pool */ + struct rte_mempool *mempool; +}; + +/** fd_reader port operations */ +extern struct rte_port_in_ops rte_port_fd_reader_ops; + +/** fd_writer port parameters */ +struct rte_port_fd_writer_params { + /** File descriptor */ + int fd; + + /**< Recommended write burst size. The actual burst size can be + * bigger or smaller than this value. + */ + uint32_t tx_burst_sz; +}; + +/** fd_writer port operations */ +extern struct rte_port_out_ops rte_port_fd_writer_ops; + +/** fd_writer_nodrop port parameters */ +struct rte_port_fd_writer_nodrop_params { + /** File descriptor */ + int fd; + + /**< Recommended write burst size. The actual burst size can be + * bigger or smaller than this value. + */ + uint32_t tx_burst_sz; + + /** Maximum number of retries, 0 for no limit */ + uint32_t n_retries; +}; + +/** fd_writer_nodrop port operations */ +extern struct rte_port_out_ops rte_port_fd_writer_nodrop_ops; + +#ifdef __cplusplus +} +#endif + +#endif diff --git a/lib/librte_port/rte_port_source_sink.h b/lib/librte_port/rte_port_source_sink.h index 4db8a8a8..be585a77 100644 --- a/lib/librte_port/rte_port_source_sink.h +++ b/lib/librte_port/rte_port_source_sink.h @@ -55,7 +55,7 @@ struct rte_port_source_params { struct rte_mempool *mempool; /** The full path of the pcap file to read packets from */ - char *file_name; + const char *file_name; /** The number of bytes to be read from each packet in the * pcap file. If this value is 0, the whole packet is read; * if it is bigger than packet size, the generated packets @@ -69,7 +69,7 @@ extern struct rte_port_in_ops rte_port_source_ops; /** sink port parameters */ struct rte_port_sink_params { /** The full path of the pcap file to write the packets to */ - char *file_name; + const char *file_name; /** The maximum number of packets write to the pcap file. * If this value is 0, the "infinite" write will be carried * out. diff --git a/lib/librte_port/rte_port_version.map b/lib/librte_port/rte_port_version.map index 048c20d7..6470629b 100644 --- a/lib/librte_port/rte_port_version.map +++ b/lib/librte_port/rte_port_version.map @@ -42,3 +42,12 @@ DPDK_16.07 { rte_port_kni_writer_nodrop_ops; } DPDK_2.2; + +DPDK_16.11 { + global: + + rte_port_fd_reader_ops; + rte_port_fd_writer_ops; + rte_port_fd_writer_nodrop_ops; + +} DPDK_16.07; diff --git a/lib/librte_reorder/rte_reorder.h b/lib/librte_reorder/rte_reorder.h index c7a2934c..737e0554 100644 --- a/lib/librte_reorder/rte_reorder.h +++ b/lib/librte_reorder/rte_reorder.h @@ -44,6 +44,8 @@ * */ +#include + #ifdef __cplusplus extern "C" { #endif diff --git a/lib/librte_ring/rte_ring.h b/lib/librte_ring/rte_ring.h index 0e22e694..32b8c8d2 100644 --- a/lib/librte_ring/rte_ring.h +++ b/lib/librte_ring/rte_ring.h @@ -187,7 +187,7 @@ struct rte_ring { struct rte_ring_debug_stats stats[RTE_MAX_LCORE]; #endif - void * ring[0] __rte_cache_aligned; /**< Memory space of ring starts here. + void *ring[] __rte_cache_aligned; /**< Memory space of ring starts here. 
* not volatile so need to be careful * about compiler re-ordering */ }; @@ -341,7 +341,7 @@ void rte_ring_free(struct rte_ring *r); int rte_ring_set_water_mark(struct rte_ring *r, unsigned count); /** - * Dump the status of the ring to the console. + * Dump the status of the ring to a file. * * @param f * A pointer to a file for output diff --git a/lib/librte_sched/rte_bitmap.h b/lib/librte_sched/rte_bitmap.h index ff675c58..010d752c 100644 --- a/lib/librte_sched/rte_bitmap.h +++ b/lib/librte_sched/rte_bitmap.h @@ -64,6 +64,7 @@ extern "C" { * ***/ +#include #include #include #include @@ -103,7 +104,7 @@ struct rte_bitmap { uint32_t go2; /**< Bitmap scan: Go/stop condition for current array2 cache line */ /* Storage space for array1 and array2 */ - uint8_t memory[0]; + uint8_t memory[]; }; static inline void diff --git a/lib/librte_sched/rte_reciprocal.h b/lib/librte_sched/rte_reciprocal.h index abd15251..5e21f096 100644 --- a/lib/librte_sched/rte_reciprocal.h +++ b/lib/librte_sched/rte_reciprocal.h @@ -22,6 +22,8 @@ #ifndef _RTE_RECIPROCAL_H_ #define _RTE_RECIPROCAL_H_ +#include + struct rte_reciprocal { uint32_t m; uint8_t sh1, sh2; diff --git a/lib/librte_sched/rte_sched_common.h b/lib/librte_sched/rte_sched_common.h index 8920adec..aed144ba 100644 --- a/lib/librte_sched/rte_sched_common.h +++ b/lib/librte_sched/rte_sched_common.h @@ -38,6 +38,7 @@ extern "C" { #endif +#include #include #define __rte_aligned_16 __attribute__((__aligned__(16))) diff --git a/lib/librte_table/Makefile b/lib/librte_table/Makefile index 7a8a3f3c..c82c7696 100644 --- a/lib/librte_table/Makefile +++ b/lib/librte_table/Makefile @@ -1,6 +1,6 @@ # BSD LICENSE # -# Copyright(c) 2010-2014 Intel Corporation. All rights reserved. +# Copyright(c) 2010-2016 Intel Corporation. All rights reserved. # All rights reserved. # # Redistribution and use in source and binary forms, with or without @@ -51,6 +51,7 @@ SRCS-$(CONFIG_RTE_LIBRTE_TABLE) += rte_table_lpm_ipv6.c ifeq ($(CONFIG_RTE_LIBRTE_ACL),y) SRCS-$(CONFIG_RTE_LIBRTE_TABLE) += rte_table_acl.c endif +SRCS-$(CONFIG_RTE_LIBRTE_TABLE) += rte_table_hash_cuckoo.c SRCS-$(CONFIG_RTE_LIBRTE_TABLE) += rte_table_hash_key8.c SRCS-$(CONFIG_RTE_LIBRTE_TABLE) += rte_table_hash_key16.c SRCS-$(CONFIG_RTE_LIBRTE_TABLE) += rte_table_hash_key32.c @@ -80,5 +81,6 @@ DEPDIRS-$(CONFIG_RTE_LIBRTE_TABLE) += lib/librte_lpm ifeq ($(CONFIG_RTE_LIBRTE_ACL),y) DEPDIRS-$(CONFIG_RTE_LIBRTE_TABLE) += lib/librte_acl endif +DEPDIRS-$(CONFIG_RTE_LIBRTE_TABLE) += lib/librte_hash include $(RTE_SDK)/mk/rte.lib.mk diff --git a/lib/librte_table/rte_table_hash.h b/lib/librte_table/rte_table_hash.h index 9d17516a..57505a6f 100644 --- a/lib/librte_table/rte_table_hash.h +++ b/lib/librte_table/rte_table_hash.h @@ -1,7 +1,7 @@ /*- * BSD LICENSE * - * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. + * Copyright(c) 2010-2016 Intel Corporation. All rights reserved. * All rights reserved. 
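Replacing the zero-size arrays in rte_ring and rte_bitmap with C11 flexible array members changes no layout: sizeof() still excludes the trailing array, so allocations must add the element storage explicitly, exactly as before. A self-contained illustration of the sizing rule these structures depend on:

#include <stdint.h>
#include <stdlib.h>

/* Sketch: header plus flexible storage carved from one allocation,
 * mirroring what rte_ring_create() does internally. */
struct demo_ring {
	uint32_t size;  /* slot count */
	void *ring[];   /* flexible array member */
};

static struct demo_ring *
demo_ring_alloc(uint32_t count)
{
	/* sizeof(struct demo_ring) covers the header only; the slots
	 * must be added by hand. */
	return malloc(sizeof(struct demo_ring) + count * sizeof(void *));
}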
 *
 * Redistribution and use in source and binary forms, with or without
@@ -363,6 +363,35 @@ struct rte_table_hash_key32_ext_params {
 /** Extendible bucket hash table operations */
 extern struct rte_table_ops rte_table_hash_key32_ext_ops;
 
+/** Cuckoo hash table parameters */
+struct rte_table_hash_cuckoo_params {
+	/** Key size (number of bytes) */
+	uint32_t key_size;
+
+	/** Maximum number of hash table entries */
+	uint32_t n_keys;
+
+	/** Hash function used to calculate the hash */
+	rte_table_hash_op_hash f_hash;
+
+	/** Seed value or init value used by f_hash */
+	uint32_t seed;
+
+	/** Byte offset within packet meta-data where the 4-byte key signature
+	    is located. Valid for pre-computed key signature tables, ignored for
+	    do-sig tables. */
+	uint32_t signature_offset;
+
+	/** Byte offset within packet meta-data where the key is located */
+	uint32_t key_offset;
+
+	/** Hash table name */
+	const char *name;
+};
+
+/** Cuckoo hash table operations */
+extern struct rte_table_ops rte_table_hash_cuckoo_dosig_ops;
+
 #ifdef __cplusplus
 }
 #endif
diff --git a/lib/librte_table/rte_table_hash_cuckoo.c b/lib/librte_table/rte_table_hash_cuckoo.c
new file mode 100644
index 00000000..ff7baee3
--- /dev/null
+++ b/lib/librte_table/rte_table_hash_cuckoo.c
@@ -0,0 +1,382 @@
+/*-
+ *   BSD LICENSE
+ *
+ *   Copyright(c) 2016 Intel Corporation. All rights reserved.
+ *   All rights reserved.
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in
+ *       the documentation and/or other materials provided with the
+ *       distribution.
+ *     * Neither the name of Intel Corporation nor the names of its
+ *       contributors may be used to endorse or promote products derived
+ *       from this software without specific prior written permission.
+ *
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
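A hedged sketch of filling the parameter block declared above; the wrapper assumes the rte_table_hash_op_hash prototype takes (key, key size, seed), and rte_hash_crc() supplies the actual mixing. Offsets and sizes are illustrative:

#include <rte_hash_crc.h>
#include <rte_table_hash.h>

/* Sketch: CRC-based hash for the cuckoo table (wrapper name ours). */
static uint64_t
app_crc_hash(void *key, uint32_t key_size, uint64_t seed)
{
	return rte_hash_crc(key, key_size, (uint32_t)seed);
}

static struct rte_table_hash_cuckoo_params app_cuckoo_params = {
	.key_size = 16,
	.n_keys = 1 << 16,
	.f_hash = app_crc_hash,
	.seed = 0,
	.signature_offset = 0, /* ignored by the do-sig ops */
	.key_offset = 128,     /* app-specific key location in mbuf metadata */
	.name = "app_cuckoo",
};

The table is then instantiated through rte_table_hash_cuckoo_dosig_ops.f_create(). Note that the create path below casts f_hash to rte_hash_function for the underlying rte_hash object, so the wrapper is also called with rte_hash's (const void *, uint32_t, uint32_t) prototype; that only works because the argument passing happens to line up on the supported ABIs, which is worth keeping in mind when supplying a custom hash.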
+ */ +#include +#include + +#include +#include +#include +#include +#include + +#include +#include "rte_table_hash.h" + +#ifdef RTE_TABLE_STATS_COLLECT + +#define RTE_TABLE_HASH_CUCKOO_STATS_PKTS_IN_ADD(table, val) \ + (table->stats.n_pkts_in += val) +#define RTE_TABLE_HASH_CUCKOO_STATS_PKTS_LOOKUP_MISS(table, val) \ + (table->stats.n_pkts_lookup_miss += val) + +#else + +#define RTE_TABLE_HASH_CUCKOO_STATS_PKTS_IN_ADD(table, val) +#define RTE_TABLE_HASH_CUCKOO_STATS_PKTS_LOOKUP_MISS(table, val) + +#endif + + +struct rte_table_hash { + struct rte_table_stats stats; + + /* Input parameters */ + uint32_t key_size; + uint32_t entry_size; + uint32_t n_keys; + rte_table_hash_op_hash f_hash; + uint32_t seed; + uint32_t signature_offset; + uint32_t key_offset; + const char *name; + + /* cuckoo hash table object */ + struct rte_hash *h_table; + + /* Lookup table */ + uint8_t memory[0] __rte_cache_aligned; }; + +static int +check_params_create_hash_cuckoo(const struct +rte_table_hash_cuckoo_params *params) { + /* Check for valid parameters */ + if (params == NULL) { + RTE_LOG(ERR, TABLE, "NULL Input Parameters.\n"); + return -EINVAL; + } + + if (params->key_size == 0) { + RTE_LOG(ERR, TABLE, "Invalid key_size.\n"); + return -EINVAL; + } + + if (params->n_keys == 0) { + RTE_LOG(ERR, TABLE, "Invalid n_keys.\n"); + return -EINVAL; + } + + if (params->f_hash == NULL) { + RTE_LOG(ERR, TABLE, "f_hash is NULL.\n"); + return -EINVAL; + } + + if (params->name == NULL) { + RTE_LOG(ERR, TABLE, "Table name is NULL.\n"); + return -EINVAL; + } + + return 0; +} + +static void * +rte_table_hash_cuckoo_create(void *params, + int socket_id, + uint32_t entry_size) +{ + struct rte_hash *rte_hash_handle; + struct rte_table_hash *t; + uint32_t total_size, total_cl_size; + + /* Check input parameters */ + struct rte_table_hash_cuckoo_params *p = + (struct rte_table_hash_cuckoo_params *) params; + + if (check_params_create_hash_cuckoo(params)) + return NULL; + + /* Memory allocation */ + total_cl_size = + (sizeof(struct rte_table_hash) + + RTE_CACHE_LINE_SIZE) / RTE_CACHE_LINE_SIZE; + total_cl_size += (p->n_keys * entry_size + + RTE_CACHE_LINE_SIZE) / RTE_CACHE_LINE_SIZE; + total_size = total_cl_size * RTE_CACHE_LINE_SIZE; + + t = rte_zmalloc_socket("TABLE", + total_size, + RTE_CACHE_LINE_SIZE, + socket_id); + if (t == NULL) { + RTE_LOG(ERR, TABLE, + "%s: Cannot allocate %u bytes for Cuckoo hash table\n", + __func__, + (uint32_t)sizeof(struct rte_table_hash)); + return NULL; + } + + /* Create cuckoo hash table */ + struct rte_hash_parameters hash_cuckoo_params = { + .entries = p->n_keys, + .key_len = p->key_size, + .hash_func = (rte_hash_function)(p->f_hash), + .hash_func_init_val = p->seed, + .socket_id = socket_id, + .name = p->name + }; + + rte_hash_handle = rte_hash_find_existing(p->name); + if (rte_hash_handle == NULL) { + rte_hash_handle = rte_hash_create(&hash_cuckoo_params); + if (NULL == rte_hash_handle) { + RTE_LOG(ERR, TABLE, + "%s: failed to create cuckoo hash table. 
keysize: %u", + __func__, hash_cuckoo_params.key_len); + rte_free(t); + return NULL; + } + } + + /* initialize the cuckoo hash parameters */ + t->key_size = p->key_size; + t->entry_size = entry_size; + t->n_keys = p->n_keys; + t->f_hash = p->f_hash; + t->seed = p->seed; + t->signature_offset = p->signature_offset; + t->key_offset = p->key_offset; + t->name = p->name; + t->h_table = rte_hash_handle; + + RTE_LOG(INFO, TABLE, + "%s: Cuckoo Hash table memory footprint is %u bytes\n", + __func__, total_size); + return t; +} + +static int +rte_table_hash_cuckoo_free(void *table) { + if (table == NULL) { + RTE_LOG(ERR, TABLE, "%s: table parameter is NULL\n", __func__); + return -EINVAL; + } + + struct rte_table_hash *t = (struct rte_table_hash *)table; + + rte_hash_free(t->h_table); + rte_free(t); + + return 0; +} + +static int +rte_table_hash_cuckoo_entry_add(void *table, void *key, void *entry, + int *key_found, void **entry_ptr) { + int pos = 0; + + if (table == NULL) { + RTE_LOG(ERR, TABLE, "%s: table parameter is NULL\n", __func__); + return -EINVAL; + } + + if (key == NULL) { + RTE_LOG(ERR, TABLE, "%s: key parameter is NULL\n", __func__); + return -EINVAL; + } + + if (entry == NULL) { + RTE_LOG(ERR, TABLE, "%s: entry parameter is NULL\n", __func__); + return -EINVAL; + } + + struct rte_table_hash *t = (struct rte_table_hash *)table; + + /* Find Existing entries */ + pos = rte_hash_lookup(t->h_table, key); + if (pos >= 0) { + uint8_t *existing_entry; + + *key_found = 1; + existing_entry = &t->memory[pos * t->entry_size]; + memcpy(existing_entry, entry, t->entry_size); + *entry_ptr = existing_entry; + + return 0; +} else if (pos == -ENOENT) { + /* Entry not found. Adding new entry */ + uint8_t *new_entry; + + pos = rte_hash_add_key(t->h_table, key); + if (pos < 0) { + RTE_LOG(ERR, TABLE, + "%s: Entry not added, status : %u\n", + __func__, pos); + return pos; + } + + new_entry = &t->memory[pos * t->entry_size]; + memcpy(new_entry, entry, t->entry_size); + + *key_found = 0; + *entry_ptr = new_entry; + return 0; + } + return pos; +} + +static int +rte_table_hash_cuckoo_entry_delete(void *table, void *key, + int *key_found, __rte_unused void *entry) { + int pos = 0; + + if (table == NULL) { + RTE_LOG(ERR, TABLE, "%s: table parameter is NULL\n", __func__); + return -EINVAL; + } + + if (key == NULL) { + RTE_LOG(ERR, TABLE, "%s: key parameter is NULL\n", __func__); + return -EINVAL; + } + + struct rte_table_hash *t = (struct rte_table_hash *)table; + + pos = rte_hash_del_key(t->h_table, key); + if (pos >= 0) { + *key_found = 1; + uint8_t *entry_ptr = &t->memory[pos * t->entry_size]; + + if (entry) + memcpy(entry, entry_ptr, t->entry_size); + + memset(&t->memory[pos * t->entry_size], 0, t->entry_size); + } + + return pos; +} + + +static int +rte_table_hash_cuckoo_lookup_dosig(void *table, + struct rte_mbuf **pkts, + uint64_t pkts_mask, + uint64_t *lookup_hit_mask, + void **entries) +{ + struct rte_table_hash *t = (struct rte_table_hash *)table; + uint64_t pkts_mask_out = 0; + uint32_t i; + + __rte_unused uint32_t n_pkts_in = __builtin_popcountll(pkts_mask); + + RTE_TABLE_HASH_CUCKOO_STATS_PKTS_IN_ADD(t, n_pkts_in); + + if ((pkts_mask & (pkts_mask + 1)) == 0) { + const uint8_t *keys[64]; + int32_t positions[64], status; + + /* Keys for bulk lookup */ + for (i = 0; i < n_pkts_in; i++) + keys[i] = RTE_MBUF_METADATA_UINT8_PTR(pkts[i], + t->key_offset); + + /* Bulk Lookup */ + status = rte_hash_lookup_bulk(t->h_table, + (const void **) keys, + n_pkts_in, + positions); + + if (status == 0) { + for (i = 0; 
i < n_pkts_in; i++) { + if (likely(positions[i] >= 0)) { + uint64_t pkt_mask = 1LLU << i; + + entries[i] = &t->memory[positions[i] + * t->entry_size]; + pkts_mask_out |= pkt_mask; + } + } + } + } else { + for (i = 0; i < (uint32_t)(RTE_PORT_IN_BURST_SIZE_MAX + - __builtin_clzll(pkts_mask)); i++) { + uint64_t pkt_mask = 1LLU << i; + + if (pkt_mask & pkts_mask) { + struct rte_mbuf *pkt = pkts[i]; + uint8_t *key = RTE_MBUF_METADATA_UINT8_PTR(pkt, + t->key_offset); + int pos; + + pos = rte_hash_lookup(t->h_table, key); + if (likely(pos >= 0)) { + entries[i] = &t->memory[pos + * t->entry_size]; + pkts_mask_out |= pkt_mask; + } + } + } + } + + *lookup_hit_mask = pkts_mask_out; + RTE_TABLE_HASH_CUCKOO_STATS_PKTS_LOOKUP_MISS(t, + n_pkts_in - __builtin_popcountll(pkts_mask_out)); + + return 0; + +} + +static int +rte_table_hash_cuckoo_stats_read(void *table, struct rte_table_stats *stats, + int clear) +{ + struct rte_table_hash *t = (struct rte_table_hash *) table; + + if (stats != NULL) + memcpy(stats, &t->stats, sizeof(t->stats)); + + if (clear) + memset(&t->stats, 0, sizeof(t->stats)); + + return 0; +} + +struct rte_table_ops rte_table_hash_cuckoo_dosig_ops = { + .f_create = rte_table_hash_cuckoo_create, + .f_free = rte_table_hash_cuckoo_free, + .f_add = rte_table_hash_cuckoo_entry_add, + .f_delete = rte_table_hash_cuckoo_entry_delete, + .f_add_bulk = NULL, + .f_delete_bulk = NULL, + .f_lookup = rte_table_hash_cuckoo_lookup_dosig, + .f_stats = rte_table_hash_cuckoo_stats_read, +}; diff --git a/lib/librte_table/rte_table_hash_key16.c b/lib/librte_table/rte_table_hash_key16.c index b7e000fd..08d4d77e 100644 --- a/lib/librte_table/rte_table_hash_key16.c +++ b/lib/librte_table/rte_table_hash_key16.c @@ -130,7 +130,7 @@ rte_table_hash_create_key16_lru(void *params, /* Check input parameters */ if ((check_params_create_lru(p) != 0) || ((sizeof(struct rte_table_hash) % RTE_CACHE_LINE_SIZE) != 0) || - ((sizeof(struct rte_bucket_4_16) % RTE_CACHE_LINE_SIZE) != 0)) + ((sizeof(struct rte_bucket_4_16) % 64) != 0)) return NULL; n_entries_per_bucket = 4; key_size = 16; @@ -344,7 +344,7 @@ rte_table_hash_create_key16_ext(void *params, /* Check input parameters */ if ((check_params_create_ext(p) != 0) || ((sizeof(struct rte_table_hash) % RTE_CACHE_LINE_SIZE) != 0) || - ((sizeof(struct rte_bucket_4_16) % RTE_CACHE_LINE_SIZE) != 0)) + ((sizeof(struct rte_bucket_4_16) % 64) != 0)) return NULL; n_entries_per_bucket = 4; diff --git a/lib/librte_table/rte_table_hash_key32.c b/lib/librte_table/rte_table_hash_key32.c index a7aba492..161f6b7a 100644 --- a/lib/librte_table/rte_table_hash_key32.c +++ b/lib/librte_table/rte_table_hash_key32.c @@ -129,7 +129,7 @@ rte_table_hash_create_key32_lru(void *params, /* Check input parameters */ if ((check_params_create_lru(p) != 0) || ((sizeof(struct rte_table_hash) % RTE_CACHE_LINE_SIZE) != 0) || - ((sizeof(struct rte_bucket_4_32) % RTE_CACHE_LINE_SIZE) != 0)) { + ((sizeof(struct rte_bucket_4_32) % 64) != 0)) { return NULL; } n_entries_per_bucket = 4; @@ -337,7 +337,7 @@ rte_table_hash_create_key32_ext(void *params, /* Check input parameters */ if ((check_params_create_ext(p) != 0) || ((sizeof(struct rte_table_hash) % RTE_CACHE_LINE_SIZE) != 0) || - ((sizeof(struct rte_bucket_4_32) % RTE_CACHE_LINE_SIZE) != 0)) + ((sizeof(struct rte_bucket_4_32) % 64) != 0)) return NULL; n_entries_per_bucket = 4; diff --git a/lib/librte_table/rte_table_hash_key8.c b/lib/librte_table/rte_table_hash_key8.c index e2e2bdc4..b04f60dc 100644 --- a/lib/librte_table/rte_table_hash_key8.c +++ 
b/lib/librte_table/rte_table_hash_key8.c @@ -125,7 +125,7 @@ rte_table_hash_create_key8_lru(void *params, int socket_id, uint32_t entry_size) /* Check input parameters */ if ((check_params_create_lru(p) != 0) || ((sizeof(struct rte_table_hash) % RTE_CACHE_LINE_SIZE) != 0) || - ((sizeof(struct rte_bucket_4_8) % RTE_CACHE_LINE_SIZE) != 0)) { + ((sizeof(struct rte_bucket_4_8) % 64) != 0)) { return NULL; } n_entries_per_bucket = 4; @@ -332,7 +332,7 @@ rte_table_hash_create_key8_ext(void *params, int socket_id, uint32_t entry_size) /* Check input parameters */ if ((check_params_create_ext(p) != 0) || ((sizeof(struct rte_table_hash) % RTE_CACHE_LINE_SIZE) != 0) || - ((sizeof(struct rte_bucket_4_8) % RTE_CACHE_LINE_SIZE) != 0)) + ((sizeof(struct rte_bucket_4_8) % 64) != 0)) return NULL; n_entries_per_bucket = 4; diff --git a/lib/librte_table/rte_table_version.map b/lib/librte_table/rte_table_version.map index 459c2da3..e1eaa275 100644 --- a/lib/librte_table/rte_table_version.map +++ b/lib/librte_table/rte_table_version.map @@ -28,4 +28,11 @@ DPDK_2.2 { rte_table_hash_key16_ext_dosig_ops; rte_table_hash_key16_lru_dosig_ops; +}; + +DPDK_16.07 { + global: + + rte_table_hash_cuckoo_dosig_ops; + } DPDK_2.0; diff --git a/lib/librte_timer/rte_timer.h b/lib/librte_timer/rte_timer.h index 77547c6b..a276a736 100644 --- a/lib/librte_timer/rte_timer.h +++ b/lib/librte_timer/rte_timer.h @@ -66,6 +66,7 @@ #include #include #include +#include #ifdef __cplusplus extern "C" { @@ -91,6 +92,7 @@ enum rte_timer_type { * config) and an owner (the id of the lcore that owns the timer). */ union rte_timer_status { + RTE_STD_C11 struct { uint16_t state; /**< Stop, pending, running, config. */ int16_t owner; /**< The lcore that owns the timer. */ diff --git a/lib/librte_vhost/Makefile b/lib/librte_vhost/Makefile index 538adb0b..415ffc6e 100644 --- a/lib/librte_vhost/Makefile +++ b/lib/librte_vhost/Makefile @@ -39,25 +39,16 @@ EXPORT_MAP := rte_vhost_version.map LIBABIVER := 3 CFLAGS += $(WERROR_FLAGS) -I$(SRCDIR) -O3 -D_FILE_OFFSET_BITS=64 -ifeq ($(CONFIG_RTE_LIBRTE_VHOST_USER),y) CFLAGS += -I vhost_user LDLIBS += -lpthread -else -CFLAGS += -I vhost_cuse -LDLIBS += -lfuse -endif ifeq ($(CONFIG_RTE_LIBRTE_VHOST_NUMA),y) LDLIBS += -lnuma endif # all source are stored in SRCS-y -SRCS-$(CONFIG_RTE_LIBRTE_VHOST) := virtio-net.c vhost_rxtx.c -ifeq ($(CONFIG_RTE_LIBRTE_VHOST_USER),y) -SRCS-$(CONFIG_RTE_LIBRTE_VHOST) += vhost_user/vhost-net-user.c vhost_user/virtio-net-user.c vhost_user/fd_man.c -else -SRCS-$(CONFIG_RTE_LIBRTE_VHOST) += vhost_cuse/vhost-net-cdev.c vhost_cuse/virtio-net-cdev.c vhost_cuse/eventfd_copy.c -endif +SRCS-$(CONFIG_RTE_LIBRTE_VHOST) := fd_man.c socket.c vhost.c vhost_user.c \ + virtio_net.c # install includes SYMLINK-$(CONFIG_RTE_LIBRTE_VHOST)-include += rte_virtio_net.h diff --git a/lib/librte_vhost/eventfd_link/Makefile b/lib/librte_vhost/eventfd_link/Makefile deleted file mode 100644 index 3140e8bf..00000000 --- a/lib/librte_vhost/eventfd_link/Makefile +++ /dev/null @@ -1,41 +0,0 @@ -# BSD LICENSE -# -# Copyright(c) 2010-2014 Intel Corporation. All rights reserved. -# All rights reserved. -# -# Redistribution and use in source and binary forms, with or without -# modification, are permitted provided that the following conditions -# are met: -# -# * Redistributions of source code must retain the above copyright -# notice, this list of conditions and the following disclaimer. 
-# * Redistributions in binary form must reproduce the above copyright -# notice, this list of conditions and the following disclaimer in -# the documentation and/or other materials provided with the -# distribution. -# * Neither the name of Intel Corporation nor the names of its -# contributors may be used to endorse or promote products derived -# from this software without specific prior written permission. -# -# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR -# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT -# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, -# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT -# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, -# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY -# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - -RTE_KERNELDIR ?= /lib/modules/$(shell uname -r)/build - -obj-m += eventfd_link.o - - -all: - make -C $(RTE_KERNELDIR) M=$(PWD) modules - -clean: - make -C $(RTE_KERNELDIR) M=$(PWD) clean diff --git a/lib/librte_vhost/eventfd_link/eventfd_link.c b/lib/librte_vhost/eventfd_link/eventfd_link.c deleted file mode 100644 index 4b05b5a8..00000000 --- a/lib/librte_vhost/eventfd_link/eventfd_link.c +++ /dev/null @@ -1,277 +0,0 @@ -/*- - * GPL LICENSE SUMMARY - * - * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of version 2 of the GNU General Public License as - * published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. - * The full GNU General Public License is included in this distribution - * in the file called LICENSE.GPL. 
- * - * Contact Information: - * Intel Corporation - */ - -#include -#include -#include -#include -#include - -#include "eventfd_link.h" - - -/* - * get_files_struct is copied from fs/file.c - */ -struct files_struct * -get_files_struct(struct task_struct *task) -{ - struct files_struct *files; - - task_lock(task); - files = task->files; - if (files) - atomic_inc(&files->count); - task_unlock(task); - - return files; -} - -/* - * put_files_struct is extracted from fs/file.c - */ -void -put_files_struct(struct files_struct *files) -{ - if (atomic_dec_and_test(&files->count)) - BUG(); -} - -static struct file * -fget_from_files(struct files_struct *files, unsigned fd) -{ - struct file *file; - - rcu_read_lock(); - file = fcheck_files(files, fd); - if (file) { - if (file->f_mode & FMODE_PATH || - !atomic_long_inc_not_zero(&file->f_count)) { - - file = NULL; - } - } - rcu_read_unlock(); - - return file; -} - -static long -eventfd_link_ioctl_copy2(unsigned long arg) -{ - void __user *argp = (void __user *) arg; - struct task_struct *task_target = NULL; - struct file *file; - struct files_struct *files; - struct eventfd_copy2 eventfd_copy2; - long ret = -EFAULT; - - if (copy_from_user(&eventfd_copy2, argp, sizeof(struct eventfd_copy2))) - goto out; - - /* - * Find the task struct for the target pid - */ - ret = -ESRCH; - - task_target = - get_pid_task(find_vpid(eventfd_copy2.pid), PIDTYPE_PID); - if (task_target == NULL) { - pr_info("Unable to find pid %d\n", eventfd_copy2.pid); - goto out; - } - - ret = -ESTALE; - files = get_files_struct(task_target); - if (files == NULL) { - pr_info("Failed to get target files struct\n"); - goto out_task; - } - - ret = -EBADF; - file = fget_from_files(files, eventfd_copy2.fd); - put_files_struct(files); - - if (file == NULL) { - pr_info("Failed to get fd %d from target\n", eventfd_copy2.fd); - goto out_task; - } - - /* - * Install the file struct from the target process into the - * newly allocated file desciptor of the source process. - */ - ret = get_unused_fd_flags(eventfd_copy2.flags); - if (ret < 0) { - fput(file); - goto out_task; - } - fd_install(ret, file); - -out_task: - put_task_struct(task_target); -out: - return ret; -} - -static long -eventfd_link_ioctl_copy(unsigned long arg) -{ - void __user *argp = (void __user *) arg; - struct task_struct *task_target = NULL; - struct file *file; - struct files_struct *files; - struct fdtable *fdt; - struct eventfd_copy eventfd_copy; - long ret = -EFAULT; - - if (copy_from_user(&eventfd_copy, argp, sizeof(struct eventfd_copy))) - goto out; - - /* - * Find the task struct for the target pid - */ - ret = -ESRCH; - - task_target = - get_pid_task(find_vpid(eventfd_copy.target_pid), PIDTYPE_PID); - if (task_target == NULL) { - pr_info("Unable to find pid %d\n", eventfd_copy.target_pid); - goto out; - } - - ret = -ESTALE; - files = get_files_struct(current); - if (files == NULL) { - pr_info("Failed to get current files struct\n"); - goto out_task; - } - - ret = -EBADF; - file = fget_from_files(files, eventfd_copy.source_fd); - - if (file == NULL) { - pr_info("Failed to get fd %d from source\n", - eventfd_copy.source_fd); - put_files_struct(files); - goto out_task; - } - - /* - * Release the existing eventfd in the source process - */ - spin_lock(&files->file_lock); - fput(file); - filp_close(file, files); - fdt = files_fdtable(files); - fdt->fd[eventfd_copy.source_fd] = NULL; - spin_unlock(&files->file_lock); - - put_files_struct(files); - - /* - * Find the file struct associated with the target fd. 
- */ - - ret = -ESTALE; - files = get_files_struct(task_target); - if (files == NULL) { - pr_info("Failed to get target files struct\n"); - goto out_task; - } - - ret = -EBADF; - file = fget_from_files(files, eventfd_copy.target_fd); - put_files_struct(files); - - if (file == NULL) { - pr_info("Failed to get fd %d from target\n", - eventfd_copy.target_fd); - goto out_task; - } - - /* - * Install the file struct from the target process into the - * file desciptor of the source process, - */ - - fd_install(eventfd_copy.source_fd, file); - ret = 0; - -out_task: - put_task_struct(task_target); -out: - return ret; -} - -static long -eventfd_link_ioctl(struct file *f, unsigned int ioctl, unsigned long arg) -{ - long ret = -ENOIOCTLCMD; - - switch (ioctl) { - case EVENTFD_COPY: - ret = eventfd_link_ioctl_copy(arg); - break; - case EVENTFD_COPY2: - ret = eventfd_link_ioctl_copy2(arg); - break; - } - - return ret; -} - -static const struct file_operations eventfd_link_fops = { - .owner = THIS_MODULE, - .unlocked_ioctl = eventfd_link_ioctl, -}; - - -static struct miscdevice eventfd_link_misc = { - .minor = MISC_DYNAMIC_MINOR, - .name = "eventfd-link", - .fops = &eventfd_link_fops, -}; - -static int __init -eventfd_link_init(void) -{ - return misc_register(&eventfd_link_misc); -} - -module_init(eventfd_link_init); - -static void __exit -eventfd_link_exit(void) -{ - misc_deregister(&eventfd_link_misc); -} - -module_exit(eventfd_link_exit); - -MODULE_VERSION("0.0.1"); -MODULE_LICENSE("GPL v2"); -MODULE_AUTHOR("Anthony Fee"); -MODULE_DESCRIPTION("Link eventfd"); -MODULE_ALIAS("devname:eventfd-link"); diff --git a/lib/librte_vhost/eventfd_link/eventfd_link.h b/lib/librte_vhost/eventfd_link/eventfd_link.h deleted file mode 100644 index 5ebc20b8..00000000 --- a/lib/librte_vhost/eventfd_link/eventfd_link.h +++ /dev/null @@ -1,94 +0,0 @@ -/*- - * This file is provided under a dual BSD/GPLv2 license. When using or - * redistributing this file, you may do so under either license. - * - * GPL LICENSE SUMMARY - * - * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of version 2 of the GNU General Public License as - * published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. - * The full GNU General Public License is included in this distribution - * in the file called LICENSE.GPL. - * - * Contact Information: - * Intel Corporation - * - * BSD LICENSE - * - * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * - * Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in - * the documentation and/or other materials provided with the - * distribution. 
- * Neither the name of Intel Corporation nor the names of its - * contributors may be used to endorse or promote products derived - * from this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * - */ - -#ifndef _EVENTFD_LINK_H_ -#define _EVENTFD_LINK_H_ - -/* - * arguements for the EVENTFD_COPY ioctl - */ -struct eventfd_copy { - unsigned target_fd; /* fd in the target pid */ - unsigned source_fd; /* fd in the calling pid */ - pid_t target_pid; /* pid of the target pid */ -}; - -/* - * ioctl to copy an fd entry in calling process to an fd in a target process - * NOTE: this one should be - * #define EVENTFD_COPY _IOWR('D', 1, struct eventfd_copy) actually - */ -#define EVENTFD_COPY 1 - -/* - * arguments for the EVENTFD_COPY2 ioctl - */ -struct eventfd_copy2 { - unsigned fd; /* fd to steal */ - pid_t pid; /* pid of the process to steal from */ - unsigned flags; /* flags to allocate new fd with */ -}; - -/* - * ioctl to copy an fd entry from the target process into newly allocated - * fd in the calling process - */ -#define EVENTFD_COPY2 _IOW('D', 2, struct eventfd_copy2) - -#endif /* _EVENTFD_LINK_H_ */ diff --git a/lib/librte_vhost/fd_man.c b/lib/librte_vhost/fd_man.c new file mode 100644 index 00000000..2d3eeb7d --- /dev/null +++ b/lib/librte_vhost/fd_man.c @@ -0,0 +1,299 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include + +#include "fd_man.h" + +/** + * Returns the index in the fdset for a given fd. + * If fd is -1, it searches for a free entry. + * @return + * index for the fd, or -1 if the fd isn't in the fdset. + */ +static int +fdset_find_fd(struct fdset *pfdset, int fd) +{ + int i; + + if (pfdset == NULL) + return -1; + + for (i = 0; i < MAX_FDS && pfdset->fd[i].fd != fd; i++) + ; + + return i == MAX_FDS ? -1 : i; +} + +static int +fdset_find_free_slot(struct fdset *pfdset) +{ + return fdset_find_fd(pfdset, -1); +} + +static int +fdset_add_fd(struct fdset *pfdset, int idx, int fd, + fd_cb rcb, fd_cb wcb, void *dat) +{ + struct fdentry *pfdentry; + + if (pfdset == NULL || idx >= MAX_FDS || fd >= FD_SETSIZE) + return -1; + + pfdentry = &pfdset->fd[idx]; + pfdentry->fd = fd; + pfdentry->rcb = rcb; + pfdentry->wcb = wcb; + pfdentry->dat = dat; + + return 0; +} + +/** + * Fill the read/write fd_set with the fds in the fdset. + * @return + * the largest fd filled into the read/write fd_set, or -1 if none was added. + */ +static int +fdset_fill(fd_set *rfset, fd_set *wfset, struct fdset *pfdset) +{ + struct fdentry *pfdentry; + int i, maxfds = -1; + int num = MAX_FDS; + + if (pfdset == NULL) + return -1; + + for (i = 0; i < num; i++) { + pfdentry = &pfdset->fd[i]; + if (pfdentry->fd != -1) { + int added = 0; + if (pfdentry->rcb && rfset) { + FD_SET(pfdentry->fd, rfset); + added = 1; + } + if (pfdentry->wcb && wfset) { + FD_SET(pfdentry->fd, wfset); + added = 1; + } + if (added) + maxfds = pfdentry->fd < maxfds ? + maxfds : pfdentry->fd; + } + } + return maxfds; +} + +void +fdset_init(struct fdset *pfdset) +{ + int i; + + if (pfdset == NULL) + return; + + for (i = 0; i < MAX_FDS; i++) { + pfdset->fd[i].fd = -1; + pfdset->fd[i].dat = NULL; + } + pfdset->num = 0; +} + +/** + * Register the fd in the fdset with read/write handler and context. + */ +int +fdset_add(struct fdset *pfdset, int fd, fd_cb rcb, fd_cb wcb, void *dat) +{ + int i; + + if (pfdset == NULL || fd == -1) + return -1; + + pthread_mutex_lock(&pfdset->fd_mutex); + + /* Find a free slot in the list. */ + i = fdset_find_free_slot(pfdset); + if (i == -1 || fdset_add_fd(pfdset, i, fd, rcb, wcb, dat) < 0) { + pthread_mutex_unlock(&pfdset->fd_mutex); + return -2; + } + + pfdset->num++; + + pthread_mutex_unlock(&pfdset->fd_mutex); + + return 0; +} + +/** + * Unregister the fd from the fdset. + * Returns the context of the given fd, or NULL. + */ +void * +fdset_del(struct fdset *pfdset, int fd) +{ + int i; + void *dat = NULL; + + if (pfdset == NULL || fd == -1) + return NULL; + + do { + pthread_mutex_lock(&pfdset->fd_mutex); + + i = fdset_find_fd(pfdset, fd); + if (i != -1 && pfdset->fd[i].busy == 0) { + /* busy indicates r/wcb is executing!
*/ + dat = pfdset->fd[i].dat; + pfdset->fd[i].fd = -1; + pfdset->fd[i].rcb = pfdset->fd[i].wcb = NULL; + pfdset->fd[i].dat = NULL; + pfdset->num--; + i = -1; + } + pthread_mutex_unlock(&pfdset->fd_mutex); + } while (i != -1); + + return dat; +} + +/** + * Unregister the fd at the specified slot from the fdset. + */ +static void +fdset_del_slot(struct fdset *pfdset, int index) +{ + if (pfdset == NULL || index < 0 || index >= MAX_FDS) + return; + + pthread_mutex_lock(&pfdset->fd_mutex); + + pfdset->fd[index].fd = -1; + pfdset->fd[index].rcb = pfdset->fd[index].wcb = NULL; + pfdset->fd[index].dat = NULL; + pfdset->num--; + + pthread_mutex_unlock(&pfdset->fd_mutex); +} + +/** + * This function runs in an infinite blocking loop until there is no fd in + * pfdset. It calls the corresponding r/w handler if there is an event on + * the fd. + * + * Before the callback is called, we set the flag to busy status; if another + * thread (currently rte_vhost_driver_unregister) calls fdset_del + * concurrently, it will wait until the flag is reset to zero (which + * indicates the callback is finished), and can then free the context after + * fdset_del. + */ +void +fdset_event_dispatch(struct fdset *pfdset) +{ + fd_set rfds, wfds; + int i, maxfds; + struct fdentry *pfdentry; + int num = MAX_FDS; + fd_cb rcb, wcb; + void *dat; + int fd; + int remove1, remove2; + int ret; + + if (pfdset == NULL) + return; + + while (1) { + struct timeval tv; + tv.tv_sec = 1; + tv.tv_usec = 0; + FD_ZERO(&rfds); + FD_ZERO(&wfds); + pthread_mutex_lock(&pfdset->fd_mutex); + + maxfds = fdset_fill(&rfds, &wfds, pfdset); + + pthread_mutex_unlock(&pfdset->fd_mutex); + + /* + * While select is blocked, other threads might unregister + * listenfds from the fdset and register new listenfds into it. + * When select returns, the entries for listenfds in the fdset + * might have been updated. It is OK if there is an unwanted + * call for a new listenfd. + */ + ret = select(maxfds + 1, &rfds, &wfds, NULL, &tv); + if (ret <= 0) + continue; + + for (i = 0; i < num; i++) { + remove1 = remove2 = 0; + pthread_mutex_lock(&pfdset->fd_mutex); + pfdentry = &pfdset->fd[i]; + fd = pfdentry->fd; + rcb = pfdentry->rcb; + wcb = pfdentry->wcb; + dat = pfdentry->dat; + pfdentry->busy = 1; + pthread_mutex_unlock(&pfdset->fd_mutex); + if (fd >= 0 && FD_ISSET(fd, &rfds) && rcb) + rcb(fd, dat, &remove1); + if (fd >= 0 && FD_ISSET(fd, &wfds) && wcb) + wcb(fd, dat, &remove2); + pfdentry->busy = 0; + /* + * fdset_del needs to check the busy flag, so + * fdset_del must not be called from a callback + * directly. + */ + /* + * When the fd is closed in the callback and has + * to be cleaned up from the fdset, its old value + * could already have been reused for a new listen + * fd created in another thread, so we cannot call + * fdset_del here; delete by slot index instead. + */ + if (remove1 || remove2) + fdset_del_slot(pfdset, i); + } + } +} diff --git a/lib/librte_vhost/fd_man.h b/lib/librte_vhost/fd_man.h new file mode 100644 index 00000000..bd66ed1c --- /dev/null +++ b/lib/librte_vhost/fd_man.h @@ -0,0 +1,67 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef _FD_MAN_H_ +#define _FD_MAN_H_ +#include +#include + +#define MAX_FDS 1024 + +typedef void (*fd_cb)(int fd, void *dat, int *remove); + +struct fdentry { + int fd; /* -1 indicates this entry is empty */ + fd_cb rcb; /* callback when this fd is readable. */ + fd_cb wcb; /* callback when this fd is writeable.*/ + void *dat; /* fd context */ + int busy; /* whether this entry is being used in cb. */ +}; + +struct fdset { + struct fdentry fd[MAX_FDS]; + pthread_mutex_t fd_mutex; + int num; /* current fd number of this fdset */ +}; + + +void fdset_init(struct fdset *pfdset); + +int fdset_add(struct fdset *pfdset, int fd, + fd_cb rcb, fd_cb wcb, void *dat); + +void *fdset_del(struct fdset *pfdset, int fd); + +void fdset_event_dispatch(struct fdset *pfdset); + +#endif diff --git a/lib/librte_vhost/libvirt/qemu-wrap.py b/lib/librte_vhost/libvirt/qemu-wrap.py deleted file mode 100755 index e6a2cc9d..00000000 --- a/lib/librte_vhost/libvirt/qemu-wrap.py +++ /dev/null @@ -1,387 +0,0 @@ -#!/usr/bin/python -#/* -# * BSD LICENSE -# * -# * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. -# * All rights reserved. -# * -# * Redistribution and use in source and binary forms, with or without -# * modification, are permitted provided that the following conditions -# * are met: -# * -# * * Redistributions of source code must retain the above copyright -# * notice, this list of conditions and the following disclaimer. -# * * Redistributions in binary form must reproduce the above copyright -# * notice, this list of conditions and the following disclaimer in -# * the documentation and/or other materials provided with the -# * distribution. -# * * Neither the name of Intel Corporation nor the names of its -# * contributors may be used to endorse or promote products derived -# * from this software without specific prior written permission. -# * -# * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -# * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -# * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR -# * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT -# * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, -# * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT -# * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, -# * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY -# * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -# * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -# * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -# */ - -##################################################################### -# This script is designed to modify the call to the QEMU emulator -# to support userspace vhost when starting a guest machine through -# libvirt with vhost enabled. The steps to enable this are as follows -# and should be run as root: -# -# 1. Place this script in a libvirtd's binary search PATH ($PATH) -# A good location would be in the same directory that the QEMU -# binary is located -# -# 2. Ensure that the script has the same owner/group and file -# permissions as the QEMU binary -# -# 3. Update the VM xml file using "virsh edit VM.xml" -# -# 3.a) Set the VM to use the launch script -# -# Set the emulator path contained in the -# tags -# -# e.g replace /usr/bin/qemu-kvm -# with /usr/bin/qemu-wrap.py -# -# 3.b) Set the VM's device's to use vhost-net offload -# -# -# -# -# -# -# 4. Enable libvirt to access our userpace device file by adding it to -# controllers cgroup for libvirtd using the following steps -# -# 4.a) In /etc/libvirt/qemu.conf add/edit the following lines: -# 1) cgroup_controllers = [ ... "devices", ... ] -# 2) clear_emulator_capabilities = 0 -# 3) user = "root" -# 4) group = "root" -# 5) cgroup_device_acl = [ -# "/dev/null", "/dev/full", "/dev/zero", -# "/dev/random", "/dev/urandom", -# "/dev/ptmx", "/dev/kvm", "/dev/kqemu", -# "/dev/rtc", "/dev/hpet", "/dev/net/tun", -# "/dev/", -# "/dev/hugepages", -# ] -# -# 4.b) Disable SELinux or set to permissive mode -# -# 4.c) Mount cgroup device controller -# "mkdir /dev/cgroup" -# "mount -t cgroup none /dev/cgroup -o devices" -# -# 4.d) Set hugetlbfs_mount variable - ( Optional ) -# VMs using userspace vhost must use hugepage backed -# memory. This can be enabled in the libvirt XML -# config by adding a memory backing section to the -# XML config e.g. -# -# -# -# This memory backing section should be added after the -# and sections. This will add -# flags "-mem-prealloc -mem-path " to the QEMU -# command line. The hugetlbfs_mount variable can be used -# to override the default passed through by libvirt. -# -# if "-mem-prealloc" or "-mem-path " are not passed -# through and a vhost device is detected then these options will -# be automatically added by this script. This script will detect -# the system hugetlbfs mount point to be used for . The -# default for this script can be overidden by the -# hugetlbfs_dir variable in the configuration section of this script. -# -# -# 4.e) Restart the libvirtd system process -# e.g. on Fedora "systemctl restart libvirtd.service" -# -# -# 4.f) Edit the Configuration Parameters section of this script -# to point to the correct emulator location and set any -# addition options -# -# The script modifies the libvirtd Qemu call by modifying/adding -# options based on the configuration parameters below. 
-# NOTE: -# emul_path and us_vhost_path must be set -# All other parameters are optional -##################################################################### - - -############################################# -# Configuration Parameters -############################################# -#Path to QEMU binary -emul_path = "/usr/local/bin/qemu-system-x86_64" - -#Path to userspace vhost device file -# This filename should match the --dev-basename parameters of -# the command used to launch the userspace vhost sample application e.g. -# if the sample app lauch command is: -# ./build/vhost-switch ..... --dev-basename usvhost -# then this variable should be set to: -# us_vhost_path = "/dev/usvhost" -us_vhost_path = "/dev/usvhost" - -#List of additional user defined emulation options. These options will -#be added to all Qemu calls -emul_opts_user = [] - -#List of additional user defined emulation options for vhost only. -#These options will only be added to vhost enabled guests -emul_opts_user_vhost = [] - -#For all VHOST enabled VMs, the VM memory is preallocated from hugetlbfs -# Set this variable to one to enable this option for all VMs -use_huge_all = 0 - -#Instead of autodetecting, override the hugetlbfs directory by setting -#this variable -hugetlbfs_dir = "" - -############################################# - - -############################################# -# ****** Do Not Modify Below this Line ****** -############################################# - -import sys, os, subprocess -import time -import signal - - -#List of open userspace vhost file descriptors -fd_list = [] - -#additional virtio device flags when using userspace vhost -vhost_flags = [ "csum=off", - "gso=off", - "guest_tso4=off", - "guest_tso6=off", - "guest_ecn=off" - ] - -#String of the path to the Qemu process pid -qemu_pid = "/tmp/%d-qemu.pid" % os.getpid() - -############################################# -# Signal haldler to kill Qemu subprocess -############################################# -def kill_qemu_process(signum, stack): - pidfile = open(qemu_pid, 'r') - pid = int(pidfile.read()) - os.killpg(pid, signal.SIGTERM) - pidfile.close() - - -############################################# -# Find the system hugefile mount point. -# Note: -# if multiple hugetlbfs mount points exist -# then the first one found will be used -############################################# -def find_huge_mount(): - - if (len(hugetlbfs_dir)): - return hugetlbfs_dir - - huge_mount = "" - - if (os.access("/proc/mounts", os.F_OK)): - f = open("/proc/mounts", "r") - line = f.readline() - while line: - line_split = line.split(" ") - if line_split[2] == 'hugetlbfs': - huge_mount = line_split[1] - break - line = f.readline() - else: - print "/proc/mounts not found" - exit (1) - - f.close - if len(huge_mount) == 0: - print "Failed to find hugetlbfs mount point" - exit (1) - - return huge_mount - - -############################################# -# Get a userspace Vhost file descriptor -############################################# -def get_vhost_fd(): - - if (os.access(us_vhost_path, os.F_OK)): - fd = os.open( us_vhost_path, os.O_RDWR) - else: - print ("US-Vhost file %s not found" %us_vhost_path) - exit (1) - - return fd - - -############################################# -# Check for vhostfd. 
if found then replace -# with our own vhost fd and append any vhost -# flags onto the end -############################################# -def modify_netdev_arg(arg): - - global fd_list - vhost_in_use = 0 - s = '' - new_opts = [] - netdev_opts = arg.split(",") - - for opt in netdev_opts: - #check if vhost is used - if "vhost" == opt[:5]: - vhost_in_use = 1 - else: - new_opts.append(opt) - - #if using vhost append vhost options - if vhost_in_use == 1: - #append vhost on option - new_opts.append('vhost=on') - #append vhostfd ption - new_fd = get_vhost_fd() - new_opts.append('vhostfd=' + str(new_fd)) - fd_list.append(new_fd) - - #concatenate all options - for opt in new_opts: - if len(s) > 0: - s+=',' - - s+=opt - - return s - - -############################################# -# Main -############################################# -def main(): - - global fd_list - global vhost_in_use - new_args = [] - num_cmd_args = len(sys.argv) - emul_call = '' - mem_prealloc_set = 0 - mem_path_set = 0 - num = 0; - - #parse the parameters - while (num < num_cmd_args): - arg = sys.argv[num] - - #Check netdev +1 parameter for vhostfd - if arg == '-netdev': - num_vhost_devs = len(fd_list) - new_args.append(arg) - - num+=1 - arg = sys.argv[num] - mod_arg = modify_netdev_arg(arg) - new_args.append(mod_arg) - - #append vhost flags if this is a vhost device - # and -device is the next arg - # i.e -device -opt1,-opt2,...,-opt3,%vhost - if (num_vhost_devs < len(fd_list)): - num+=1 - arg = sys.argv[num] - if arg == '-device': - new_args.append(arg) - num+=1 - new_arg = sys.argv[num] - for flag in vhost_flags: - new_arg = ''.join([new_arg,',',flag]) - new_args.append(new_arg) - else: - new_args.append(arg) - elif arg == '-mem-prealloc': - mem_prealloc_set = 1 - new_args.append(arg) - elif arg == '-mem-path': - mem_path_set = 1 - new_args.append(arg) - - else: - new_args.append(arg) - - num+=1 - - #Set Qemu binary location - emul_call+=emul_path - emul_call+=" " - - #Add prealloc mem options if using vhost and not already added - if ((len(fd_list) > 0) and (mem_prealloc_set == 0)): - emul_call += "-mem-prealloc " - - #Add mempath mem options if using vhost and not already added - if ((len(fd_list) > 0) and (mem_path_set == 0)): - #Detect and add hugetlbfs mount point - mp = find_huge_mount() - mp = "".join(["-mem-path ", mp]) - emul_call += mp - emul_call += " " - - #add user options - for opt in emul_opts_user: - emul_call += opt - emul_call += " " - - #Add add user vhost only options - if len(fd_list) > 0: - for opt in emul_opts_user_vhost: - emul_call += opt - emul_call += " " - - #Add updated libvirt options - iter_args = iter(new_args) - #skip 1st arg i.e. 
call to this script - next(iter_args) - for arg in iter_args: - emul_call+=str(arg) - emul_call+= " " - - emul_call += "-pidfile %s " % qemu_pid - #Call QEMU - process = subprocess.Popen(emul_call, shell=True, preexec_fn=os.setsid) - - for sig in [signal.SIGTERM, signal.SIGINT, signal.SIGHUP, signal.SIGQUIT]: - signal.signal(sig, kill_qemu_process) - - process.wait() - - #Close usvhost files - for fd in fd_list: - os.close(fd) - #Cleanup temporary files - if os.access(qemu_pid, os.F_OK): - os.remove(qemu_pid) - -if __name__ == "__main__": - main() diff --git a/lib/librte_vhost/rte_virtio_net.h b/lib/librte_vhost/rte_virtio_net.h index 9caa6221..926039c5 100644 --- a/lib/librte_vhost/rte_virtio_net.h +++ b/lib/librte_vhost/rte_virtio_net.h @@ -53,16 +53,13 @@ #define RTE_VHOST_USER_CLIENT (1ULL << 0) #define RTE_VHOST_USER_NO_RECONNECT (1ULL << 1) +#define RTE_VHOST_USER_DEQUEUE_ZERO_COPY (1ULL << 2) /* Enum for virtqueue management. */ enum {VIRTIO_RXQ, VIRTIO_TXQ, VIRTIO_QNUM}; /** * Device and vring operations. - * - * Make sure to set VIRTIO_DEV_RUNNING to the device flags in new_device and - * remove it in destroy_device. - * */ struct virtio_net_device_ops { int (*new_device)(int vid); /**< Add device. */ @@ -126,9 +123,8 @@ int rte_vhost_get_numa_node(int vid); uint32_t rte_vhost_get_queue_num(int vid); /** - * Get the virtio net device's ifname. For vhost-cuse, ifname is the - * path of the char device. For vhost-user, ifname is the vhost-user - * socket file path. + * Get the virtio net device's ifname, which is the vhost-user socket + * file path. * * @param vid * virtio-net device ID diff --git a/lib/librte_vhost/socket.c b/lib/librte_vhost/socket.c new file mode 100644 index 00000000..aaa9c270 --- /dev/null +++ b/lib/librte_vhost/socket.c @@ -0,0 +1,615 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2016 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include + +#include "fd_man.h" +#include "vhost.h" +#include "vhost_user.h" + +/* + * Every time rte_vhost_driver_register() is invoked, an associated + * vhost_user_socket struct will be created. + */ +struct vhost_user_socket { + char *path; + int listenfd; + int connfd; + bool is_server; + bool reconnect; + bool dequeue_zero_copy; +}; + +struct vhost_user_connection { + struct vhost_user_socket *vsocket; + int vid; +}; + +#define MAX_VHOST_SOCKET 1024 +struct vhost_user { + struct vhost_user_socket *vsockets[MAX_VHOST_SOCKET]; + struct fdset fdset; + int vsocket_cnt; + pthread_mutex_t mutex; +}; + +#define MAX_VIRTIO_BACKLOG 128 + +static void vhost_user_server_new_connection(int fd, void *data, int *remove); +static void vhost_user_read_cb(int fd, void *dat, int *remove); +static int vhost_user_create_client(struct vhost_user_socket *vsocket); + +static struct vhost_user vhost_user = { + .fdset = { + .fd = { [0 ... MAX_FDS - 1] = {-1, NULL, NULL, NULL, 0} }, + .fd_mutex = PTHREAD_MUTEX_INITIALIZER, + .num = 0 + }, + .vsocket_cnt = 0, + .mutex = PTHREAD_MUTEX_INITIALIZER, +}; + +/* Return the number of bytes read on success, or a negative value on failure. */ +int +read_fd_message(int sockfd, char *buf, int buflen, int *fds, int fd_num) +{ + struct iovec iov; + struct msghdr msgh; + size_t fdsize = fd_num * sizeof(int); + char control[CMSG_SPACE(fdsize)]; + struct cmsghdr *cmsg; + int ret; + + memset(&msgh, 0, sizeof(msgh)); + iov.iov_base = buf; + iov.iov_len = buflen; + + msgh.msg_iov = &iov; + msgh.msg_iovlen = 1; + msgh.msg_control = control; + msgh.msg_controllen = sizeof(control); + + ret = recvmsg(sockfd, &msgh, 0); + if (ret <= 0) { + RTE_LOG(ERR, VHOST_CONFIG, "recvmsg failed\n"); + return ret; + } + + if (msgh.msg_flags & (MSG_TRUNC | MSG_CTRUNC)) { + RTE_LOG(ERR, VHOST_CONFIG, "truncated msg\n"); + return -1; + } + + for (cmsg = CMSG_FIRSTHDR(&msgh); cmsg != NULL; + cmsg = CMSG_NXTHDR(&msgh, cmsg)) { + if ((cmsg->cmsg_level == SOL_SOCKET) && + (cmsg->cmsg_type == SCM_RIGHTS)) { + memcpy(fds, CMSG_DATA(cmsg), fdsize); + break; + } + } + + return ret; +} + +int +send_fd_message(int sockfd, char *buf, int buflen, int *fds, int fd_num) +{ + + struct iovec iov; + struct msghdr msgh; + size_t fdsize = fd_num * sizeof(int); + char control[CMSG_SPACE(fdsize)]; + struct cmsghdr *cmsg; + int ret; + + memset(&msgh, 0, sizeof(msgh)); + iov.iov_base = buf; + iov.iov_len = buflen; + + msgh.msg_iov = &iov; + msgh.msg_iovlen = 1; + + if (fds && fd_num > 0) { + msgh.msg_control = control; + msgh.msg_controllen = sizeof(control); + cmsg = CMSG_FIRSTHDR(&msgh); + cmsg->cmsg_len = CMSG_LEN(fdsize); + cmsg->cmsg_level = SOL_SOCKET; + cmsg->cmsg_type = SCM_RIGHTS; + memcpy(CMSG_DATA(cmsg), fds, fdsize); + } else { + msgh.msg_control = NULL; + msgh.msg_controllen = 0; + } + + do { + ret = sendmsg(sockfd, &msgh, 0); + } while (ret < 0 && errno == EINTR); + + if (ret < 0) { + RTE_LOG(ERR, VHOST_CONFIG, "sendmsg error\n"); + return ret; + } + + return ret; +} + +static void +vhost_user_add_connection(int fd, struct vhost_user_socket *vsocket) +{ + int vid; + size_t size; + struct vhost_user_connection *conn; + int ret; + + conn = malloc(sizeof(*conn)); + if (conn == NULL) { + close(fd); + return; + } + + vid = vhost_new_device(); + if (vid == -1) { + close(fd); + free(conn); + return; + } + + size = strnlen(vsocket->path, PATH_MAX); + vhost_set_ifname(vid,
vsocket->path, size); + + if (vsocket->dequeue_zero_copy) + vhost_enable_dequeue_zero_copy(vid); + + RTE_LOG(INFO, VHOST_CONFIG, "new device, handle is %d\n", vid); + + vsocket->connfd = fd; + conn->vsocket = vsocket; + conn->vid = vid; + ret = fdset_add(&vhost_user.fdset, fd, vhost_user_read_cb, + NULL, conn); + if (ret < 0) { + vsocket->connfd = -1; + free(conn); + close(fd); + RTE_LOG(ERR, VHOST_CONFIG, + "failed to add fd %d into vhost server fdset\n", + fd); + } +} + +/* call back when there is new vhost-user connection from client */ +static void +vhost_user_server_new_connection(int fd, void *dat, int *remove __rte_unused) +{ + struct vhost_user_socket *vsocket = dat; + + fd = accept(fd, NULL, NULL); + if (fd < 0) + return; + + RTE_LOG(INFO, VHOST_CONFIG, "new vhost user connection is %d\n", fd); + vhost_user_add_connection(fd, vsocket); +} + +static void +vhost_user_read_cb(int connfd, void *dat, int *remove) +{ + struct vhost_user_connection *conn = dat; + struct vhost_user_socket *vsocket = conn->vsocket; + int ret; + + ret = vhost_user_msg_handler(conn->vid, connfd); + if (ret < 0) { + vsocket->connfd = -1; + close(connfd); + *remove = 1; + vhost_destroy_device(conn->vid); + free(conn); + + if (vsocket->reconnect) + vhost_user_create_client(vsocket); + } +} + +static int +create_unix_socket(const char *path, struct sockaddr_un *un, bool is_server) +{ + int fd; + + fd = socket(AF_UNIX, SOCK_STREAM, 0); + if (fd < 0) + return -1; + RTE_LOG(INFO, VHOST_CONFIG, "vhost-user %s: socket created, fd: %d\n", + is_server ? "server" : "client", fd); + + if (!is_server && fcntl(fd, F_SETFL, O_NONBLOCK)) { + RTE_LOG(ERR, VHOST_CONFIG, + "vhost-user: can't set nonblocking mode for socket, fd: " + "%d (%s)\n", fd, strerror(errno)); + close(fd); + return -1; + } + + memset(un, 0, sizeof(*un)); + un->sun_family = AF_UNIX; + strncpy(un->sun_path, path, sizeof(un->sun_path)); + un->sun_path[sizeof(un->sun_path) - 1] = '\0'; + + return fd; +} + +static int +vhost_user_create_server(struct vhost_user_socket *vsocket) +{ + int fd; + int ret; + struct sockaddr_un un; + const char *path = vsocket->path; + + fd = create_unix_socket(path, &un, vsocket->is_server); + if (fd < 0) + return -1; + + ret = bind(fd, (struct sockaddr *)&un, sizeof(un)); + if (ret < 0) { + RTE_LOG(ERR, VHOST_CONFIG, + "failed to bind to %s: %s; remove it and try again\n", + path, strerror(errno)); + goto err; + } + RTE_LOG(INFO, VHOST_CONFIG, "bind to %s\n", path); + + ret = listen(fd, MAX_VIRTIO_BACKLOG); + if (ret < 0) + goto err; + + vsocket->listenfd = fd; + ret = fdset_add(&vhost_user.fdset, fd, vhost_user_server_new_connection, + NULL, vsocket); + if (ret < 0) { + RTE_LOG(ERR, VHOST_CONFIG, + "failed to add listen fd %d to vhost server fdset\n", + fd); + goto err; + } + + return 0; + +err: + close(fd); + return -1; +} + +struct vhost_user_reconnect { + struct sockaddr_un un; + int fd; + struct vhost_user_socket *vsocket; + + TAILQ_ENTRY(vhost_user_reconnect) next; +}; + +TAILQ_HEAD(vhost_user_reconnect_tailq_list, vhost_user_reconnect); +struct vhost_user_reconnect_list { + struct vhost_user_reconnect_tailq_list head; + pthread_mutex_t mutex; +}; + +static struct vhost_user_reconnect_list reconn_list; +static pthread_t reconn_tid; + +static int +vhost_user_connect_nonblock(int fd, struct sockaddr *un, size_t sz) +{ + int ret, flags; + + ret = connect(fd, un, sz); + if (ret < 0 && errno != EISCONN) + return -1; + + flags = fcntl(fd, F_GETFL, 0); + if (flags < 0) { + RTE_LOG(ERR, VHOST_CONFIG, + "can't get flags for connfd 
%d\n", fd); + return -2; + } + if ((flags & O_NONBLOCK) && fcntl(fd, F_SETFL, flags & ~O_NONBLOCK)) { + RTE_LOG(ERR, VHOST_CONFIG, + "can't disable nonblocking on fd %d\n", fd); + return -2; + } + return 0; +} + +static void * +vhost_user_client_reconnect(void *arg __rte_unused) +{ + int ret; + struct vhost_user_reconnect *reconn, *next; + + while (1) { + pthread_mutex_lock(&reconn_list.mutex); + + /* + * An equal implementation of TAILQ_FOREACH_SAFE, + * which does not exist on all platforms. + */ + for (reconn = TAILQ_FIRST(&reconn_list.head); + reconn != NULL; reconn = next) { + next = TAILQ_NEXT(reconn, next); + + ret = vhost_user_connect_nonblock(reconn->fd, + (struct sockaddr *)&reconn->un, + sizeof(reconn->un)); + if (ret == -2) { + close(reconn->fd); + RTE_LOG(ERR, VHOST_CONFIG, + "reconnection for fd %d failed\n", + reconn->fd); + goto remove_fd; + } + if (ret == -1) + continue; + + RTE_LOG(INFO, VHOST_CONFIG, + "%s: connected\n", reconn->vsocket->path); + vhost_user_add_connection(reconn->fd, reconn->vsocket); +remove_fd: + TAILQ_REMOVE(&reconn_list.head, reconn, next); + free(reconn); + } + + pthread_mutex_unlock(&reconn_list.mutex); + sleep(1); + } + + return NULL; +} + +static int +vhost_user_reconnect_init(void) +{ + int ret; + + pthread_mutex_init(&reconn_list.mutex, NULL); + TAILQ_INIT(&reconn_list.head); + + ret = pthread_create(&reconn_tid, NULL, + vhost_user_client_reconnect, NULL); + if (ret < 0) + RTE_LOG(ERR, VHOST_CONFIG, "failed to create reconnect thread"); + + return ret; +} + +static int +vhost_user_create_client(struct vhost_user_socket *vsocket) +{ + int fd; + int ret; + struct sockaddr_un un; + const char *path = vsocket->path; + struct vhost_user_reconnect *reconn; + + fd = create_unix_socket(path, &un, vsocket->is_server); + if (fd < 0) + return -1; + + ret = vhost_user_connect_nonblock(fd, (struct sockaddr *)&un, + sizeof(un)); + if (ret == 0) { + vhost_user_add_connection(fd, vsocket); + return 0; + } + + RTE_LOG(ERR, VHOST_CONFIG, + "failed to connect to %s: %s\n", + path, strerror(errno)); + + if (ret == -2 || !vsocket->reconnect) { + close(fd); + return -1; + } + + RTE_LOG(ERR, VHOST_CONFIG, "%s: reconnecting...\n", path); + reconn = malloc(sizeof(*reconn)); + if (reconn == NULL) { + RTE_LOG(ERR, VHOST_CONFIG, + "failed to allocate memory for reconnect\n"); + close(fd); + return -1; + } + reconn->un = un; + reconn->fd = fd; + reconn->vsocket = vsocket; + pthread_mutex_lock(&reconn_list.mutex); + TAILQ_INSERT_TAIL(&reconn_list.head, reconn, next); + pthread_mutex_unlock(&reconn_list.mutex); + + return 0; +} + +/* + * Register a new vhost-user socket; here we could act as server + * (the default case), or client (when RTE_VHOST_USER_CLIENT) flag + * is set. 
+ */ +int +rte_vhost_driver_register(const char *path, uint64_t flags) +{ + int ret = -1; + struct vhost_user_socket *vsocket; + + if (!path) + return -1; + + pthread_mutex_lock(&vhost_user.mutex); + + if (vhost_user.vsocket_cnt == MAX_VHOST_SOCKET) { + RTE_LOG(ERR, VHOST_CONFIG, + "error: the number of vhost sockets reaches maximum\n"); + goto out; + } + + vsocket = malloc(sizeof(struct vhost_user_socket)); + if (!vsocket) + goto out; + memset(vsocket, 0, sizeof(struct vhost_user_socket)); + vsocket->path = strdup(path); + vsocket->connfd = -1; + vsocket->dequeue_zero_copy = flags & RTE_VHOST_USER_DEQUEUE_ZERO_COPY; + + if ((flags & RTE_VHOST_USER_CLIENT) != 0) { + vsocket->reconnect = !(flags & RTE_VHOST_USER_NO_RECONNECT); + if (vsocket->reconnect && reconn_tid == 0) { + if (vhost_user_reconnect_init() < 0) { + free(vsocket->path); + free(vsocket); + goto out; + } + } + ret = vhost_user_create_client(vsocket); + } else { + vsocket->is_server = true; + ret = vhost_user_create_server(vsocket); + } + if (ret < 0) { + free(vsocket->path); + free(vsocket); + goto out; + } + + vhost_user.vsockets[vhost_user.vsocket_cnt++] = vsocket; + +out: + pthread_mutex_unlock(&vhost_user.mutex); + + return ret; +} + +static bool +vhost_user_remove_reconnect(struct vhost_user_socket *vsocket) +{ + int found = false; + struct vhost_user_reconnect *reconn, *next; + + pthread_mutex_lock(&reconn_list.mutex); + + for (reconn = TAILQ_FIRST(&reconn_list.head); + reconn != NULL; reconn = next) { + next = TAILQ_NEXT(reconn, next); + + if (reconn->vsocket == vsocket) { + TAILQ_REMOVE(&reconn_list.head, reconn, next); + close(reconn->fd); + free(reconn); + found = true; + break; + } + } + pthread_mutex_unlock(&reconn_list.mutex); + return found; +} + +/** + * Unregister the specified vhost socket + */ +int +rte_vhost_driver_unregister(const char *path) +{ + int i; + int count; + struct vhost_user_connection *conn; + + pthread_mutex_lock(&vhost_user.mutex); + + for (i = 0; i < vhost_user.vsocket_cnt; i++) { + struct vhost_user_socket *vsocket = vhost_user.vsockets[i]; + + if (!strcmp(vsocket->path, path)) { + if (vsocket->is_server) { + fdset_del(&vhost_user.fdset, vsocket->listenfd); + close(vsocket->listenfd); + unlink(path); + } else if (vsocket->reconnect) { + vhost_user_remove_reconnect(vsocket); + } + + conn = fdset_del(&vhost_user.fdset, vsocket->connfd); + if (conn) { + RTE_LOG(INFO, VHOST_CONFIG, + "free connfd = %d for device '%s'\n", + vsocket->connfd, path); + close(vsocket->connfd); + vhost_destroy_device(conn->vid); + free(conn); + } + + free(vsocket->path); + free(vsocket); + + count = --vhost_user.vsocket_cnt; + vhost_user.vsockets[i] = vhost_user.vsockets[count]; + vhost_user.vsockets[count] = NULL; + pthread_mutex_unlock(&vhost_user.mutex); + + return 0; + } + } + pthread_mutex_unlock(&vhost_user.mutex); + + return -1; +} + +int +rte_vhost_driver_session_start(void) +{ + fdset_event_dispatch(&vhost_user.fdset); + return 0; +} diff --git a/lib/librte_vhost/vhost-net.h b/lib/librte_vhost/vhost-net.h deleted file mode 100644 index 38593a29..00000000 --- a/lib/librte_vhost/vhost-net.h +++ /dev/null @@ -1,250 +0,0 @@ -/*- - * BSD LICENSE - * - * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. - * All rights reserved. 
- * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * - * * Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in - * the documentation and/or other materials provided with the - * distribution. - * * Neither the name of Intel Corporation nor the names of its - * contributors may be used to endorse or promote products derived - * from this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - */ - -#ifndef _VHOST_NET_CDEV_H_ -#define _VHOST_NET_CDEV_H_ -#include -#include -#include -#include -#include - -#include - -#include "rte_virtio_net.h" - -/* Used to indicate that the device is running on a data core */ -#define VIRTIO_DEV_RUNNING 1 - -/* Backend value set by guest. */ -#define VIRTIO_DEV_STOPPED -1 - -#define BUF_VECTOR_MAX 256 - -/** - * Structure contains buffer address, length and descriptor index - * from vring to do scatter RX. - */ -struct buf_vector { - uint64_t buf_addr; - uint32_t buf_len; - uint32_t desc_idx; -}; - -/** - * Structure contains variables relevant to RX/TX virtqueues. - */ -struct vhost_virtqueue { - struct vring_desc *desc; - struct vring_avail *avail; - struct vring_used *used; - uint32_t size; - - /* Last index used on the available ring */ - volatile uint16_t last_used_idx; -#define VIRTIO_INVALID_EVENTFD (-1) -#define VIRTIO_UNINITIALIZED_EVENTFD (-2) - - /* Backend value to determine if device should started/stopped */ - int backend; - /* Used to notify the guest (trigger interrupt) */ - int callfd; - /* Currently unused as polling mode is enabled */ - int kickfd; - int enabled; - - /* Physical address of used ring, for logging */ - uint64_t log_guest_addr; -} __rte_cache_aligned; - -/* Old kernels have no such macro defined */ -#ifndef VIRTIO_NET_F_GUEST_ANNOUNCE - #define VIRTIO_NET_F_GUEST_ANNOUNCE 21 -#endif - - -/* - * Make an extra wrapper for VIRTIO_NET_F_MQ and - * VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MAX as they are - * introduced since kernel v3.8. This makes our - * code buildable for older kernel. - */ -#ifdef VIRTIO_NET_F_MQ - #define VHOST_MAX_QUEUE_PAIRS VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MAX - #define VHOST_SUPPORTS_MQ (1ULL << VIRTIO_NET_F_MQ) -#else - #define VHOST_MAX_QUEUE_PAIRS 1 - #define VHOST_SUPPORTS_MQ 0 -#endif - -/* - * Define virtio 1.0 for older kernels - */ -#ifndef VIRTIO_F_VERSION_1 - #define VIRTIO_F_VERSION_1 32 -#endif - -/** - * Device structure contains all configuration information relating - * to the device. 
- */ -struct virtio_net { - /* Frontend (QEMU) memory and memory region information */ - struct virtio_memory *mem; - uint64_t features; - uint64_t protocol_features; - int vid; - uint32_t flags; - uint16_t vhost_hlen; - /* to tell if we need broadcast rarp packet */ - rte_atomic16_t broadcast_rarp; - uint32_t virt_qp_nb; - struct vhost_virtqueue *virtqueue[VHOST_MAX_QUEUE_PAIRS * 2]; -#define IF_NAME_SZ (PATH_MAX > IFNAMSIZ ? PATH_MAX : IFNAMSIZ) - char ifname[IF_NAME_SZ]; - uint64_t log_size; - uint64_t log_base; - uint64_t log_addr; - struct ether_addr mac; - -} __rte_cache_aligned; - -/** - * Information relating to memory regions including offsets to - * addresses in QEMUs memory file. - */ -struct virtio_memory_regions { - uint64_t guest_phys_address; - uint64_t guest_phys_address_end; - uint64_t memory_size; - uint64_t userspace_address; - uint64_t address_offset; -}; - - -/** - * Memory structure includes region and mapping information. - */ -struct virtio_memory { - /* Base QEMU userspace address of the memory file. */ - uint64_t base_address; - uint64_t mapped_address; - uint64_t mapped_size; - uint32_t nregions; - struct virtio_memory_regions regions[0]; -}; - - -/* Macros for printing using RTE_LOG */ -#define RTE_LOGTYPE_VHOST_CONFIG RTE_LOGTYPE_USER1 -#define RTE_LOGTYPE_VHOST_DATA RTE_LOGTYPE_USER1 - -#ifdef RTE_LIBRTE_VHOST_DEBUG -#define VHOST_MAX_PRINT_BUFF 6072 -#define LOG_LEVEL RTE_LOG_DEBUG -#define LOG_DEBUG(log_type, fmt, args...) RTE_LOG(DEBUG, log_type, fmt, ##args) -#define PRINT_PACKET(device, addr, size, header) do { \ - char *pkt_addr = (char *)(addr); \ - unsigned int index; \ - char packet[VHOST_MAX_PRINT_BUFF]; \ - \ - if ((header)) \ - snprintf(packet, VHOST_MAX_PRINT_BUFF, "(%d) Header size %d: ", (device->vid), (size)); \ - else \ - snprintf(packet, VHOST_MAX_PRINT_BUFF, "(%d) Packet size %d: ", (device->vid), (size)); \ - for (index = 0; index < (size); index++) { \ - snprintf(packet + strnlen(packet, VHOST_MAX_PRINT_BUFF), VHOST_MAX_PRINT_BUFF - strnlen(packet, VHOST_MAX_PRINT_BUFF), \ - "%02hhx ", pkt_addr[index]); \ - } \ - snprintf(packet + strnlen(packet, VHOST_MAX_PRINT_BUFF), VHOST_MAX_PRINT_BUFF - strnlen(packet, VHOST_MAX_PRINT_BUFF), "\n"); \ - \ - LOG_DEBUG(VHOST_DATA, "%s", packet); \ -} while (0) -#else -#define LOG_LEVEL RTE_LOG_INFO -#define LOG_DEBUG(log_type, fmt, args...) do {} while (0) -#define PRINT_PACKET(device, addr, size, header) do {} while (0) -#endif - -/** - * Function to convert guest physical addresses to vhost virtual addresses. - * This is used to convert guest virtio buffer addresses. 
- */ -static inline uint64_t __attribute__((always_inline)) -gpa_to_vva(struct virtio_net *dev, uint64_t guest_pa) -{ - struct virtio_memory_regions *region; - uint32_t regionidx; - uint64_t vhost_va = 0; - - for (regionidx = 0; regionidx < dev->mem->nregions; regionidx++) { - region = &dev->mem->regions[regionidx]; - if ((guest_pa >= region->guest_phys_address) && - (guest_pa <= region->guest_phys_address_end)) { - vhost_va = region->address_offset + guest_pa; - break; - } - } - return vhost_va; -} - -struct virtio_net_device_ops const *notify_ops; -struct virtio_net *get_device(int vid); - -int vhost_new_device(void); -void vhost_destroy_device(int); - -void vhost_set_ifname(int, const char *if_name, unsigned int if_len); - -int vhost_get_features(int, uint64_t *); -int vhost_set_features(int, uint64_t *); - -int vhost_set_vring_num(int, struct vhost_vring_state *); -int vhost_set_vring_addr(int, struct vhost_vring_addr *); -int vhost_set_vring_base(int, struct vhost_vring_state *); -int vhost_get_vring_base(int, uint32_t, struct vhost_vring_state *); - -int vhost_set_vring_kick(int, struct vhost_vring_file *); -int vhost_set_vring_call(int, struct vhost_vring_file *); - -int vhost_set_backend(int, struct vhost_vring_file *); - -int vhost_set_owner(int); -int vhost_reset_owner(int); - -/* - * Backend-specific cleanup. Defined by vhost-cuse and vhost-user. - */ -void vhost_backend_cleanup(struct virtio_net *dev); - -#endif /* _VHOST_NET_CDEV_H_ */ diff --git a/lib/librte_vhost/vhost.c b/lib/librte_vhost/vhost.c new file mode 100644 index 00000000..31825b82 --- /dev/null +++ b/lib/librte_vhost/vhost.c @@ -0,0 +1,430 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2016 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#include +#include +#include +#include +#include +#ifdef RTE_LIBRTE_VHOST_NUMA +#include +#endif + +#include +#include +#include +#include +#include +#include + +#include "vhost.h" + +#define VHOST_USER_F_PROTOCOL_FEATURES 30 + +/* Features supported by this lib. */ +#define VHOST_SUPPORTED_FEATURES ((1ULL << VIRTIO_NET_F_MRG_RXBUF) | \ + (1ULL << VIRTIO_NET_F_CTRL_VQ) | \ + (1ULL << VIRTIO_NET_F_CTRL_RX) | \ + (1ULL << VIRTIO_NET_F_GUEST_ANNOUNCE) | \ + (VHOST_SUPPORTS_MQ) | \ + (1ULL << VIRTIO_F_VERSION_1) | \ + (1ULL << VHOST_F_LOG_ALL) | \ + (1ULL << VHOST_USER_F_PROTOCOL_FEATURES) | \ + (1ULL << VIRTIO_NET_F_HOST_TSO4) | \ + (1ULL << VIRTIO_NET_F_HOST_TSO6) | \ + (1ULL << VIRTIO_NET_F_CSUM) | \ + (1ULL << VIRTIO_NET_F_GUEST_CSUM) | \ + (1ULL << VIRTIO_NET_F_GUEST_TSO4) | \ + (1ULL << VIRTIO_NET_F_GUEST_TSO6) | \ + (1ULL << VIRTIO_RING_F_INDIRECT_DESC)) + +uint64_t VHOST_FEATURES = VHOST_SUPPORTED_FEATURES; + +struct virtio_net *vhost_devices[MAX_VHOST_DEVICE]; + +/* device ops to add/remove device to/from data core. */ +struct virtio_net_device_ops const *notify_ops; + +struct virtio_net * +get_device(int vid) +{ + struct virtio_net *dev = vhost_devices[vid]; + + if (unlikely(!dev)) { + RTE_LOG(ERR, VHOST_CONFIG, + "(%d) device not found.\n", vid); + } + + return dev; +} + +static void +cleanup_vq(struct vhost_virtqueue *vq, int destroy) +{ + if ((vq->callfd >= 0) && (destroy != 0)) + close(vq->callfd); + if (vq->kickfd >= 0) + close(vq->kickfd); +} + +/* + * Unmap any memory, close any file descriptors and + * free any memory owned by a device. + */ +void +cleanup_device(struct virtio_net *dev, int destroy) +{ + uint32_t i; + + vhost_backend_cleanup(dev); + + for (i = 0; i < dev->virt_qp_nb; i++) { + cleanup_vq(dev->virtqueue[i * VIRTIO_QNUM + VIRTIO_RXQ], destroy); + cleanup_vq(dev->virtqueue[i * VIRTIO_QNUM + VIRTIO_TXQ], destroy); + } +} + +/* + * Release virtqueues and device memory. + */ +static void +free_device(struct virtio_net *dev) +{ + uint32_t i; + struct vhost_virtqueue *rxq, *txq; + + for (i = 0; i < dev->virt_qp_nb; i++) { + rxq = dev->virtqueue[i * VIRTIO_QNUM + VIRTIO_RXQ]; + txq = dev->virtqueue[i * VIRTIO_QNUM + VIRTIO_TXQ]; + + rte_free(rxq->shadow_used_ring); + rte_free(txq->shadow_used_ring); + + /* rxq and txq are allocated together as queue-pair */ + rte_free(rxq); + } + + rte_free(dev); +} + +static void +init_vring_queue(struct vhost_virtqueue *vq, int qp_idx) +{ + memset(vq, 0, sizeof(struct vhost_virtqueue)); + + vq->kickfd = VIRTIO_UNINITIALIZED_EVENTFD; + vq->callfd = VIRTIO_UNINITIALIZED_EVENTFD; + + /* Backends are set to -1 indicating an inactive device. 
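+ * A callfd of >= 0, by contrast, is a live eventfd: the data path uses it
+ * to notify (interrupt) the guest. In sketch form, the same pattern that
+ * appears in the RX path further below:
+ *
+ *   if (!(vq->avail->flags & VRING_AVAIL_F_NO_INTERRUPT) && vq->callfd >= 0)
+ *           eventfd_write(vq->callfd, (eventfd_t)1);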
*/ + vq->backend = -1; + + /* always set the default vq pair to enabled */ + if (qp_idx == 0) + vq->enabled = 1; + + TAILQ_INIT(&vq->zmbuf_list); +} + +static void +init_vring_queue_pair(struct virtio_net *dev, uint32_t qp_idx) +{ + uint32_t base_idx = qp_idx * VIRTIO_QNUM; + + init_vring_queue(dev->virtqueue[base_idx + VIRTIO_RXQ], qp_idx); + init_vring_queue(dev->virtqueue[base_idx + VIRTIO_TXQ], qp_idx); +} + +static void +reset_vring_queue(struct vhost_virtqueue *vq, int qp_idx) +{ + int callfd; + + callfd = vq->callfd; + init_vring_queue(vq, qp_idx); + vq->callfd = callfd; +} + +static void +reset_vring_queue_pair(struct virtio_net *dev, uint32_t qp_idx) +{ + uint32_t base_idx = qp_idx * VIRTIO_QNUM; + + reset_vring_queue(dev->virtqueue[base_idx + VIRTIO_RXQ], qp_idx); + reset_vring_queue(dev->virtqueue[base_idx + VIRTIO_TXQ], qp_idx); +} + +int +alloc_vring_queue_pair(struct virtio_net *dev, uint32_t qp_idx) +{ + struct vhost_virtqueue *virtqueue = NULL; + uint32_t virt_rx_q_idx = qp_idx * VIRTIO_QNUM + VIRTIO_RXQ; + uint32_t virt_tx_q_idx = qp_idx * VIRTIO_QNUM + VIRTIO_TXQ; + + virtqueue = rte_malloc(NULL, + sizeof(struct vhost_virtqueue) * VIRTIO_QNUM, 0); + if (virtqueue == NULL) { + RTE_LOG(ERR, VHOST_CONFIG, + "Failed to allocate memory for virt qp:%d.\n", qp_idx); + return -1; + } + + dev->virtqueue[virt_rx_q_idx] = virtqueue; + dev->virtqueue[virt_tx_q_idx] = virtqueue + VIRTIO_TXQ; + + init_vring_queue_pair(dev, qp_idx); + + dev->virt_qp_nb += 1; + + return 0; +} + +/* + * Reset some variables in the device structure, while keeping a few + * others untouched, such as vid, ifname and virt_qp_nb: they + * should stay the same unless the device is removed. + */ +void +reset_device(struct virtio_net *dev) +{ + uint32_t i; + + dev->features = 0; + dev->protocol_features = 0; + dev->flags = 0; + + for (i = 0; i < dev->virt_qp_nb; i++) + reset_vring_queue_pair(dev, i); +} + +/* + * Invoked when a new vhost-user connection is established (when + * a new virtio device is being attached). + */ +int +vhost_new_device(void) +{ + struct virtio_net *dev; + int i; + + dev = rte_zmalloc(NULL, sizeof(struct virtio_net), 0); + if (dev == NULL) { + RTE_LOG(ERR, VHOST_CONFIG, + "Failed to allocate memory for new dev.\n"); + return -1; + } + + for (i = 0; i < MAX_VHOST_DEVICE; i++) { + if (vhost_devices[i] == NULL) + break; + } + if (i == MAX_VHOST_DEVICE) { + RTE_LOG(ERR, VHOST_CONFIG, + "Failed to find a free slot for new device.\n"); + rte_free(dev); + return -1; + } + + vhost_devices[i] = dev; + dev->vid = i; + + return i; +} + +/* + * Invoked when the vhost-user connection is broken (when + * the virtio device is being detached). + */ +void +vhost_destroy_device(int vid) +{ + struct virtio_net *dev = get_device(vid); + + if (dev == NULL) + return; + + if (dev->flags & VIRTIO_DEV_RUNNING) { + dev->flags &= ~VIRTIO_DEV_RUNNING; + notify_ops->destroy_device(vid); + } + + cleanup_device(dev, 1); + free_device(dev); + + vhost_devices[vid] = NULL; +} + +void +vhost_set_ifname(int vid, const char *if_name, unsigned int if_len) +{ + struct virtio_net *dev; + unsigned int len; + + dev = get_device(vid); + if (dev == NULL) + return; + + len = if_len > sizeof(dev->ifname) ?
+ sizeof(dev->ifname) : if_len; + + strncpy(dev->ifname, if_name, len); + dev->ifname[sizeof(dev->ifname) - 1] = '\0'; +} + +void +vhost_enable_dequeue_zero_copy(int vid) +{ + struct virtio_net *dev = get_device(vid); + + if (dev == NULL) + return; + + dev->dequeue_zero_copy = 1; +} + +int +rte_vhost_get_numa_node(int vid) +{ +#ifdef RTE_LIBRTE_VHOST_NUMA + struct virtio_net *dev = get_device(vid); + int numa_node; + int ret; + + if (dev == NULL) + return -1; + + ret = get_mempolicy(&numa_node, NULL, 0, dev, + MPOL_F_NODE | MPOL_F_ADDR); + if (ret < 0) { + RTE_LOG(ERR, VHOST_CONFIG, + "(%d) failed to query numa node: %d\n", vid, ret); + return -1; + } + + return numa_node; +#else + RTE_SET_USED(vid); + return -1; +#endif +} + +uint32_t +rte_vhost_get_queue_num(int vid) +{ + struct virtio_net *dev = get_device(vid); + + if (dev == NULL) + return 0; + + return dev->virt_qp_nb; +} + +int +rte_vhost_get_ifname(int vid, char *buf, size_t len) +{ + struct virtio_net *dev = get_device(vid); + + if (dev == NULL) + return -1; + + len = RTE_MIN(len, sizeof(dev->ifname)); + + strncpy(buf, dev->ifname, len); + buf[len - 1] = '\0'; + + return 0; +} + +uint16_t +rte_vhost_avail_entries(int vid, uint16_t queue_id) +{ + struct virtio_net *dev; + struct vhost_virtqueue *vq; + + dev = get_device(vid); + if (!dev) + return 0; + + vq = dev->virtqueue[queue_id]; + if (!vq->enabled) + return 0; + + return *(volatile uint16_t *)&vq->avail->idx - vq->last_used_idx; +} + +int +rte_vhost_enable_guest_notification(int vid, uint16_t queue_id, int enable) +{ + struct virtio_net *dev = get_device(vid); + + if (dev == NULL) + return -1; + + if (enable) { + RTE_LOG(ERR, VHOST_CONFIG, + "guest notification isn't supported.\n"); + return -1; + } + + dev->virtqueue[queue_id]->used->flags = VRING_USED_F_NO_NOTIFY; + return 0; +} + +uint64_t rte_vhost_feature_get(void) +{ + return VHOST_FEATURES; +} + +int rte_vhost_feature_disable(uint64_t feature_mask) +{ + VHOST_FEATURES = VHOST_FEATURES & ~feature_mask; + return 0; +} + +int rte_vhost_feature_enable(uint64_t feature_mask) +{ + if ((feature_mask & VHOST_SUPPORTED_FEATURES) == feature_mask) { + VHOST_FEATURES = VHOST_FEATURES | feature_mask; + return 0; + } + return -1; +} + +/* + * Register ops so that we can add/remove device to data core. + */ +int +rte_vhost_driver_callback_register(struct virtio_net_device_ops const * const ops) +{ + notify_ops = ops; + + return 0; +} diff --git a/lib/librte_vhost/vhost.h b/lib/librte_vhost/vhost.h new file mode 100644 index 00000000..22564f1c --- /dev/null +++ b/lib/librte_vhost/vhost.h @@ -0,0 +1,293 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef _VHOST_NET_CDEV_H_ +#define _VHOST_NET_CDEV_H_ +#include +#include +#include +#include +#include +#include + +#include + +#include "rte_virtio_net.h" + +/* Used to indicate that the device is running on a data core */ +#define VIRTIO_DEV_RUNNING 1 + +/* Backend value set by guest. */ +#define VIRTIO_DEV_STOPPED -1 + +#define BUF_VECTOR_MAX 256 + +/** + * Structure contains buffer address, length and descriptor index + * from vring to do scatter RX. + */ +struct buf_vector { + uint64_t buf_addr; + uint32_t buf_len; + uint32_t desc_idx; +}; + +/* + * A structure to hold some fields needed in zero copy code path, + * mainly for associating an mbuf with the right desc_idx. + */ +struct zcopy_mbuf { + struct rte_mbuf *mbuf; + uint32_t desc_idx; + uint16_t in_use; + + TAILQ_ENTRY(zcopy_mbuf) next; +}; +TAILQ_HEAD(zcopy_mbuf_list, zcopy_mbuf); + +/** + * Structure contains variables relevant to RX/TX virtqueues. + */ +struct vhost_virtqueue { + struct vring_desc *desc; + struct vring_avail *avail; + struct vring_used *used; + uint32_t size; + + uint16_t last_avail_idx; + uint16_t last_used_idx; +#define VIRTIO_INVALID_EVENTFD (-1) +#define VIRTIO_UNINITIALIZED_EVENTFD (-2) + + /* Backend value used to determine if the device should be started/stopped */ + int backend; + /* Used to notify the guest (trigger interrupt) */ + int callfd; + /* Currently unused as polling mode is enabled */ + int kickfd; + int enabled; + + /* Physical address of used ring, for logging */ + uint64_t log_guest_addr; + + uint16_t nr_zmbuf; + uint16_t zmbuf_size; + uint16_t last_zmbuf_idx; + struct zcopy_mbuf *zmbufs; + struct zcopy_mbuf_list zmbuf_list; + + struct vring_used_elem *shadow_used_ring; + uint16_t shadow_used_idx; +} __rte_cache_aligned; + +/* Old kernels have no such macro defined */ +#ifndef VIRTIO_NET_F_GUEST_ANNOUNCE + #define VIRTIO_NET_F_GUEST_ANNOUNCE 21 +#endif + + +/* + * Make an extra wrapper for VIRTIO_NET_F_MQ and + * VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MAX as they were + * introduced in kernel v3.8. This keeps our + * code buildable with older kernels. + */ +#ifdef VIRTIO_NET_F_MQ + #define VHOST_MAX_QUEUE_PAIRS VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MAX + #define VHOST_SUPPORTS_MQ (1ULL << VIRTIO_NET_F_MQ) +#else + #define VHOST_MAX_QUEUE_PAIRS 1 + #define VHOST_SUPPORTS_MQ 0 +#endif + +/* + * Define virtio 1.0 for older kernels + */ +#ifndef VIRTIO_F_VERSION_1 + #define VIRTIO_F_VERSION_1 32 +#endif + +struct guest_page { + uint64_t guest_phys_addr; + uint64_t host_phys_addr; + uint64_t size; +}; + +/** + * Device structure contains all configuration information relating + * to the device.
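+ * Queue pair i occupies two consecutive slots of the virtqueue[] array
+ * below; as a sketch of the indexing convention used throughout the
+ * library (see free_device() and alloc_vring_queue_pair() in vhost.c):
+ *
+ *   rxq = dev->virtqueue[i * VIRTIO_QNUM + VIRTIO_RXQ];
+ *   txq = dev->virtqueue[i * VIRTIO_QNUM + VIRTIO_TXQ];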
+ */ +struct virtio_net { + /* Frontend (QEMU) memory and memory region information */ + struct virtio_memory *mem; + uint64_t features; + uint64_t protocol_features; + int vid; + uint32_t flags; + uint16_t vhost_hlen; + /* to tell if we need broadcast rarp packet */ + rte_atomic16_t broadcast_rarp; + uint32_t virt_qp_nb; + int dequeue_zero_copy; + struct vhost_virtqueue *virtqueue[VHOST_MAX_QUEUE_PAIRS * 2]; +#define IF_NAME_SZ (PATH_MAX > IFNAMSIZ ? PATH_MAX : IFNAMSIZ) + char ifname[IF_NAME_SZ]; + uint64_t log_size; + uint64_t log_base; + uint64_t log_addr; + struct ether_addr mac; + + uint32_t nr_guest_pages; + uint32_t max_guest_pages; + struct guest_page *guest_pages; +} __rte_cache_aligned; + +/** + * Information relating to memory regions including offsets to + * addresses in QEMUs memory file. + */ +struct virtio_memory_region { + uint64_t guest_phys_addr; + uint64_t guest_user_addr; + uint64_t host_user_addr; + uint64_t size; + void *mmap_addr; + uint64_t mmap_size; + int fd; +}; + + +/** + * Memory structure includes region and mapping information. + */ +struct virtio_memory { + uint32_t nregions; + struct virtio_memory_region regions[0]; +}; + + +/* Macros for printing using RTE_LOG */ +#define RTE_LOGTYPE_VHOST_CONFIG RTE_LOGTYPE_USER1 +#define RTE_LOGTYPE_VHOST_DATA RTE_LOGTYPE_USER1 + +#ifdef RTE_LIBRTE_VHOST_DEBUG +#define VHOST_MAX_PRINT_BUFF 6072 +#define LOG_LEVEL RTE_LOG_DEBUG +#define LOG_DEBUG(log_type, fmt, args...) RTE_LOG(DEBUG, log_type, fmt, ##args) +#define PRINT_PACKET(device, addr, size, header) do { \ + char *pkt_addr = (char *)(addr); \ + unsigned int index; \ + char packet[VHOST_MAX_PRINT_BUFF]; \ + \ + if ((header)) \ + snprintf(packet, VHOST_MAX_PRINT_BUFF, "(%d) Header size %d: ", (device->vid), (size)); \ + else \ + snprintf(packet, VHOST_MAX_PRINT_BUFF, "(%d) Packet size %d: ", (device->vid), (size)); \ + for (index = 0; index < (size); index++) { \ + snprintf(packet + strnlen(packet, VHOST_MAX_PRINT_BUFF), VHOST_MAX_PRINT_BUFF - strnlen(packet, VHOST_MAX_PRINT_BUFF), \ + "%02hhx ", pkt_addr[index]); \ + } \ + snprintf(packet + strnlen(packet, VHOST_MAX_PRINT_BUFF), VHOST_MAX_PRINT_BUFF - strnlen(packet, VHOST_MAX_PRINT_BUFF), "\n"); \ + \ + LOG_DEBUG(VHOST_DATA, "%s", packet); \ +} while (0) +#else +#define LOG_LEVEL RTE_LOG_INFO +#define LOG_DEBUG(log_type, fmt, args...) 
do {} while (0) +#define PRINT_PACKET(device, addr, size, header) do {} while (0) +#endif + +extern uint64_t VHOST_FEATURES; +#define MAX_VHOST_DEVICE 1024 +extern struct virtio_net *vhost_devices[MAX_VHOST_DEVICE]; + +/* Convert guest physical Address to host virtual address */ +static inline uint64_t __attribute__((always_inline)) +gpa_to_vva(struct virtio_net *dev, uint64_t gpa) +{ + struct virtio_memory_region *reg; + uint32_t i; + + for (i = 0; i < dev->mem->nregions; i++) { + reg = &dev->mem->regions[i]; + if (gpa >= reg->guest_phys_addr && + gpa < reg->guest_phys_addr + reg->size) { + return gpa - reg->guest_phys_addr + + reg->host_user_addr; + } + } + + return 0; +} + +/* Convert guest physical address to host physical address */ +static inline phys_addr_t __attribute__((always_inline)) +gpa_to_hpa(struct virtio_net *dev, uint64_t gpa, uint64_t size) +{ + uint32_t i; + struct guest_page *page; + + for (i = 0; i < dev->nr_guest_pages; i++) { + page = &dev->guest_pages[i]; + + if (gpa >= page->guest_phys_addr && + gpa + size < page->guest_phys_addr + page->size) { + return gpa - page->guest_phys_addr + + page->host_phys_addr; + } + } + + return 0; +} + +struct virtio_net_device_ops const *notify_ops; +struct virtio_net *get_device(int vid); + +int vhost_new_device(void); +void cleanup_device(struct virtio_net *dev, int destroy); +void reset_device(struct virtio_net *dev); +void vhost_destroy_device(int); + +int alloc_vring_queue_pair(struct virtio_net *dev, uint32_t qp_idx); + +void vhost_set_ifname(int, const char *if_name, unsigned int if_len); +void vhost_enable_dequeue_zero_copy(int vid); + +/* + * Backend-specific cleanup. + * + * TODO: fix it; we have one backend now + */ +void vhost_backend_cleanup(struct virtio_net *dev); + +#endif /* _VHOST_NET_CDEV_H_ */ diff --git a/lib/librte_vhost/vhost_cuse/eventfd_copy.c b/lib/librte_vhost/vhost_cuse/eventfd_copy.c deleted file mode 100644 index 154b32a4..00000000 --- a/lib/librte_vhost/vhost_cuse/eventfd_copy.c +++ /dev/null @@ -1,104 +0,0 @@ -/*- - * BSD LICENSE - * - * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * - * * Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in - * the documentation and/or other materials provided with the - * distribution. - * * Neither the name of Intel Corporation nor the names of its - * contributors may be used to endorse or promote products derived - * from this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT - * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - */ - -#include -#include -#include -#include -#include -#include - -#include - -#include "eventfd_link/eventfd_link.h" -#include "eventfd_copy.h" -#include "vhost-net.h" - -static const char eventfd_cdev[] = "/dev/eventfd-link"; - -static int eventfd_link = -1; - -int -eventfd_init(void) -{ - if (eventfd_link >= 0) - return 0; - - eventfd_link = open(eventfd_cdev, O_RDWR); - if (eventfd_link < 0) { - RTE_LOG(ERR, VHOST_CONFIG, - "eventfd_link module is not loaded\n"); - return -1; - } - - return 0; -} - -int -eventfd_free(void) -{ - if (eventfd_link >= 0) - close(eventfd_link); - return 0; -} - -/* - * This function uses the eventfd_link kernel module to copy an eventfd file - * descriptor provided by QEMU in to our process space. - */ -int -eventfd_copy(int target_fd, int target_pid) -{ - int ret; - struct eventfd_copy2 eventfd_copy2; - - - /* Open the character device to the kernel module. */ - /* TODO: check this earlier rather than fail until VM boots! */ - if (eventfd_init() < 0) - return -1; - - eventfd_copy2.fd = target_fd; - eventfd_copy2.pid = target_pid; - eventfd_copy2.flags = O_NONBLOCK | O_CLOEXEC; - /* Call the IOCTL to copy the eventfd. */ - ret = ioctl(eventfd_link, EVENTFD_COPY2, &eventfd_copy2); - - if (ret < 0) { - RTE_LOG(ERR, VHOST_CONFIG, - "EVENTFD_COPY2 ioctl failed\n"); - return -1; - } - - return ret; -} diff --git a/lib/librte_vhost/vhost_cuse/eventfd_copy.h b/lib/librte_vhost/vhost_cuse/eventfd_copy.h deleted file mode 100644 index 5f446ca0..00000000 --- a/lib/librte_vhost/vhost_cuse/eventfd_copy.h +++ /dev/null @@ -1,45 +0,0 @@ -/*- - * BSD LICENSE - * - * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * - * * Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in - * the documentation and/or other materials provided with the - * distribution. - * * Neither the name of Intel Corporation nor the names of its - * contributors may be used to endorse or promote products derived - * from this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT - * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - */ -#ifndef _EVENTFD_H -#define _EVENTFD_H - -int -eventfd_init(void); - -int -eventfd_free(void); - -int -eventfd_copy(int target_fd, int target_pid); - -#endif diff --git a/lib/librte_vhost/vhost_cuse/vhost-net-cdev.c b/lib/librte_vhost/vhost_cuse/vhost-net-cdev.c deleted file mode 100644 index 5d150116..00000000 --- a/lib/librte_vhost/vhost_cuse/vhost-net-cdev.c +++ /dev/null @@ -1,431 +0,0 @@ -/*- - * BSD LICENSE - * - * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * - * * Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in - * the documentation and/or other materials provided with the - * distribution. - * * Neither the name of Intel Corporation nor the names of its - * contributors may be used to endorse or promote products derived - * from this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - */ - -#include -#include -#include -#include -#include -#include -#include - -#include -#include -#include -#include - -#include "virtio-net-cdev.h" -#include "vhost-net.h" -#include "eventfd_copy.h" - -#define FUSE_OPT_DUMMY "\0\0" -#define FUSE_OPT_FORE "-f\0\0" -#define FUSE_OPT_NOMULTI "-s\0\0" - -static const uint32_t default_major = 231; -static const uint32_t default_minor = 1; -static const char cuse_device_name[] = "/dev/cuse"; -static const char default_cdev[] = "vhost-net"; - -static struct fuse_session *session; - -/* - * Returns vhost_cuse_device_ctx from given fuse_req_t. The - * index is populated later when the device is added to the - * device linked list. 
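- * The vid itself travels through FUSE in the file handle: vhost_net_open()
- * below stores it with "fi->fh = vid", and each subsequent request recovers
- * it here via "ctx.vid = (int)fi->fh".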
- */ -static struct vhost_cuse_device_ctx -fuse_req_to_vhost_ctx(fuse_req_t req, struct fuse_file_info *fi) -{ - struct vhost_cuse_device_ctx ctx; - struct fuse_ctx const *const req_ctx = fuse_req_ctx(req); - - ctx.pid = req_ctx->pid; - ctx.vid = (int)fi->fh; - - return ctx; -} - -/* - * When the device is created in QEMU it gets initialised here and - * added to the device linked list. - */ -static void -vhost_net_open(fuse_req_t req, struct fuse_file_info *fi) -{ - int vid = 0; - - vid = vhost_new_device(); - if (vid == -1) { - fuse_reply_err(req, EPERM); - return; - } - - fi->fh = vid; - - RTE_LOG(INFO, VHOST_CONFIG, - "(%d) device configuration started\n", vid); - fuse_reply_open(req, fi); -} - -/* - * When QEMU is shutdown or killed the device gets released. - */ -static void -vhost_net_release(fuse_req_t req, struct fuse_file_info *fi) -{ - int err = 0; - struct vhost_cuse_device_ctx ctx = fuse_req_to_vhost_ctx(req, fi); - - vhost_destroy_device(ctx.vid); - RTE_LOG(INFO, VHOST_CONFIG, "(%d) device released\n", ctx.vid); - fuse_reply_err(req, err); -} - -/* - * Boilerplate code for CUSE IOCTL - * Implicit arguments: vid, req, result. - */ -#define VHOST_IOCTL(func) do { \ - result = (func)(vid); \ - fuse_reply_ioctl(req, result, NULL, 0); \ -} while (0) - -/* - * Boilerplate IOCTL RETRY - * Implicit arguments: req. - */ -#define VHOST_IOCTL_RETRY(size_r, size_w) do { \ - struct iovec iov_r = { arg, (size_r) }; \ - struct iovec iov_w = { arg, (size_w) }; \ - fuse_reply_ioctl_retry(req, &iov_r, \ - (size_r) ? 1 : 0, &iov_w, (size_w) ? 1 : 0);\ -} while (0) - -/* - * Boilerplate code for CUSE Read IOCTL - * Implicit arguments: vid, req, result, in_bufsz, in_buf. - */ -#define VHOST_IOCTL_R(type, var, func) do { \ - if (!in_bufsz) { \ - VHOST_IOCTL_RETRY(sizeof(type), 0);\ - } else { \ - (var) = *(const type*)in_buf; \ - result = func(vid, &(var)); \ - fuse_reply_ioctl(req, result, NULL, 0);\ - } \ -} while (0) - -/* - * Boilerplate code for CUSE Write IOCTL - * Implicit arguments: vid, req, result, out_bufsz. - */ -#define VHOST_IOCTL_W(type, var, func) do { \ - if (!out_bufsz) { \ - VHOST_IOCTL_RETRY(0, sizeof(type));\ - } else { \ - result = (func)(vid, &(var));\ - fuse_reply_ioctl(req, result, &(var), sizeof(type));\ - } \ -} while (0) - -/* - * Boilerplate code for CUSE Read/Write IOCTL - * Implicit arguments: vid, req, result, in_bufsz, in_buf. - */ -#define VHOST_IOCTL_RW(type1, var1, type2, var2, func) do { \ - if (!in_bufsz) { \ - VHOST_IOCTL_RETRY(sizeof(type1), sizeof(type2));\ - } else { \ - (var1) = *(const type1*) (in_buf); \ - result = (func)(vid, (var1), &(var2)); \ - fuse_reply_ioctl(req, result, &(var2), sizeof(type2));\ - } \ -} while (0) - -/* - * The IOCTLs are handled using CUSE/FUSE in userspace. Depending on the type - * of IOCTL a buffer is requested to read or to write. This request is handled - * by FUSE and the buffer is then given to CUSE. 
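- * As a sketch of the two-phase exchange behind VHOST_IOCTL_R above, for a
- * uint64_t argument: on the first pass no buffer has been fetched yet, so
- * the handler asks FUSE to retry with the size it needs; on the second
- * pass in_buf holds the caller's data:
- *
- *   if (!in_bufsz) {
- *           struct iovec iov_r = { arg, sizeof(uint64_t) };
- *           fuse_reply_ioctl_retry(req, &iov_r, 1, NULL, 0);
- *   } else {
- *           features = *(const uint64_t *)in_buf;
- *   }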
- */ -static void -vhost_net_ioctl(fuse_req_t req, int cmd, void *arg, - struct fuse_file_info *fi, __rte_unused unsigned flags, - const void *in_buf, size_t in_bufsz, size_t out_bufsz) -{ - struct vhost_cuse_device_ctx ctx = fuse_req_to_vhost_ctx(req, fi); - struct vhost_vring_file file; - struct vhost_vring_state state; - struct vhost_vring_addr addr; - uint64_t features; - uint32_t index; - int result = 0; - int vid = ctx.vid; - - switch (cmd) { - case VHOST_NET_SET_BACKEND: - LOG_DEBUG(VHOST_CONFIG, - "(%d) IOCTL: VHOST_NET_SET_BACKEND\n", ctx.vid); - if (!in_buf) { - VHOST_IOCTL_RETRY(sizeof(file), 0); - break; - } - file = *(const struct vhost_vring_file *)in_buf; - result = cuse_set_backend(ctx, &file); - fuse_reply_ioctl(req, result, NULL, 0); - break; - - case VHOST_GET_FEATURES: - LOG_DEBUG(VHOST_CONFIG, - "(%d) IOCTL: VHOST_GET_FEATURES\n", vid); - VHOST_IOCTL_W(uint64_t, features, vhost_get_features); - break; - - case VHOST_SET_FEATURES: - LOG_DEBUG(VHOST_CONFIG, - "(%d) IOCTL: VHOST_SET_FEATURES\n", vid); - VHOST_IOCTL_R(uint64_t, features, vhost_set_features); - break; - - case VHOST_RESET_OWNER: - LOG_DEBUG(VHOST_CONFIG, - "(%d) IOCTL: VHOST_RESET_OWNER\n", vid); - VHOST_IOCTL(vhost_reset_owner); - break; - - case VHOST_SET_OWNER: - LOG_DEBUG(VHOST_CONFIG, - "(%d) IOCTL: VHOST_SET_OWNER\n", vid); - VHOST_IOCTL(vhost_set_owner); - break; - - case VHOST_SET_MEM_TABLE: - /*TODO fix race condition.*/ - LOG_DEBUG(VHOST_CONFIG, - "(%d) IOCTL: VHOST_SET_MEM_TABLE\n", vid); - static struct vhost_memory mem_temp; - - switch (in_bufsz) { - case 0: - VHOST_IOCTL_RETRY(sizeof(struct vhost_memory), 0); - break; - - case sizeof(struct vhost_memory): - mem_temp = *(const struct vhost_memory *) in_buf; - - if (mem_temp.nregions > 0) { - VHOST_IOCTL_RETRY(sizeof(struct vhost_memory) + - (sizeof(struct vhost_memory_region) * - mem_temp.nregions), 0); - } else { - result = -1; - fuse_reply_ioctl(req, result, NULL, 0); - } - break; - - default: - result = cuse_set_mem_table(ctx, in_buf, - mem_temp.nregions); - if (result) - fuse_reply_err(req, EINVAL); - else - fuse_reply_ioctl(req, result, NULL, 0); - } - break; - - case VHOST_SET_VRING_NUM: - LOG_DEBUG(VHOST_CONFIG, - "(%d) IOCTL: VHOST_SET_VRING_NUM\n", vid); - VHOST_IOCTL_R(struct vhost_vring_state, state, - vhost_set_vring_num); - break; - - case VHOST_SET_VRING_BASE: - LOG_DEBUG(VHOST_CONFIG, - "(%d) IOCTL: VHOST_SET_VRING_BASE\n", vid); - VHOST_IOCTL_R(struct vhost_vring_state, state, - vhost_set_vring_base); - break; - - case VHOST_GET_VRING_BASE: - LOG_DEBUG(VHOST_CONFIG, - "(%d) IOCTL: VHOST_GET_VRING_BASE\n", vid); - VHOST_IOCTL_RW(uint32_t, index, - struct vhost_vring_state, state, vhost_get_vring_base); - break; - - case VHOST_SET_VRING_ADDR: - LOG_DEBUG(VHOST_CONFIG, - "(%d) IOCTL: VHOST_SET_VRING_ADDR\n", vid); - VHOST_IOCTL_R(struct vhost_vring_addr, addr, - vhost_set_vring_addr); - break; - - case VHOST_SET_VRING_KICK: - case VHOST_SET_VRING_CALL: - if (cmd == VHOST_SET_VRING_KICK) - LOG_DEBUG(VHOST_CONFIG, - "(%d) IOCTL: VHOST_SET_VRING_KICK\n", vid); - else - LOG_DEBUG(VHOST_CONFIG, - "(%d) IOCTL: VHOST_SET_VRING_CALL\n", vid); - if (!in_buf) - VHOST_IOCTL_RETRY(sizeof(struct vhost_vring_file), 0); - else { - int fd; - file = *(const struct vhost_vring_file *)in_buf; - LOG_DEBUG(VHOST_CONFIG, - "idx:%d fd:%d\n", file.index, file.fd); - fd = eventfd_copy(file.fd, ctx.pid); - if (fd < 0) { - fuse_reply_ioctl(req, -1, NULL, 0); - result = -1; - break; - } - file.fd = fd; - if (cmd == VHOST_SET_VRING_KICK) { - result = 
vhost_set_vring_kick(vid, &file); - fuse_reply_ioctl(req, result, NULL, 0); - } else { - result = vhost_set_vring_call(vid, &file); - fuse_reply_ioctl(req, result, NULL, 0); - } - } - break; - - default: - RTE_LOG(ERR, VHOST_CONFIG, - "(%d) IOCTL: DOES NOT EXIST\n", vid); - result = -1; - fuse_reply_ioctl(req, result, NULL, 0); - } - - if (result < 0) - LOG_DEBUG(VHOST_CONFIG, - "(%d) IOCTL: FAIL\n", vid); - else - LOG_DEBUG(VHOST_CONFIG, - "(%d) IOCTL: SUCCESS\n", vid); -} - -/* - * Structure holding the open, release and ioctl function pointers. - */ -static const struct cuse_lowlevel_ops vhost_net_ops = { - .open = vhost_net_open, - .release = vhost_net_release, - .ioctl = vhost_net_ioctl, -}; - -/* - * cuse_info is populated and used to register the cuse device. - * vhost_net_device_ops are also passed when the device is registered in the application. - */ -int -rte_vhost_driver_register(const char *dev_name, uint64_t flags) -{ - struct cuse_info cuse_info; - char device_name[PATH_MAX] = ""; - char char_device_name[PATH_MAX] = ""; - const char *device_argv[] = { device_name }; - - char fuse_opt_dummy[] = FUSE_OPT_DUMMY; - char fuse_opt_fore[] = FUSE_OPT_FORE; - char fuse_opt_nomulti[] = FUSE_OPT_NOMULTI; - char *fuse_argv[] = {fuse_opt_dummy, fuse_opt_fore, fuse_opt_nomulti}; - - if (flags) { - RTE_LOG(ERR, VHOST_CONFIG, - "vhost-cuse does not support any flags so far\n"); - return -1; - } - - if (access(cuse_device_name, R_OK | W_OK) < 0) { - RTE_LOG(ERR, VHOST_CONFIG, - "char device %s can't be accessed, maybe it does not exist\n", - cuse_device_name); - return -1; - } - - if (eventfd_init() < 0) - return -1; - - /* - * The device name is created. This is passed to QEMU so that it can - * register the device with our application. - */ - snprintf(device_name, PATH_MAX, "DEVNAME=%s", dev_name); - snprintf(char_device_name, PATH_MAX, "/dev/%s", dev_name); - - /* Check if device already exists. */ - if (access(char_device_name, F_OK) != -1) { - RTE_LOG(ERR, VHOST_CONFIG, - "char device %s already exists\n", char_device_name); - return -1; - } - - memset(&cuse_info, 0, sizeof(cuse_info)); - cuse_info.dev_major = default_major; - cuse_info.dev_minor = default_minor; - cuse_info.dev_info_argc = 1; - cuse_info.dev_info_argv = device_argv; - cuse_info.flags = CUSE_UNRESTRICTED_IOCTL; - - session = cuse_lowlevel_setup(3, fuse_argv, - &cuse_info, &vhost_net_ops, 0, NULL); - if (session == NULL) - return -1; - - return 0; -} - -/** - * An empty function for unregister - */ -int -rte_vhost_driver_unregister(const char *dev_name __rte_unused) -{ - return 0; -} - -/** - * The CUSE session is launched allowing the application to receive open, - * release and ioctl calls. - */ -int -rte_vhost_driver_session_start(void) -{ - fuse_session_loop(session); - - return 0; -} diff --git a/lib/librte_vhost/vhost_cuse/virtio-net-cdev.c b/lib/librte_vhost/vhost_cuse/virtio-net-cdev.c deleted file mode 100644 index 552be7d4..00000000 --- a/lib/librte_vhost/vhost_cuse/virtio-net-cdev.c +++ /dev/null @@ -1,433 +0,0 @@ -/*- - * BSD LICENSE - * - * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * - * * Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in - * the documentation and/or other materials provided with the - * distribution. - * * Neither the name of Intel Corporation nor the names of its - * contributors may be used to endorse or promote products derived - * from this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include - -#include "rte_virtio_net.h" -#include "vhost-net.h" -#include "virtio-net-cdev.h" -#include "eventfd_copy.h" - -/* Line size for reading maps file. */ -static const uint32_t BUFSIZE = PATH_MAX; - -/* Size of prot char array in procmap. */ -#define PROT_SZ 5 - -/* Number of elements in procmap struct. */ -#define PROCMAP_SZ 8 - -/* Structure containing information gathered from maps file. */ -struct procmap { - uint64_t va_start; /* Start virtual address in file. */ - uint64_t len; /* Size of file. */ - uint64_t pgoff; /* Not used. */ - uint32_t maj; /* Not used. */ - uint32_t min; /* Not used. */ - uint32_t ino; /* Not used. */ - char prot[PROT_SZ]; /* Not used. */ - char fname[PATH_MAX]; /* File name. */ -}; - -/* - * Locate the file containing QEMU's memory space and - * map it to our address space. - */ -static int -host_memory_map(pid_t pid, uint64_t addr, - uint64_t *mapped_address, uint64_t *mapped_size) -{ - struct dirent *dptr = NULL; - struct procmap procmap; - DIR *dp = NULL; - int fd; - int i; - char memfile[PATH_MAX]; - char mapfile[PATH_MAX]; - char procdir[PATH_MAX]; - char resolved_path[PATH_MAX]; - char *path = NULL; - FILE *fmap; - void *map; - uint8_t found = 0; - char line[BUFSIZE]; - char dlm[] = "- : "; - char *str, *sp, *in[PROCMAP_SZ]; - char *end = NULL; - - /* Path where mem files are located. */ - snprintf(procdir, PATH_MAX, "/proc/%u/fd/", pid); - /* Maps file used to locate mem file. */ - snprintf(mapfile, PATH_MAX, "/proc/%u/maps", pid); - - fmap = fopen(mapfile, "r"); - if (fmap == NULL) { - RTE_LOG(ERR, VHOST_CONFIG, - "Failed to open maps file for pid %d\n", - pid); - return -1; - } - - /* Read through maps file until we find out base_address. */ - while (fgets(line, BUFSIZE, fmap) != 0) { - str = line; - errno = 0; - /* Split line into fields. */ - for (i = 0; i < PROCMAP_SZ; i++) { - in[i] = strtok_r(str, &dlm[i], &sp); - if ((in[i] == NULL) || (errno != 0)) { - fclose(fmap); - return -1; - } - str = NULL; - } - - /* Convert/Copy each field as needed. 
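- * For reference, a maps line looks like this (illustrative values):
- *
- *   2aaaaac00000-2aaabac00000 rw-s 00000000 00:0e 1234 /dev/hugepages/qemu
- *
- * so after splitting, in[0] and in[1] are the start and end addresses,
- * in[2] the protection flags, in[3] the offset, in[4] and in[5] the device
- * major/minor, in[6] the inode and in[7] the file name.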
*/ - procmap.va_start = strtoull(in[0], &end, 16); - if ((in[0] == '\0') || (end == NULL) || (*end != '\0') || - (errno != 0)) { - fclose(fmap); - return -1; - } - - procmap.len = strtoull(in[1], &end, 16); - if ((in[1] == '\0') || (end == NULL) || (*end != '\0') || - (errno != 0)) { - fclose(fmap); - return -1; - } - - procmap.pgoff = strtoull(in[3], &end, 16); - if ((in[3] == '\0') || (end == NULL) || (*end != '\0') || - (errno != 0)) { - fclose(fmap); - return -1; - } - - procmap.maj = strtoul(in[4], &end, 16); - if ((in[4] == '\0') || (end == NULL) || (*end != '\0') || - (errno != 0)) { - fclose(fmap); - return -1; - } - - procmap.min = strtoul(in[5], &end, 16); - if ((in[5] == '\0') || (end == NULL) || (*end != '\0') || - (errno != 0)) { - fclose(fmap); - return -1; - } - - procmap.ino = strtoul(in[6], &end, 16); - if ((in[6] == '\0') || (end == NULL) || (*end != '\0') || - (errno != 0)) { - fclose(fmap); - return -1; - } - - memcpy(&procmap.prot, in[2], PROT_SZ); - memcpy(&procmap.fname, in[7], PATH_MAX); - - if (procmap.va_start == addr) { - procmap.len = procmap.len - procmap.va_start; - found = 1; - break; - } - } - fclose(fmap); - - if (!found) { - RTE_LOG(ERR, VHOST_CONFIG, - "Failed to find memory file in pid %d maps file\n", - pid); - return -1; - } - - /* Find the guest memory file among the process fds. */ - dp = opendir(procdir); - if (dp == NULL) { - RTE_LOG(ERR, VHOST_CONFIG, - "Cannot open pid %d process directory\n", - pid); - return -1; - } - - found = 0; - - /* Read the fd directory contents. */ - while (NULL != (dptr = readdir(dp))) { - snprintf(memfile, PATH_MAX, "/proc/%u/fd/%s", - pid, dptr->d_name); - path = realpath(memfile, resolved_path); - if ((path == NULL) && (strlen(resolved_path) == 0)) { - RTE_LOG(ERR, VHOST_CONFIG, - "Failed to resolve fd directory\n"); - closedir(dp); - return -1; - } - if (strncmp(resolved_path, procmap.fname, - strnlen(procmap.fname, PATH_MAX)) == 0) { - found = 1; - break; - } - } - - closedir(dp); - - if (found == 0) { - RTE_LOG(ERR, VHOST_CONFIG, - "Failed to find memory file for pid %d\n", - pid); - return -1; - } - /* Open the shared memory file and map the memory into this process. 
*/ - fd = open(memfile, O_RDWR); - - if (fd == -1) { - RTE_LOG(ERR, VHOST_CONFIG, - "Failed to open %s for pid %d\n", - memfile, pid); - return -1; - } - - map = mmap(0, (size_t)procmap.len, PROT_READ|PROT_WRITE, - MAP_POPULATE|MAP_SHARED, fd, 0); - close(fd); - - if (map == MAP_FAILED) { - RTE_LOG(ERR, VHOST_CONFIG, - "Error mapping the file %s for pid %d\n", - memfile, pid); - return -1; - } - - /* Store the memory address and size in the device data structure */ - *mapped_address = (uint64_t)(uintptr_t)map; - *mapped_size = procmap.len; - - LOG_DEBUG(VHOST_CONFIG, - "Mem File: %s->%s - Size: %llu - VA: %p\n", - memfile, resolved_path, - (unsigned long long)*mapped_size, map); - - return 0; -} - -int -cuse_set_mem_table(struct vhost_cuse_device_ctx ctx, - const struct vhost_memory *mem_regions_addr, uint32_t nregions) -{ - uint64_t size = offsetof(struct vhost_memory, regions); - uint32_t idx, valid_regions; - struct virtio_memory_regions *pregion; - struct vhost_memory_region *mem_regions = (void *)(uintptr_t) - ((uint64_t)(uintptr_t)mem_regions_addr + size); - uint64_t base_address = 0, mapped_address, mapped_size; - struct virtio_net *dev; - - dev = get_device(ctx.vid); - if (dev == NULL) - return -1; - - if (dev->mem && dev->mem->mapped_address) { - munmap((void *)(uintptr_t)dev->mem->mapped_address, - (size_t)dev->mem->mapped_size); - free(dev->mem); - dev->mem = NULL; - } - - dev->mem = calloc(1, sizeof(struct virtio_memory) + - sizeof(struct virtio_memory_regions) * nregions); - if (dev->mem == NULL) { - RTE_LOG(ERR, VHOST_CONFIG, - "(%d) failed to allocate memory for dev->mem\n", - dev->vid); - return -1; - } - - pregion = &dev->mem->regions[0]; - - for (idx = 0; idx < nregions; idx++) { - pregion[idx].guest_phys_address = - mem_regions[idx].guest_phys_addr; - pregion[idx].guest_phys_address_end = - pregion[idx].guest_phys_address + - mem_regions[idx].memory_size; - pregion[idx].memory_size = - mem_regions[idx].memory_size; - pregion[idx].userspace_address = - mem_regions[idx].userspace_addr; - - LOG_DEBUG(VHOST_CONFIG, - "REGION: %u - GPA: %p - QVA: %p - SIZE (%"PRIu64")\n", - idx, - (void *)(uintptr_t)pregion[idx].guest_phys_address, - (void *)(uintptr_t)pregion[idx].userspace_address, - pregion[idx].memory_size); - - /*set the base address mapping*/ - if (pregion[idx].guest_phys_address == 0x0) { - base_address = - pregion[idx].userspace_address; - /* Map VM memory file */ - if (host_memory_map(ctx.pid, base_address, - &mapped_address, &mapped_size) != 0) { - free(dev->mem); - dev->mem = NULL; - return -1; - } - dev->mem->mapped_address = mapped_address; - dev->mem->base_address = base_address; - dev->mem->mapped_size = mapped_size; - } - } - - /* Check that we have a valid base address. 
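- * To recap the offset arithmetic applied below (the same relation that
- * gpa_to_vva() relies on): for each region,
- *
- *   address_offset = mapped_address - base_address
- *                  + userspace_address - guest_phys_address
- *
- * so that a guest physical address translates as vva = gpa + address_offset.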
*/ - if (base_address == 0) { - RTE_LOG(ERR, VHOST_CONFIG, - "Failed to find base address of qemu memory file.\n"); - free(dev->mem); - dev->mem = NULL; - return -1; - } - - valid_regions = nregions; - for (idx = 0; idx < nregions; idx++) { - if ((pregion[idx].userspace_address < base_address) || - (pregion[idx].userspace_address > - (base_address + mapped_size))) - valid_regions--; - } - - - if (valid_regions != nregions) { - valid_regions = 0; - for (idx = nregions; 0 != idx--; ) { - if ((pregion[idx].userspace_address < base_address) || - (pregion[idx].userspace_address > - (base_address + mapped_size))) { - memmove(&pregion[idx], &pregion[idx + 1], - sizeof(struct virtio_memory_regions) * - valid_regions); - } else - valid_regions++; - } - } - - for (idx = 0; idx < valid_regions; idx++) { - pregion[idx].address_offset = - mapped_address - base_address + - pregion[idx].userspace_address - - pregion[idx].guest_phys_address; - } - dev->mem->nregions = valid_regions; - - return 0; -} - -/* - * Function to get the tap device name from the provided file descriptor and - * save it in the device structure. - */ -static int -get_ifname(int vid, int tap_fd, int pid) -{ - int fd_tap; - struct ifreq ifr; - uint32_t ifr_size; - int ret; - - fd_tap = eventfd_copy(tap_fd, pid); - if (fd_tap < 0) - return -1; - - ret = ioctl(fd_tap, TUNGETIFF, &ifr); - - if (close(fd_tap) < 0) - RTE_LOG(ERR, VHOST_CONFIG, "(%d) fd close failed\n", vid); - - if (ret >= 0) { - ifr_size = strnlen(ifr.ifr_name, sizeof(ifr.ifr_name)); - vhost_set_ifname(vid, ifr.ifr_name, ifr_size); - } else - RTE_LOG(ERR, VHOST_CONFIG, - "(%d) TUNGETIFF ioctl failed\n", vid); - - return 0; -} - -int -cuse_set_backend(struct vhost_cuse_device_ctx ctx, - struct vhost_vring_file *file) -{ - struct virtio_net *dev; - - dev = get_device(ctx.vid); - if (dev == NULL) - return -1; - - if (!(dev->flags & VIRTIO_DEV_RUNNING) && file->fd != VIRTIO_DEV_STOPPED) - get_ifname(ctx.vid, file->fd, ctx.pid); - - return vhost_set_backend(ctx.vid, file); -} - -void -vhost_backend_cleanup(struct virtio_net *dev) -{ - /* Unmap QEMU memory file if mapped. */ - if (dev->mem) { - munmap((void *)(uintptr_t)dev->mem->mapped_address, - (size_t)dev->mem->mapped_size); - free(dev->mem); - dev->mem = NULL; - } -} diff --git a/lib/librte_vhost/vhost_cuse/virtio-net-cdev.h b/lib/librte_vhost/vhost_cuse/virtio-net-cdev.h deleted file mode 100644 index 3f67154b..00000000 --- a/lib/librte_vhost/vhost_cuse/virtio-net-cdev.h +++ /dev/null @@ -1,56 +0,0 @@ -/*- - * BSD LICENSE - * - * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * - * * Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in - * the documentation and/or other materials provided with the - * distribution. - * * Neither the name of Intel Corporation nor the names of its - * contributors may be used to endorse or promote products derived - * from this software without specific prior written permission. 
- * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - */ -#ifndef _VIRTIO_NET_CDEV_H -#define _VIRTIO_NET_CDEV_H - -#include -#include - -#include "vhost-net.h" - -/* - * Structure used to identify device context. - */ -struct vhost_cuse_device_ctx { - pid_t pid; /* PID of process calling the IOCTL. */ - int vid; /* Virtio-net device ID */ -}; - -int -cuse_set_mem_table(struct vhost_cuse_device_ctx ctx, - const struct vhost_memory *mem_regions_addr, uint32_t nregions); - -int -cuse_set_backend(struct vhost_cuse_device_ctx ctx, struct vhost_vring_file *); - -#endif diff --git a/lib/librte_vhost/vhost_rxtx.c b/lib/librte_vhost/vhost_rxtx.c deleted file mode 100644 index 5806f99a..00000000 --- a/lib/librte_vhost/vhost_rxtx.c +++ /dev/null @@ -1,931 +0,0 @@ -/*- - * BSD LICENSE - * - * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * - * * Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in - * the documentation and/or other materials provided with the - * distribution. - * * Neither the name of Intel Corporation nor the names of its - * contributors may be used to endorse or promote products derived - * from this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
- */ - -#include -#include -#include - -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include "vhost-net.h" - -#define MAX_PKT_BURST 32 -#define VHOST_LOG_PAGE 4096 - -static inline void __attribute__((always_inline)) -vhost_log_page(uint8_t *log_base, uint64_t page) -{ - log_base[page / 8] |= 1 << (page % 8); -} - -static inline void __attribute__((always_inline)) -vhost_log_write(struct virtio_net *dev, uint64_t addr, uint64_t len) -{ - uint64_t page; - - if (likely(((dev->features & (1ULL << VHOST_F_LOG_ALL)) == 0) || - !dev->log_base || !len)) - return; - - if (unlikely(dev->log_size <= ((addr + len - 1) / VHOST_LOG_PAGE / 8))) - return; - - /* To make sure guest memory updates are committed before logging */ - rte_smp_wmb(); - - page = addr / VHOST_LOG_PAGE; - while (page * VHOST_LOG_PAGE < addr + len) { - vhost_log_page((uint8_t *)(uintptr_t)dev->log_base, page); - page += 1; - } -} - -static inline void __attribute__((always_inline)) -vhost_log_used_vring(struct virtio_net *dev, struct vhost_virtqueue *vq, - uint64_t offset, uint64_t len) -{ - vhost_log_write(dev, vq->log_guest_addr + offset, len); -} - -static bool -is_valid_virt_queue_idx(uint32_t idx, int is_tx, uint32_t qp_nb) -{ - return (is_tx ^ (idx & 1)) == 0 && idx < qp_nb * VIRTIO_QNUM; -} - -static void -virtio_enqueue_offload(struct rte_mbuf *m_buf, struct virtio_net_hdr *net_hdr) -{ - if (m_buf->ol_flags & PKT_TX_L4_MASK) { - net_hdr->flags = VIRTIO_NET_HDR_F_NEEDS_CSUM; - net_hdr->csum_start = m_buf->l2_len + m_buf->l3_len; - - switch (m_buf->ol_flags & PKT_TX_L4_MASK) { - case PKT_TX_TCP_CKSUM: - net_hdr->csum_offset = (offsetof(struct tcp_hdr, - cksum)); - break; - case PKT_TX_UDP_CKSUM: - net_hdr->csum_offset = (offsetof(struct udp_hdr, - dgram_cksum)); - break; - case PKT_TX_SCTP_CKSUM: - net_hdr->csum_offset = (offsetof(struct sctp_hdr, - cksum)); - break; - } - } - - if (m_buf->ol_flags & PKT_TX_TCP_SEG) { - if (m_buf->ol_flags & PKT_TX_IPV4) - net_hdr->gso_type = VIRTIO_NET_HDR_GSO_TCPV4; - else - net_hdr->gso_type = VIRTIO_NET_HDR_GSO_TCPV6; - net_hdr->gso_size = m_buf->tso_segsz; - net_hdr->hdr_len = m_buf->l2_len + m_buf->l3_len - + m_buf->l4_len; - } -} - -static inline void -copy_virtio_net_hdr(struct virtio_net *dev, uint64_t desc_addr, - struct virtio_net_hdr_mrg_rxbuf hdr) -{ - if (dev->vhost_hlen == sizeof(struct virtio_net_hdr_mrg_rxbuf)) - *(struct virtio_net_hdr_mrg_rxbuf *)(uintptr_t)desc_addr = hdr; - else - *(struct virtio_net_hdr *)(uintptr_t)desc_addr = hdr.hdr; -} - -static inline int __attribute__((always_inline)) -copy_mbuf_to_desc(struct virtio_net *dev, struct vhost_virtqueue *vq, - struct rte_mbuf *m, uint16_t desc_idx) -{ - uint32_t desc_avail, desc_offset; - uint32_t mbuf_avail, mbuf_offset; - uint32_t cpy_len; - struct vring_desc *desc; - uint64_t desc_addr; - struct virtio_net_hdr_mrg_rxbuf virtio_hdr = {{0, 0, 0, 0, 0, 0}, 0}; - - desc = &vq->desc[desc_idx]; - desc_addr = gpa_to_vva(dev, desc->addr); - /* - * Checking of 'desc_addr' placed outside of 'unlikely' macro to avoid - * performance issue with some versions of gcc (4.8.4 and 5.3.0) which - * otherwise stores offset on the stack instead of in a register. 
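- * That is, the guard below is deliberately written as
- *
- *   if (unlikely(desc->len < dev->vhost_hlen) || !desc_addr)
- *
- * rather than folding both tests into a single unlikely():
- *
- *   if (unlikely(desc->len < dev->vhost_hlen || !desc_addr))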
- */ - if (unlikely(desc->len < dev->vhost_hlen) || !desc_addr) - return -1; - - rte_prefetch0((void *)(uintptr_t)desc_addr); - - virtio_enqueue_offload(m, &virtio_hdr.hdr); - copy_virtio_net_hdr(dev, desc_addr, virtio_hdr); - vhost_log_write(dev, desc->addr, dev->vhost_hlen); - PRINT_PACKET(dev, (uintptr_t)desc_addr, dev->vhost_hlen, 0); - - desc_offset = dev->vhost_hlen; - desc_avail = desc->len - dev->vhost_hlen; - - mbuf_avail = rte_pktmbuf_data_len(m); - mbuf_offset = 0; - while (mbuf_avail != 0 || m->next != NULL) { - /* done with current mbuf, fetch next */ - if (mbuf_avail == 0) { - m = m->next; - - mbuf_offset = 0; - mbuf_avail = rte_pktmbuf_data_len(m); - } - - /* done with current desc buf, fetch next */ - if (desc_avail == 0) { - if ((desc->flags & VRING_DESC_F_NEXT) == 0) { - /* Not enough room in the vring buffer */ - return -1; - } - if (unlikely(desc->next >= vq->size)) - return -1; - - desc = &vq->desc[desc->next]; - desc_addr = gpa_to_vva(dev, desc->addr); - if (unlikely(!desc_addr)) - return -1; - - desc_offset = 0; - desc_avail = desc->len; - } - - cpy_len = RTE_MIN(desc_avail, mbuf_avail); - rte_memcpy((void *)((uintptr_t)(desc_addr + desc_offset)), - rte_pktmbuf_mtod_offset(m, void *, mbuf_offset), - cpy_len); - vhost_log_write(dev, desc->addr + desc_offset, cpy_len); - PRINT_PACKET(dev, (uintptr_t)(desc_addr + desc_offset), - cpy_len, 0); - - mbuf_avail -= cpy_len; - mbuf_offset += cpy_len; - desc_avail -= cpy_len; - desc_offset += cpy_len; - } - - return 0; -} - -/** - * This function adds buffers to the virtio device's RX virtqueue. Buffers can - * be received from the physical port or from another virtio device. A packet - * count is returned to indicate the number of packets that are successfully - * added to the RX queue. This function works when the mbuf is scattered, but - * it doesn't support the mergeable feature. - */ -static inline uint32_t __attribute__((always_inline)) -virtio_dev_rx(struct virtio_net *dev, uint16_t queue_id, - struct rte_mbuf **pkts, uint32_t count) -{ - struct vhost_virtqueue *vq; - uint16_t avail_idx, free_entries, start_idx; - uint16_t desc_indexes[MAX_PKT_BURST]; - uint16_t used_idx; - uint32_t i; - - LOG_DEBUG(VHOST_DATA, "(%d) %s\n", dev->vid, __func__); - if (unlikely(!is_valid_virt_queue_idx(queue_id, 0, dev->virt_qp_nb))) { - RTE_LOG(ERR, VHOST_DATA, "(%d) %s: invalid virtqueue idx %d.\n", - dev->vid, __func__, queue_id); - return 0; - } - - vq = dev->virtqueue[queue_id]; - if (unlikely(vq->enabled == 0)) - return 0; - - avail_idx = *((volatile uint16_t *)&vq->avail->idx); - start_idx = vq->last_used_idx; - free_entries = avail_idx - start_idx; - count = RTE_MIN(count, free_entries); - count = RTE_MIN(count, (uint32_t)MAX_PKT_BURST); - if (count == 0) - return 0; - - LOG_DEBUG(VHOST_DATA, "(%d) start_idx %d | end_idx %d\n", - dev->vid, start_idx, start_idx + count); - - /* Retrieve all of the desc indexes first to avoid caching issues.
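- * Note that vq->size is a power of two (a virtio requirement), so the
- * masking below is simply the ring modulo; with a ring of 256 entries,
- *
- *   used_idx = (start_idx + i) & (vq->size - 1);
- *
- * is equivalent to (start_idx + i) % 256.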
*/ - rte_prefetch0(&vq->avail->ring[start_idx & (vq->size - 1)]); - for (i = 0; i < count; i++) { - used_idx = (start_idx + i) & (vq->size - 1); - desc_indexes[i] = vq->avail->ring[used_idx]; - vq->used->ring[used_idx].id = desc_indexes[i]; - vq->used->ring[used_idx].len = pkts[i]->pkt_len + - dev->vhost_hlen; - vhost_log_used_vring(dev, vq, - offsetof(struct vring_used, ring[used_idx]), - sizeof(vq->used->ring[used_idx])); - } - - rte_prefetch0(&vq->desc[desc_indexes[0]]); - for (i = 0; i < count; i++) { - uint16_t desc_idx = desc_indexes[i]; - int err; - - err = copy_mbuf_to_desc(dev, vq, pkts[i], desc_idx); - if (unlikely(err)) { - used_idx = (start_idx + i) & (vq->size - 1); - vq->used->ring[used_idx].len = dev->vhost_hlen; - vhost_log_used_vring(dev, vq, - offsetof(struct vring_used, ring[used_idx]), - sizeof(vq->used->ring[used_idx])); - } - - if (i + 1 < count) - rte_prefetch0(&vq->desc[desc_indexes[i+1]]); - } - - rte_smp_wmb(); - - *(volatile uint16_t *)&vq->used->idx += count; - vq->last_used_idx += count; - vhost_log_used_vring(dev, vq, - offsetof(struct vring_used, idx), - sizeof(vq->used->idx)); - - /* flush used->idx update before we read avail->flags. */ - rte_mb(); - - /* Kick the guest if necessary. */ - if (!(vq->avail->flags & VRING_AVAIL_F_NO_INTERRUPT) - && (vq->callfd >= 0)) - eventfd_write(vq->callfd, (eventfd_t)1); - return count; -} - -static inline int -fill_vec_buf(struct vhost_virtqueue *vq, uint32_t avail_idx, - uint32_t *allocated, uint32_t *vec_idx, - struct buf_vector *buf_vec) -{ - uint16_t idx = vq->avail->ring[avail_idx & (vq->size - 1)]; - uint32_t vec_id = *vec_idx; - uint32_t len = *allocated; - - while (1) { - if (unlikely(vec_id >= BUF_VECTOR_MAX || idx >= vq->size)) - return -1; - - len += vq->desc[idx].len; - buf_vec[vec_id].buf_addr = vq->desc[idx].addr; - buf_vec[vec_id].buf_len = vq->desc[idx].len; - buf_vec[vec_id].desc_idx = idx; - vec_id++; - - if ((vq->desc[idx].flags & VRING_DESC_F_NEXT) == 0) - break; - - idx = vq->desc[idx].next; - } - - *allocated = len; - *vec_idx = vec_id; - - return 0; -} - -/* - * Returns -1 on fail, 0 on success - */ -static inline int -reserve_avail_buf_mergeable(struct vhost_virtqueue *vq, uint32_t size, - uint16_t *end, struct buf_vector *buf_vec) -{ - uint16_t cur_idx; - uint16_t avail_idx; - uint32_t allocated = 0; - uint32_t vec_idx = 0; - uint16_t tries = 0; - - cur_idx = vq->last_used_idx; - - while (1) { - avail_idx = *((volatile uint16_t *)&vq->avail->idx); - if (unlikely(cur_idx == avail_idx)) - return -1; - - if (unlikely(fill_vec_buf(vq, cur_idx, &allocated, - &vec_idx, buf_vec) < 0)) - return -1; - - cur_idx++; - tries++; - - if (allocated >= size) - break; - - /* - * if we tried all available ring items, and still - * can't get enough buf, it means something abnormal - * happened. 
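- * As a worked example with illustrative numbers: with 2048-byte guest
- * buffers and size = pkt_len + vhost_hlen = 3000, the first round leaves
- * allocated = 2048 (< 3000), so a second descriptor chain is reserved;
- * allocated then reaches 4096 (>= 3000) and the loop ends with
- * *end = cur_idx.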
- */ - if (unlikely(tries >= vq->size)) - return -1; - } - - *end = cur_idx; - return 0; -} - -static inline uint32_t __attribute__((always_inline)) -copy_mbuf_to_desc_mergeable(struct virtio_net *dev, struct vhost_virtqueue *vq, - uint16_t end_idx, struct rte_mbuf *m, - struct buf_vector *buf_vec) -{ - struct virtio_net_hdr_mrg_rxbuf virtio_hdr = {{0, 0, 0, 0, 0, 0}, 0}; - uint32_t vec_idx = 0; - uint16_t start_idx = vq->last_used_idx; - uint16_t cur_idx = start_idx; - uint64_t desc_addr; - uint32_t desc_chain_head; - uint32_t desc_chain_len; - uint32_t mbuf_offset, mbuf_avail; - uint32_t desc_offset, desc_avail; - uint32_t cpy_len; - uint16_t desc_idx, used_idx; - - if (unlikely(m == NULL)) - return 0; - - LOG_DEBUG(VHOST_DATA, "(%d) current index %d | end index %d\n", - dev->vid, cur_idx, end_idx); - - desc_addr = gpa_to_vva(dev, buf_vec[vec_idx].buf_addr); - if (buf_vec[vec_idx].buf_len < dev->vhost_hlen || !desc_addr) - return 0; - - rte_prefetch0((void *)(uintptr_t)desc_addr); - - virtio_hdr.num_buffers = end_idx - start_idx; - LOG_DEBUG(VHOST_DATA, "(%d) RX: num merge buffers %d\n", - dev->vid, virtio_hdr.num_buffers); - - virtio_enqueue_offload(m, &virtio_hdr.hdr); - copy_virtio_net_hdr(dev, desc_addr, virtio_hdr); - vhost_log_write(dev, buf_vec[vec_idx].buf_addr, dev->vhost_hlen); - PRINT_PACKET(dev, (uintptr_t)desc_addr, dev->vhost_hlen, 0); - - desc_avail = buf_vec[vec_idx].buf_len - dev->vhost_hlen; - desc_offset = dev->vhost_hlen; - desc_chain_head = buf_vec[vec_idx].desc_idx; - desc_chain_len = desc_offset; - - mbuf_avail = rte_pktmbuf_data_len(m); - mbuf_offset = 0; - while (mbuf_avail != 0 || m->next != NULL) { - /* done with current desc buf, get the next one */ - if (desc_avail == 0) { - desc_idx = buf_vec[vec_idx].desc_idx; - vec_idx++; - - if (!(vq->desc[desc_idx].flags & VRING_DESC_F_NEXT)) { - /* Update used ring with desc information */ - used_idx = cur_idx++ & (vq->size - 1); - vq->used->ring[used_idx].id = desc_chain_head; - vq->used->ring[used_idx].len = desc_chain_len; - vhost_log_used_vring(dev, vq, - offsetof(struct vring_used, - ring[used_idx]), - sizeof(vq->used->ring[used_idx])); - desc_chain_head = buf_vec[vec_idx].desc_idx; - desc_chain_len = 0; - } - - desc_addr = gpa_to_vva(dev, buf_vec[vec_idx].buf_addr); - if (unlikely(!desc_addr)) - return 0; - - /* Prefetch buffer address. 
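Stripped of the virtio bookkeeping, the copy loop in copy_mbuf_to_desc_mergeable() (and in copy_mbuf_to_desc before it) is a two-cursor gather: walk the mbuf segments and the guest buffers in lock-step, moving min(desc_avail, mbuf_avail) bytes per round and advancing whichever side ran dry. A self-contained sketch with plain arrays standing in for mbufs and descriptors:

    #include <stddef.h>
    #include <stdint.h>
    #include <string.h>

    #define MIN2(a, b) ((a) < (b) ? (a) : (b))

    static size_t
    gather_copy(uint8_t *dst[], const size_t dst_len[], size_t ndst,
                const uint8_t *src[], const size_t src_len[], size_t nsrc)
    {
        size_t d = 0, s = 0, doff = 0, soff = 0, total = 0;

        while (s < nsrc && d < ndst) {
            size_t n = MIN2(dst_len[d] - doff, src_len[s] - soff);

            memcpy(dst[d] + doff, src[s] + soff, n);
            doff += n; soff += n; total += n;

            if (doff == dst_len[d]) { d++; doff = 0; }  /* next "descriptor" */
            if (soff == src_len[s]) { s++; soff = 0; }  /* next "segment" */
        }
        return total;
    }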
*/ - rte_prefetch0((void *)(uintptr_t)desc_addr); - desc_offset = 0; - desc_avail = buf_vec[vec_idx].buf_len; - } - - /* done with current mbuf, get the next one */ - if (mbuf_avail == 0) { - m = m->next; - - mbuf_offset = 0; - mbuf_avail = rte_pktmbuf_data_len(m); - } - - cpy_len = RTE_MIN(desc_avail, mbuf_avail); - rte_memcpy((void *)((uintptr_t)(desc_addr + desc_offset)), - rte_pktmbuf_mtod_offset(m, void *, mbuf_offset), - cpy_len); - vhost_log_write(dev, buf_vec[vec_idx].buf_addr + desc_offset, - cpy_len); - PRINT_PACKET(dev, (uintptr_t)(desc_addr + desc_offset), - cpy_len, 0); - - mbuf_avail -= cpy_len; - mbuf_offset += cpy_len; - desc_avail -= cpy_len; - desc_offset += cpy_len; - desc_chain_len += cpy_len; - } - - used_idx = cur_idx & (vq->size - 1); - vq->used->ring[used_idx].id = desc_chain_head; - vq->used->ring[used_idx].len = desc_chain_len; - vhost_log_used_vring(dev, vq, - offsetof(struct vring_used, ring[used_idx]), - sizeof(vq->used->ring[used_idx])); - - return end_idx - start_idx; -} - -static inline uint32_t __attribute__((always_inline)) -virtio_dev_merge_rx(struct virtio_net *dev, uint16_t queue_id, - struct rte_mbuf **pkts, uint32_t count) -{ - struct vhost_virtqueue *vq; - uint32_t pkt_idx = 0, nr_used = 0; - uint16_t end; - struct buf_vector buf_vec[BUF_VECTOR_MAX]; - - LOG_DEBUG(VHOST_DATA, "(%d) %s\n", dev->vid, __func__); - if (unlikely(!is_valid_virt_queue_idx(queue_id, 0, dev->virt_qp_nb))) { - RTE_LOG(ERR, VHOST_DATA, "(%d) %s: invalid virtqueue idx %d.\n", - dev->vid, __func__, queue_id); - return 0; - } - - vq = dev->virtqueue[queue_id]; - if (unlikely(vq->enabled == 0)) - return 0; - - count = RTE_MIN((uint32_t)MAX_PKT_BURST, count); - if (count == 0) - return 0; - - for (pkt_idx = 0; pkt_idx < count; pkt_idx++) { - uint32_t pkt_len = pkts[pkt_idx]->pkt_len + dev->vhost_hlen; - - if (unlikely(reserve_avail_buf_mergeable(vq, pkt_len, - &end, buf_vec) < 0)) { - LOG_DEBUG(VHOST_DATA, - "(%d) failed to get enough desc from vring\n", - dev->vid); - break; - } - - nr_used = copy_mbuf_to_desc_mergeable(dev, vq, end, - pkts[pkt_idx], buf_vec); - rte_smp_wmb(); - - *(volatile uint16_t *)&vq->used->idx += nr_used; - vhost_log_used_vring(dev, vq, offsetof(struct vring_used, idx), - sizeof(vq->used->idx)); - vq->last_used_idx += nr_used; - } - - if (likely(pkt_idx)) { - /* flush used->idx update before we read avail->flags. */ - rte_mb(); - - /* Kick the guest if necessary. 
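The kick sequence both RX paths end with deserves a note: rte_mb() orders the used->idx store before the avail->flags load, because a guest that just set VRING_AVAIL_F_NO_INTERRUPT may still be relying on seeing the updated index. The notification step reduced to its essentials (a sketch assuming a Linux eventfd; the helper name is illustrative):

    #include <sys/eventfd.h>

    #define VRING_AVAIL_F_NO_INTERRUPT 1

    /*
     * Kick the guest unless it asked for interrupt suppression.  The
     * caller must already have issued a full barrier after publishing
     * used->idx, so this read of the flags cannot be reordered earlier.
     */
    static void
    notify_guest(uint16_t avail_flags, int callfd)
    {
        if (!(avail_flags & VRING_AVAIL_F_NO_INTERRUPT) && callfd >= 0)
            eventfd_write(callfd, (eventfd_t)1);
    }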
*/
-    if (!(vq->avail->flags & VRING_AVAIL_F_NO_INTERRUPT)
-            && (vq->callfd >= 0))
-        eventfd_write(vq->callfd, (eventfd_t)1);
-    }
-
-    return pkt_idx;
-}
-
-uint16_t
-rte_vhost_enqueue_burst(int vid, uint16_t queue_id,
-    struct rte_mbuf **pkts, uint16_t count)
-{
-    struct virtio_net *dev = get_device(vid);
-
-    if (!dev)
-        return 0;
-
-    if (dev->features & (1 << VIRTIO_NET_F_MRG_RXBUF))
-        return virtio_dev_merge_rx(dev, queue_id, pkts, count);
-    else
-        return virtio_dev_rx(dev, queue_id, pkts, count);
-}
-
-static void
-parse_ethernet(struct rte_mbuf *m, uint16_t *l4_proto, void **l4_hdr)
-{
-    struct ipv4_hdr *ipv4_hdr;
-    struct ipv6_hdr *ipv6_hdr;
-    void *l3_hdr = NULL;
-    struct ether_hdr *eth_hdr;
-    uint16_t ethertype;
-
-    eth_hdr = rte_pktmbuf_mtod(m, struct ether_hdr *);
-
-    m->l2_len = sizeof(struct ether_hdr);
-    ethertype = rte_be_to_cpu_16(eth_hdr->ether_type);
-
-    if (ethertype == ETHER_TYPE_VLAN) {
-        struct vlan_hdr *vlan_hdr = (struct vlan_hdr *)(eth_hdr + 1);
-
-        m->l2_len += sizeof(struct vlan_hdr);
-        ethertype = rte_be_to_cpu_16(vlan_hdr->eth_proto);
-    }
-
-    l3_hdr = (char *)eth_hdr + m->l2_len;
-
-    switch (ethertype) {
-    case ETHER_TYPE_IPv4:
-        ipv4_hdr = (struct ipv4_hdr *)l3_hdr;
-        *l4_proto = ipv4_hdr->next_proto_id;
-        m->l3_len = (ipv4_hdr->version_ihl & 0x0f) * 4;
-        *l4_hdr = (char *)l3_hdr + m->l3_len;
-        m->ol_flags |= PKT_TX_IPV4;
-        break;
-    case ETHER_TYPE_IPv6:
-        ipv6_hdr = (struct ipv6_hdr *)l3_hdr;
-        *l4_proto = ipv6_hdr->proto;
-        m->l3_len = sizeof(struct ipv6_hdr);
-        *l4_hdr = (char *)l3_hdr + m->l3_len;
-        m->ol_flags |= PKT_TX_IPV6;
-        break;
-    default:
-        m->l3_len = 0;
-        *l4_proto = 0;
-        break;
-    }
-}
-
-static inline void __attribute__((always_inline))
-vhost_dequeue_offload(struct virtio_net_hdr *hdr, struct rte_mbuf *m)
-{
-    uint16_t l4_proto = 0;
-    void *l4_hdr = NULL;
-    struct tcp_hdr *tcp_hdr = NULL;
-
-    parse_ethernet(m, &l4_proto, &l4_hdr);
-    if (hdr->flags == VIRTIO_NET_HDR_F_NEEDS_CSUM) {
-        if (hdr->csum_start == (m->l2_len + m->l3_len)) {
-            switch (hdr->csum_offset) {
-            case (offsetof(struct tcp_hdr, cksum)):
-                if (l4_proto == IPPROTO_TCP)
-                    m->ol_flags |= PKT_TX_TCP_CKSUM;
-                break;
-            case (offsetof(struct udp_hdr, dgram_cksum)):
-                if (l4_proto == IPPROTO_UDP)
-                    m->ol_flags |= PKT_TX_UDP_CKSUM;
-                break;
-            case (offsetof(struct sctp_hdr, cksum)):
-                if (l4_proto == IPPROTO_SCTP)
-                    m->ol_flags |= PKT_TX_SCTP_CKSUM;
-                break;
-            default:
-                break;
-            }
-        }
-    }
-
-    if (hdr->gso_type != VIRTIO_NET_HDR_GSO_NONE) {
-        switch (hdr->gso_type & ~VIRTIO_NET_HDR_GSO_ECN) {
-        case VIRTIO_NET_HDR_GSO_TCPV4:
-        case VIRTIO_NET_HDR_GSO_TCPV6:
-            tcp_hdr = (struct tcp_hdr *)l4_hdr;
-            m->ol_flags |= PKT_TX_TCP_SEG;
-            m->tso_segsz = hdr->gso_size;
-            m->l4_len = (tcp_hdr->data_off & 0xf0) >> 2;
-            break;
-        default:
-            RTE_LOG(WARNING, VHOST_DATA,
-                "unsupported gso type %u.\n", hdr->gso_type);
-            break;
-        }
-    }
-}
-
-#define RARP_PKT_SIZE 64
-
-static int
-make_rarp_packet(struct rte_mbuf *rarp_mbuf, const struct ether_addr *mac)
-{
-    struct ether_hdr *eth_hdr;
-    struct arp_hdr *rarp;
-
-    if (rarp_mbuf->buf_len < 64) {
-        RTE_LOG(WARNING, VHOST_DATA,
-            "failed to make RARP; mbuf size too small %u (< %d)\n",
-            rarp_mbuf->buf_len, RARP_PKT_SIZE);
-        return -1;
-    }
-
-    /* Ethernet header. */
-    eth_hdr = rte_pktmbuf_mtod_offset(rarp_mbuf, struct ether_hdr *, 0);
-    memset(eth_hdr->d_addr.addr_bytes, 0xff, ETHER_ADDR_LEN);
-    ether_addr_copy(mac, &eth_hdr->s_addr);
-    eth_hdr->ether_type = htons(ETHER_TYPE_RARP);
-
-    /* RARP header.
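The (csum_start, csum_offset) pair checked in vhost_dequeue_offload() is virtio's protocol-neutral way of describing a partial checksum: start summing at csum_start, store the result csum_offset bytes later. Worked numbers for TCP over IPv4 without a VLAN tag (a stand-alone illustration, not code from this patch):

    #include <stdint.h>
    #include <stdio.h>

    int main(void)
    {
        uint16_t l2_len = 14;                   /* Ethernet header */
        uint16_t l3_len = 20;                   /* IPv4 header, no options */
        uint16_t csum_start = l2_len + l3_len;  /* 34: L4 header begins */
        uint16_t csum_offset = 16;              /* checksum field inside TCP */

        /* Matches the hdr->csum_start == l2_len + l3_len test above. */
        printf("checksum written at byte %u of the frame\n",
               csum_start + csum_offset);       /* 50 */
        return 0;
    }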
*/ - rarp = (struct arp_hdr *)(eth_hdr + 1); - rarp->arp_hrd = htons(ARP_HRD_ETHER); - rarp->arp_pro = htons(ETHER_TYPE_IPv4); - rarp->arp_hln = ETHER_ADDR_LEN; - rarp->arp_pln = 4; - rarp->arp_op = htons(ARP_OP_REVREQUEST); - - ether_addr_copy(mac, &rarp->arp_data.arp_sha); - ether_addr_copy(mac, &rarp->arp_data.arp_tha); - memset(&rarp->arp_data.arp_sip, 0x00, 4); - memset(&rarp->arp_data.arp_tip, 0x00, 4); - - rarp_mbuf->pkt_len = rarp_mbuf->data_len = RARP_PKT_SIZE; - - return 0; -} - -static inline int __attribute__((always_inline)) -copy_desc_to_mbuf(struct virtio_net *dev, struct vhost_virtqueue *vq, - struct rte_mbuf *m, uint16_t desc_idx, - struct rte_mempool *mbuf_pool) -{ - struct vring_desc *desc; - uint64_t desc_addr; - uint32_t desc_avail, desc_offset; - uint32_t mbuf_avail, mbuf_offset; - uint32_t cpy_len; - struct rte_mbuf *cur = m, *prev = m; - struct virtio_net_hdr *hdr; - /* A counter to avoid desc dead loop chain */ - uint32_t nr_desc = 1; - - desc = &vq->desc[desc_idx]; - if (unlikely(desc->len < dev->vhost_hlen)) - return -1; - - desc_addr = gpa_to_vva(dev, desc->addr); - if (unlikely(!desc_addr)) - return -1; - - hdr = (struct virtio_net_hdr *)((uintptr_t)desc_addr); - rte_prefetch0(hdr); - - /* - * A virtio driver normally uses at least 2 desc buffers - * for Tx: the first for storing the header, and others - * for storing the data. - */ - if (likely((desc->len == dev->vhost_hlen) && - (desc->flags & VRING_DESC_F_NEXT) != 0)) { - desc = &vq->desc[desc->next]; - - desc_addr = gpa_to_vva(dev, desc->addr); - if (unlikely(!desc_addr)) - return -1; - - rte_prefetch0((void *)(uintptr_t)desc_addr); - - desc_offset = 0; - desc_avail = desc->len; - nr_desc += 1; - - PRINT_PACKET(dev, (uintptr_t)desc_addr, desc->len, 0); - } else { - desc_avail = desc->len - dev->vhost_hlen; - desc_offset = dev->vhost_hlen; - } - - mbuf_offset = 0; - mbuf_avail = m->buf_len - RTE_PKTMBUF_HEADROOM; - while (1) { - cpy_len = RTE_MIN(desc_avail, mbuf_avail); - rte_memcpy(rte_pktmbuf_mtod_offset(cur, void *, mbuf_offset), - (void *)((uintptr_t)(desc_addr + desc_offset)), - cpy_len); - - mbuf_avail -= cpy_len; - mbuf_offset += cpy_len; - desc_avail -= cpy_len; - desc_offset += cpy_len; - - /* This desc reaches to its end, get the next one */ - if (desc_avail == 0) { - if ((desc->flags & VRING_DESC_F_NEXT) == 0) - break; - - if (unlikely(desc->next >= vq->size || - ++nr_desc > vq->size)) - return -1; - desc = &vq->desc[desc->next]; - - desc_addr = gpa_to_vva(dev, desc->addr); - if (unlikely(!desc_addr)) - return -1; - - rte_prefetch0((void *)(uintptr_t)desc_addr); - - desc_offset = 0; - desc_avail = desc->len; - - PRINT_PACKET(dev, (uintptr_t)desc_addr, desc->len, 0); - } - - /* - * This mbuf reaches to its end, get a new one - * to hold more data. 
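make_rarp_packet() pads the frame to RARP_PKT_SIZE because the actual payload is well short of a legal Ethernet frame: 14 bytes of Ethernet header plus 28 bytes of ARP is only 42, below the 60-byte minimum (without FCS). The arithmetic as a quick stand-alone check:

    #include <stdio.h>

    int main(void)
    {
        int eth_hdr = 14;   /* dst MAC + src MAC + ethertype */
        int arp_hdr = 28;   /* fixed ARP fields + sha/sip/tha/tip (Ethernet/IPv4) */

        printf("payload %d bytes, padded to %d\n", eth_hdr + arp_hdr, 64);
        return 0;
    }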
- */
-        if (mbuf_avail == 0) {
-            cur = rte_pktmbuf_alloc(mbuf_pool);
-            if (unlikely(cur == NULL)) {
-                RTE_LOG(ERR, VHOST_DATA, "Failed to "
-                    "allocate memory for mbuf.\n");
-                return -1;
-            }
-
-            prev->next = cur;
-            prev->data_len = mbuf_offset;
-            m->nb_segs += 1;
-            m->pkt_len += mbuf_offset;
-            prev = cur;
-
-            mbuf_offset = 0;
-            mbuf_avail = cur->buf_len - RTE_PKTMBUF_HEADROOM;
-        }
-    }
-
-    prev->data_len = mbuf_offset;
-    m->pkt_len += mbuf_offset;
-
-    if (hdr->flags != 0 || hdr->gso_type != VIRTIO_NET_HDR_GSO_NONE)
-        vhost_dequeue_offload(hdr, m);
-
-    return 0;
-}
-
-uint16_t
-rte_vhost_dequeue_burst(int vid, uint16_t queue_id,
-    struct rte_mempool *mbuf_pool, struct rte_mbuf **pkts, uint16_t count)
-{
-    struct virtio_net *dev;
-    struct rte_mbuf *rarp_mbuf = NULL;
-    struct vhost_virtqueue *vq;
-    uint32_t desc_indexes[MAX_PKT_BURST];
-    uint32_t used_idx;
-    uint32_t i = 0;
-    uint16_t free_entries;
-    uint16_t avail_idx;
-
-    dev = get_device(vid);
-    if (!dev)
-        return 0;
-
-    if (unlikely(!is_valid_virt_queue_idx(queue_id, 1, dev->virt_qp_nb))) {
-        RTE_LOG(ERR, VHOST_DATA, "(%d) %s: invalid virtqueue idx %d.\n",
-            dev->vid, __func__, queue_id);
-        return 0;
-    }
-
-    vq = dev->virtqueue[queue_id];
-    if (unlikely(vq->enabled == 0))
-        return 0;
-
-    /*
-     * Construct a RARP broadcast packet, and inject it into the "pkts"
-     * array, so that it looks like the guest actually sent such a packet.
-     *
-     * Check user_send_rarp() for more information.
-     */
-    if (unlikely(rte_atomic16_cmpset((volatile uint16_t *)
-            &dev->broadcast_rarp.cnt, 1, 0))) {
-        rarp_mbuf = rte_pktmbuf_alloc(mbuf_pool);
-        if (rarp_mbuf == NULL) {
-            RTE_LOG(ERR, VHOST_DATA,
-                "Failed to allocate memory for mbuf.\n");
-            return 0;
-        }
-
-        if (make_rarp_packet(rarp_mbuf, &dev->mac)) {
-            rte_pktmbuf_free(rarp_mbuf);
-            rarp_mbuf = NULL;
-        } else {
-            count -= 1;
-        }
-    }
-
-    avail_idx = *((volatile uint16_t *)&vq->avail->idx);
-    free_entries = avail_idx - vq->last_used_idx;
-    if (free_entries == 0)
-        goto out;
-
-    LOG_DEBUG(VHOST_DATA, "(%d) %s\n", dev->vid, __func__);
-
-    /* Prefetch available ring to retrieve head indexes. */
-    used_idx = vq->last_used_idx & (vq->size - 1);
-    rte_prefetch0(&vq->avail->ring[used_idx]);
-    rte_prefetch0(&vq->used->ring[used_idx]);
-
-    count = RTE_MIN(count, MAX_PKT_BURST);
-    count = RTE_MIN(count, free_entries);
-    LOG_DEBUG(VHOST_DATA, "(%d) about to dequeue %u buffers\n",
-        dev->vid, count);
-
-    /* Retrieve all of the head indexes first to avoid caching issues. */
-    for (i = 0; i < count; i++) {
-        used_idx = (vq->last_used_idx + i) & (vq->size - 1);
-        desc_indexes[i] = vq->avail->ring[used_idx];
-
-        vq->used->ring[used_idx].id = desc_indexes[i];
-        vq->used->ring[used_idx].len = 0;
-        vhost_log_used_vring(dev, vq,
-            offsetof(struct vring_used, ring[used_idx]),
-            sizeof(vq->used->ring[used_idx]));
-    }
-
-    /* Prefetch descriptor index. */
-    rte_prefetch0(&vq->desc[desc_indexes[0]]);
-    for (i = 0; i < count; i++) {
-        int err;
-
-        if (likely(i + 1 < count))
-            rte_prefetch0(&vq->desc[desc_indexes[i + 1]]);
-
-        pkts[i] = rte_pktmbuf_alloc(mbuf_pool);
-        if (unlikely(pkts[i] == NULL)) {
-            RTE_LOG(ERR, VHOST_DATA,
-                "Failed to allocate memory for mbuf.\n");
-            break;
-        }
-        err = copy_desc_to_mbuf(dev, vq, pkts[i], desc_indexes[i],
-            mbuf_pool);
-        if (unlikely(err)) {
-            rte_pktmbuf_free(pkts[i]);
-            break;
-        }
-    }
-
-    rte_smp_wmb();
-    rte_smp_rmb();
-    vq->used->idx += i;
-    vq->last_used_idx += i;
-    vhost_log_used_vring(dev, vq, offsetof(struct vring_used, idx),
-        sizeof(vq->used->idx));
-
-    /* Kick guest if required.
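The rte_atomic16_cmpset() on broadcast_rarp is not decorative: several cores may poll the same device, and the flag must be consumed exactly once, so only the CAS winner injects the RARP frame. The same consume-once idiom with C11 atomics as a stand-in (names are illustrative, not from this patch):

    #include <stdatomic.h>
    #include <stdbool.h>

    static atomic_ushort broadcast_rarp;    /* set to 1 by the control path */

    /* True for exactly one caller each time the flag is raised. */
    static bool
    consume_rarp_flag(void)
    {
        unsigned short expected = 1;

        return atomic_compare_exchange_strong(&broadcast_rarp, &expected, 0);
    }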
*/ - if (!(vq->avail->flags & VRING_AVAIL_F_NO_INTERRUPT) - && (vq->callfd >= 0)) - eventfd_write(vq->callfd, (eventfd_t)1); - -out: - if (unlikely(rarp_mbuf != NULL)) { - /* - * Inject it to the head of "pkts" array, so that switch's mac - * learning table will get updated first. - */ - memmove(&pkts[1], pkts, i * sizeof(struct rte_mbuf *)); - pkts[0] = rarp_mbuf; - i += 1; - } - - return i; -} diff --git a/lib/librte_vhost/vhost_user.c b/lib/librte_vhost/vhost_user.c new file mode 100644 index 00000000..6b83c15f --- /dev/null +++ b/lib/librte_vhost/vhost_user.c @@ -0,0 +1,1033 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2016 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#ifdef RTE_LIBRTE_VHOST_NUMA +#include +#endif + +#include +#include +#include + +#include "vhost.h" +#include "vhost_user.h" + +static const char *vhost_message_str[VHOST_USER_MAX] = { + [VHOST_USER_NONE] = "VHOST_USER_NONE", + [VHOST_USER_GET_FEATURES] = "VHOST_USER_GET_FEATURES", + [VHOST_USER_SET_FEATURES] = "VHOST_USER_SET_FEATURES", + [VHOST_USER_SET_OWNER] = "VHOST_USER_SET_OWNER", + [VHOST_USER_RESET_OWNER] = "VHOST_USER_RESET_OWNER", + [VHOST_USER_SET_MEM_TABLE] = "VHOST_USER_SET_MEM_TABLE", + [VHOST_USER_SET_LOG_BASE] = "VHOST_USER_SET_LOG_BASE", + [VHOST_USER_SET_LOG_FD] = "VHOST_USER_SET_LOG_FD", + [VHOST_USER_SET_VRING_NUM] = "VHOST_USER_SET_VRING_NUM", + [VHOST_USER_SET_VRING_ADDR] = "VHOST_USER_SET_VRING_ADDR", + [VHOST_USER_SET_VRING_BASE] = "VHOST_USER_SET_VRING_BASE", + [VHOST_USER_GET_VRING_BASE] = "VHOST_USER_GET_VRING_BASE", + [VHOST_USER_SET_VRING_KICK] = "VHOST_USER_SET_VRING_KICK", + [VHOST_USER_SET_VRING_CALL] = "VHOST_USER_SET_VRING_CALL", + [VHOST_USER_SET_VRING_ERR] = "VHOST_USER_SET_VRING_ERR", + [VHOST_USER_GET_PROTOCOL_FEATURES] = "VHOST_USER_GET_PROTOCOL_FEATURES", + [VHOST_USER_SET_PROTOCOL_FEATURES] = "VHOST_USER_SET_PROTOCOL_FEATURES", + [VHOST_USER_GET_QUEUE_NUM] = "VHOST_USER_GET_QUEUE_NUM", + [VHOST_USER_SET_VRING_ENABLE] = "VHOST_USER_SET_VRING_ENABLE", + [VHOST_USER_SEND_RARP] = "VHOST_USER_SEND_RARP", +}; + +static uint64_t +get_blk_size(int fd) +{ + struct stat stat; + int ret; + + ret = fstat(fd, &stat); + return ret == -1 ? (uint64_t)-1 : (uint64_t)stat.st_blksize; +} + +static void +free_mem_region(struct virtio_net *dev) +{ + uint32_t i; + struct virtio_memory_region *reg; + + if (!dev || !dev->mem) + return; + + for (i = 0; i < dev->mem->nregions; i++) { + reg = &dev->mem->regions[i]; + if (reg->host_user_addr) { + munmap(reg->mmap_addr, reg->mmap_size); + close(reg->fd); + } + } +} + +void +vhost_backend_cleanup(struct virtio_net *dev) +{ + if (dev->mem) { + free_mem_region(dev); + rte_free(dev->mem); + dev->mem = NULL; + } + if (dev->log_addr) { + munmap((void *)(uintptr_t)dev->log_addr, dev->log_size); + dev->log_addr = 0; + } +} + +/* + * This function just returns success at the moment unless + * the device hasn't been initialised. + */ +static int +vhost_user_set_owner(void) +{ + return 0; +} + +static int +vhost_user_reset_owner(struct virtio_net *dev) +{ + if (dev->flags & VIRTIO_DEV_RUNNING) { + dev->flags &= ~VIRTIO_DEV_RUNNING; + notify_ops->destroy_device(dev->vid); + } + + cleanup_device(dev, 0); + reset_device(dev); + return 0; +} + +/* + * The features that we support are requested. + */ +static uint64_t +vhost_user_get_features(void) +{ + return VHOST_FEATURES; +} + +/* + * We receive the negotiated features supported by us and the virtio device. + */ +static int +vhost_user_set_features(struct virtio_net *dev, uint64_t features) +{ + if (features & ~VHOST_FEATURES) + return -1; + + dev->features = features; + if (dev->features & + ((1 << VIRTIO_NET_F_MRG_RXBUF) | (1ULL << VIRTIO_F_VERSION_1))) { + dev->vhost_hlen = sizeof(struct virtio_net_hdr_mrg_rxbuf); + } else { + dev->vhost_hlen = sizeof(struct virtio_net_hdr); + } + LOG_DEBUG(VHOST_CONFIG, + "(%d) mergeable RX buffers %s, virtio 1 %s\n", + dev->vid, + (dev->features & (1 << VIRTIO_NET_F_MRG_RXBUF)) ? "on" : "off", + (dev->features & (1ULL << VIRTIO_F_VERSION_1)) ? "on" : "off"); + + return 0; +} + +/* + * The virtio device sends us the size of the descriptor ring. 
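The vhost_hlen chosen in vhost_user_set_features() is what every data-path function later prepends or strips: 12 bytes once mergeable RX buffers or virtio 1.0 are negotiated, 10 bytes otherwise. A sketch of the rule with stand-in struct definitions (the real ones come from the virtio headers; the _s names are not from this patch):

    #include <stddef.h>
    #include <stdint.h>

    #define VIRTIO_NET_F_MRG_RXBUF 15
    #define VIRTIO_F_VERSION_1     32

    struct virtio_net_hdr_s {               /* 10 bytes */
        uint8_t flags;
        uint8_t gso_type;
        uint16_t hdr_len;
        uint16_t gso_size;
        uint16_t csum_start;
        uint16_t csum_offset;
    };

    struct virtio_net_hdr_mrg_rxbuf_s {     /* 12 bytes */
        struct virtio_net_hdr_s hdr;
        uint16_t num_buffers;
    };

    static size_t
    vhost_hdr_len(uint64_t features)
    {
        if (features & ((1ULL << VIRTIO_NET_F_MRG_RXBUF) |
                        (1ULL << VIRTIO_F_VERSION_1)))
            return sizeof(struct virtio_net_hdr_mrg_rxbuf_s);
        return sizeof(struct virtio_net_hdr_s);
    }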
+ */ +static int +vhost_user_set_vring_num(struct virtio_net *dev, + struct vhost_vring_state *state) +{ + struct vhost_virtqueue *vq = dev->virtqueue[state->index]; + + vq->size = state->num; + + if (dev->dequeue_zero_copy) { + vq->nr_zmbuf = 0; + vq->last_zmbuf_idx = 0; + vq->zmbuf_size = vq->size; + vq->zmbufs = rte_zmalloc(NULL, vq->zmbuf_size * + sizeof(struct zcopy_mbuf), 0); + if (vq->zmbufs == NULL) { + RTE_LOG(WARNING, VHOST_CONFIG, + "failed to allocate mem for zero copy; " + "zero copy is force disabled\n"); + dev->dequeue_zero_copy = 0; + } + } + + vq->shadow_used_ring = rte_malloc(NULL, + vq->size * sizeof(struct vring_used_elem), + RTE_CACHE_LINE_SIZE); + if (!vq->shadow_used_ring) { + RTE_LOG(ERR, VHOST_CONFIG, + "failed to allocate memory for shadow used ring.\n"); + return -1; + } + + return 0; +} + +/* + * Reallocate virtio_dev and vhost_virtqueue data structure to make them on the + * same numa node as the memory of vring descriptor. + */ +#ifdef RTE_LIBRTE_VHOST_NUMA +static struct virtio_net* +numa_realloc(struct virtio_net *dev, int index) +{ + int oldnode, newnode; + struct virtio_net *old_dev; + struct vhost_virtqueue *old_vq, *vq; + int ret; + + /* + * vq is allocated on pairs, we should try to do realloc + * on first queue of one queue pair only. + */ + if (index % VIRTIO_QNUM != 0) + return dev; + + old_dev = dev; + vq = old_vq = dev->virtqueue[index]; + + ret = get_mempolicy(&newnode, NULL, 0, old_vq->desc, + MPOL_F_NODE | MPOL_F_ADDR); + + /* check if we need to reallocate vq */ + ret |= get_mempolicy(&oldnode, NULL, 0, old_vq, + MPOL_F_NODE | MPOL_F_ADDR); + if (ret) { + RTE_LOG(ERR, VHOST_CONFIG, + "Unable to get vq numa information.\n"); + return dev; + } + if (oldnode != newnode) { + RTE_LOG(INFO, VHOST_CONFIG, + "reallocate vq from %d to %d node\n", oldnode, newnode); + vq = rte_malloc_socket(NULL, sizeof(*vq) * VIRTIO_QNUM, 0, + newnode); + if (!vq) + return dev; + + memcpy(vq, old_vq, sizeof(*vq) * VIRTIO_QNUM); + rte_free(old_vq); + } + + /* check if we need to reallocate dev */ + ret = get_mempolicy(&oldnode, NULL, 0, old_dev, + MPOL_F_NODE | MPOL_F_ADDR); + if (ret) { + RTE_LOG(ERR, VHOST_CONFIG, + "Unable to get dev numa information.\n"); + goto out; + } + if (oldnode != newnode) { + RTE_LOG(INFO, VHOST_CONFIG, + "reallocate dev from %d to %d node\n", + oldnode, newnode); + dev = rte_malloc_socket(NULL, sizeof(*dev), 0, newnode); + if (!dev) { + dev = old_dev; + goto out; + } + + memcpy(dev, old_dev, sizeof(*dev)); + rte_free(old_dev); + } + +out: + dev->virtqueue[index] = vq; + dev->virtqueue[index + 1] = vq + 1; + vhost_devices[dev->vid] = dev; + + return dev; +} +#else +static struct virtio_net* +numa_realloc(struct virtio_net *dev, int index __rte_unused) +{ + return dev; +} +#endif + +/* + * Converts QEMU virtual address to Vhost virtual address. This function is + * used to convert the ring addresses to our address space. + */ +static uint64_t +qva_to_vva(struct virtio_net *dev, uint64_t qva) +{ + struct virtio_memory_region *reg; + uint32_t i; + + /* Find the region where the address lives. */ + for (i = 0; i < dev->mem->nregions; i++) { + reg = &dev->mem->regions[i]; + + if (qva >= reg->guest_user_addr && + qva < reg->guest_user_addr + reg->size) { + return qva - reg->guest_user_addr + + reg->host_user_addr; + } + } + + return 0; +} + +/* + * The virtio device sends us the desc, used and avail ring addresses. + * This function then converts these to our address space. 
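qva_to_vva() is pure pointer rebasing: locate the region that covers the QEMU virtual address, then add the constant offset between QEMU's mapping of that memory and ours. With illustrative numbers (the addresses below are made up for the example):

    #include <stdint.h>
    #include <stdio.h>

    int main(void)
    {
        /* One region: 1 GB that QEMU maps at one address, we at another. */
        uint64_t guest_user_addr = 0x7f0000000000ULL;   /* QEMU's view */
        uint64_t host_user_addr  = 0x7e0000000000ULL;   /* our mmap() */
        uint64_t size            = 1ULL << 30;

        uint64_t qva = 0x7f0000001234ULL;   /* e.g. a desc ring address */
        uint64_t vva = 0;                   /* 0 means translation miss */

        if (qva >= guest_user_addr && qva < guest_user_addr + size)
            vva = qva - guest_user_addr + host_user_addr;

        printf("qva 0x%llx -> vva 0x%llx\n",
               (unsigned long long)qva, (unsigned long long)vva);
        return 0;
    }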
+ */ +static int +vhost_user_set_vring_addr(struct virtio_net *dev, struct vhost_vring_addr *addr) +{ + struct vhost_virtqueue *vq; + + if (dev->mem == NULL) + return -1; + + /* addr->index refers to the queue index. The txq 1, rxq is 0. */ + vq = dev->virtqueue[addr->index]; + + /* The addresses are converted from QEMU virtual to Vhost virtual. */ + vq->desc = (struct vring_desc *)(uintptr_t)qva_to_vva(dev, + addr->desc_user_addr); + if (vq->desc == 0) { + RTE_LOG(ERR, VHOST_CONFIG, + "(%d) failed to find desc ring address.\n", + dev->vid); + return -1; + } + + dev = numa_realloc(dev, addr->index); + vq = dev->virtqueue[addr->index]; + + vq->avail = (struct vring_avail *)(uintptr_t)qva_to_vva(dev, + addr->avail_user_addr); + if (vq->avail == 0) { + RTE_LOG(ERR, VHOST_CONFIG, + "(%d) failed to find avail ring address.\n", + dev->vid); + return -1; + } + + vq->used = (struct vring_used *)(uintptr_t)qva_to_vva(dev, + addr->used_user_addr); + if (vq->used == 0) { + RTE_LOG(ERR, VHOST_CONFIG, + "(%d) failed to find used ring address.\n", + dev->vid); + return -1; + } + + if (vq->last_used_idx != vq->used->idx) { + RTE_LOG(WARNING, VHOST_CONFIG, + "last_used_idx (%u) and vq->used->idx (%u) mismatches; " + "some packets maybe resent for Tx and dropped for Rx\n", + vq->last_used_idx, vq->used->idx); + vq->last_used_idx = vq->used->idx; + vq->last_avail_idx = vq->used->idx; + } + + vq->log_guest_addr = addr->log_guest_addr; + + LOG_DEBUG(VHOST_CONFIG, "(%d) mapped address desc: %p\n", + dev->vid, vq->desc); + LOG_DEBUG(VHOST_CONFIG, "(%d) mapped address avail: %p\n", + dev->vid, vq->avail); + LOG_DEBUG(VHOST_CONFIG, "(%d) mapped address used: %p\n", + dev->vid, vq->used); + LOG_DEBUG(VHOST_CONFIG, "(%d) log_guest_addr: %" PRIx64 "\n", + dev->vid, vq->log_guest_addr); + + return 0; +} + +/* + * The virtio device sends us the available ring last used index. 
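The `addr->index` comment above is easier to follow with the ring-numbering convention spelled out: each queue pair i owns two rings, RX at 2*i and TX at 2*i + 1, which is also how virtio_is_ready() walks them later. As a tiny stand-alone check:

    #include <stdio.h>

    #define VIRTIO_QNUM 2
    #define VIRTIO_RXQ  0
    #define VIRTIO_TXQ  1

    int main(void)
    {
        unsigned int qp;

        for (qp = 0; qp < 3; qp++)
            printf("qp %u: rxq=%u txq=%u\n", qp,
                   qp * VIRTIO_QNUM + VIRTIO_RXQ,
                   qp * VIRTIO_QNUM + VIRTIO_TXQ);
        return 0;
    }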
+ */ +static int +vhost_user_set_vring_base(struct virtio_net *dev, + struct vhost_vring_state *state) +{ + dev->virtqueue[state->index]->last_used_idx = state->num; + dev->virtqueue[state->index]->last_avail_idx = state->num; + + return 0; +} + +static void +add_one_guest_page(struct virtio_net *dev, uint64_t guest_phys_addr, + uint64_t host_phys_addr, uint64_t size) +{ + struct guest_page *page, *last_page; + + if (dev->nr_guest_pages == dev->max_guest_pages) { + dev->max_guest_pages *= 2; + dev->guest_pages = realloc(dev->guest_pages, + dev->max_guest_pages * sizeof(*page)); + } + + if (dev->nr_guest_pages > 0) { + last_page = &dev->guest_pages[dev->nr_guest_pages - 1]; + /* merge if the two pages are continuous */ + if (host_phys_addr == last_page->host_phys_addr + + last_page->size) { + last_page->size += size; + return; + } + } + + page = &dev->guest_pages[dev->nr_guest_pages++]; + page->guest_phys_addr = guest_phys_addr; + page->host_phys_addr = host_phys_addr; + page->size = size; +} + +static void +add_guest_pages(struct virtio_net *dev, struct virtio_memory_region *reg, + uint64_t page_size) +{ + uint64_t reg_size = reg->size; + uint64_t host_user_addr = reg->host_user_addr; + uint64_t guest_phys_addr = reg->guest_phys_addr; + uint64_t host_phys_addr; + uint64_t size; + + host_phys_addr = rte_mem_virt2phy((void *)(uintptr_t)host_user_addr); + size = page_size - (guest_phys_addr & (page_size - 1)); + size = RTE_MIN(size, reg_size); + + add_one_guest_page(dev, guest_phys_addr, host_phys_addr, size); + host_user_addr += size; + guest_phys_addr += size; + reg_size -= size; + + while (reg_size > 0) { + host_phys_addr = rte_mem_virt2phy((void *)(uintptr_t) + host_user_addr); + add_one_guest_page(dev, guest_phys_addr, host_phys_addr, + page_size); + + host_user_addr += page_size; + guest_phys_addr += page_size; + reg_size -= page_size; + } +} + +#ifdef RTE_LIBRTE_VHOST_DEBUG +/* TODO: enable it only in debug mode? */ +static void +dump_guest_pages(struct virtio_net *dev) +{ + uint32_t i; + struct guest_page *page; + + for (i = 0; i < dev->nr_guest_pages; i++) { + page = &dev->guest_pages[i]; + + RTE_LOG(INFO, VHOST_CONFIG, + "guest physical page region %u\n" + "\t guest_phys_addr: %" PRIx64 "\n" + "\t host_phys_addr : %" PRIx64 "\n" + "\t size : %" PRIx64 "\n", + i, + page->guest_phys_addr, + page->host_phys_addr, + page->size); + } +} +#else +#define dump_guest_pages(dev) +#endif + +static int +vhost_user_set_mem_table(struct virtio_net *dev, struct VhostUserMsg *pmsg) +{ + struct VhostUserMemory memory = pmsg->payload.memory; + struct virtio_memory_region *reg; + void *mmap_addr; + uint64_t mmap_size; + uint64_t mmap_offset; + uint64_t alignment; + uint32_t i; + int fd; + + /* Remove from the data plane. 
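add_one_guest_page() keeps the guest-page table short by growing the previous entry whenever the new page is host-physically contiguous with it; since add_guest_pages() walks each region in ascending guest order, checking the host side is enough. The merge rule in isolation (the struct is an illustrative stand-in for struct guest_page, and the table is assumed pre-sized):

    #include <stdint.h>

    struct gpage_s {
        uint64_t guest_phys_addr;
        uint64_t host_phys_addr;
        uint64_t size;
    };

    /* Append a page and return the new entry count. */
    static uint32_t
    add_page(struct gpage_s *tbl, uint32_t n,
             uint64_t gpa, uint64_t hpa, uint64_t size)
    {
        if (n > 0 &&
            hpa == tbl[n - 1].host_phys_addr + tbl[n - 1].size) {
            tbl[n - 1].size += size;    /* extend the contiguous run */
            return n;
        }
        tbl[n].guest_phys_addr = gpa;
        tbl[n].host_phys_addr = hpa;
        tbl[n].size = size;
        return n + 1;
    }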
*/ + if (dev->flags & VIRTIO_DEV_RUNNING) { + dev->flags &= ~VIRTIO_DEV_RUNNING; + notify_ops->destroy_device(dev->vid); + } + + if (dev->mem) { + free_mem_region(dev); + rte_free(dev->mem); + dev->mem = NULL; + } + + dev->nr_guest_pages = 0; + if (!dev->guest_pages) { + dev->max_guest_pages = 8; + dev->guest_pages = malloc(dev->max_guest_pages * + sizeof(struct guest_page)); + } + + dev->mem = rte_zmalloc("vhost-mem-table", sizeof(struct virtio_memory) + + sizeof(struct virtio_memory_region) * memory.nregions, 0); + if (dev->mem == NULL) { + RTE_LOG(ERR, VHOST_CONFIG, + "(%d) failed to allocate memory for dev->mem\n", + dev->vid); + return -1; + } + dev->mem->nregions = memory.nregions; + + for (i = 0; i < memory.nregions; i++) { + fd = pmsg->fds[i]; + reg = &dev->mem->regions[i]; + + reg->guest_phys_addr = memory.regions[i].guest_phys_addr; + reg->guest_user_addr = memory.regions[i].userspace_addr; + reg->size = memory.regions[i].memory_size; + reg->fd = fd; + + mmap_offset = memory.regions[i].mmap_offset; + mmap_size = reg->size + mmap_offset; + + /* mmap() without flag of MAP_ANONYMOUS, should be called + * with length argument aligned with hugepagesz at older + * longterm version Linux, like 2.6.32 and 3.2.72, or + * mmap() will fail with EINVAL. + * + * to avoid failure, make sure in caller to keep length + * aligned. + */ + alignment = get_blk_size(fd); + if (alignment == (uint64_t)-1) { + RTE_LOG(ERR, VHOST_CONFIG, + "couldn't get hugepage size through fstat\n"); + goto err_mmap; + } + mmap_size = RTE_ALIGN_CEIL(mmap_size, alignment); + + mmap_addr = mmap(NULL, mmap_size, PROT_READ | PROT_WRITE, + MAP_SHARED | MAP_POPULATE, fd, 0); + + if (mmap_addr == MAP_FAILED) { + RTE_LOG(ERR, VHOST_CONFIG, + "mmap region %u failed.\n", i); + goto err_mmap; + } + + reg->mmap_addr = mmap_addr; + reg->mmap_size = mmap_size; + reg->host_user_addr = (uint64_t)(uintptr_t)mmap_addr + + mmap_offset; + + add_guest_pages(dev, reg, alignment); + + RTE_LOG(INFO, VHOST_CONFIG, + "guest memory region %u, size: 0x%" PRIx64 "\n" + "\t guest physical addr: 0x%" PRIx64 "\n" + "\t guest virtual addr: 0x%" PRIx64 "\n" + "\t host virtual addr: 0x%" PRIx64 "\n" + "\t mmap addr : 0x%" PRIx64 "\n" + "\t mmap size : 0x%" PRIx64 "\n" + "\t mmap align: 0x%" PRIx64 "\n" + "\t mmap off : 0x%" PRIx64 "\n", + i, reg->size, + reg->guest_phys_addr, + reg->guest_user_addr, + reg->host_user_addr, + (uint64_t)(uintptr_t)mmap_addr, + mmap_size, + alignment, + mmap_offset); + } + + dump_guest_pages(dev); + + return 0; + +err_mmap: + free_mem_region(dev); + rte_free(dev->mem); + dev->mem = NULL; + return -1; +} + +static int +vq_is_ready(struct vhost_virtqueue *vq) +{ + return vq && vq->desc && + vq->kickfd != VIRTIO_UNINITIALIZED_EVENTFD && + vq->callfd != VIRTIO_UNINITIALIZED_EVENTFD; +} + +static int +virtio_is_ready(struct virtio_net *dev) +{ + struct vhost_virtqueue *rvq, *tvq; + uint32_t i; + + for (i = 0; i < dev->virt_qp_nb; i++) { + rvq = dev->virtqueue[i * VIRTIO_QNUM + VIRTIO_RXQ]; + tvq = dev->virtqueue[i * VIRTIO_QNUM + VIRTIO_TXQ]; + + if (!vq_is_ready(rvq) || !vq_is_ready(tvq)) { + RTE_LOG(INFO, VHOST_CONFIG, + "virtio is not ready for processing.\n"); + return 0; + } + } + + RTE_LOG(INFO, VHOST_CONFIG, + "virtio is now ready for processing.\n"); + return 1; +} + +static void +vhost_user_set_vring_call(struct virtio_net *dev, struct VhostUserMsg *pmsg) +{ + struct vhost_vring_file file; + struct vhost_virtqueue *vq; + uint32_t cur_qp_idx; + + file.index = pmsg->payload.u64 & VHOST_USER_VRING_IDX_MASK; + if 
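The alignment dance in vhost_user_set_mem_table() exists because, as the comment notes, older kernels reject hugetlbfs mmap() lengths that are not hugepage-aligned; st_blksize from fstat() conveniently reports the hugepage size. RTE_ALIGN_CEIL's power-of-two rounding, spelled out with a 2 MB page:

    #include <stdint.h>
    #include <stdio.h>

    /* Round len up to a multiple of align; align must be a power of two. */
    static uint64_t
    align_ceil(uint64_t len, uint64_t align)
    {
        return (len + align - 1) & ~(align - 1);
    }

    int main(void)
    {
        uint64_t hugepage = 2ULL << 20;     /* 2 MB, as fstat() would report */

        /* 5000000 bytes -> 6291456: three whole 2 MB pages. */
        printf("%llu\n", (unsigned long long)align_ceil(5000000, hugepage));
        return 0;
    }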
(pmsg->payload.u64 & VHOST_USER_VRING_NOFD_MASK) + file.fd = VIRTIO_INVALID_EVENTFD; + else + file.fd = pmsg->fds[0]; + RTE_LOG(INFO, VHOST_CONFIG, + "vring call idx:%d file:%d\n", file.index, file.fd); + + /* + * FIXME: VHOST_SET_VRING_CALL is the first per-vring message + * we get, so we do vring queue pair allocation here. + */ + cur_qp_idx = file.index / VIRTIO_QNUM; + if (cur_qp_idx + 1 > dev->virt_qp_nb) { + if (alloc_vring_queue_pair(dev, cur_qp_idx) < 0) + return; + } + + vq = dev->virtqueue[file.index]; + assert(vq != NULL); + + if (vq->callfd >= 0) + close(vq->callfd); + + vq->callfd = file.fd; +} + +/* + * In vhost-user, when we receive kick message, will test whether virtio + * device is ready for packet processing. + */ +static void +vhost_user_set_vring_kick(struct virtio_net *dev, struct VhostUserMsg *pmsg) +{ + struct vhost_vring_file file; + struct vhost_virtqueue *vq; + + file.index = pmsg->payload.u64 & VHOST_USER_VRING_IDX_MASK; + if (pmsg->payload.u64 & VHOST_USER_VRING_NOFD_MASK) + file.fd = VIRTIO_INVALID_EVENTFD; + else + file.fd = pmsg->fds[0]; + RTE_LOG(INFO, VHOST_CONFIG, + "vring kick idx:%d file:%d\n", file.index, file.fd); + + vq = dev->virtqueue[file.index]; + if (vq->kickfd >= 0) + close(vq->kickfd); + vq->kickfd = file.fd; + + if (virtio_is_ready(dev) && !(dev->flags & VIRTIO_DEV_RUNNING)) { + if (dev->dequeue_zero_copy) { + RTE_LOG(INFO, VHOST_CONFIG, + "dequeue zero copy is enabled\n"); + } + + if (notify_ops->new_device(dev->vid) == 0) + dev->flags |= VIRTIO_DEV_RUNNING; + } +} + +static void +free_zmbufs(struct vhost_virtqueue *vq) +{ + struct zcopy_mbuf *zmbuf, *next; + + for (zmbuf = TAILQ_FIRST(&vq->zmbuf_list); + zmbuf != NULL; zmbuf = next) { + next = TAILQ_NEXT(zmbuf, next); + + rte_pktmbuf_free(zmbuf->mbuf); + TAILQ_REMOVE(&vq->zmbuf_list, zmbuf, next); + } + + rte_free(vq->zmbufs); +} + +/* + * when virtio is stopped, qemu will send us the GET_VRING_BASE message. + */ +static int +vhost_user_get_vring_base(struct virtio_net *dev, + struct vhost_vring_state *state) +{ + struct vhost_virtqueue *vq = dev->virtqueue[state->index]; + + /* We have to stop the queue (virtio) if it is running. */ + if (dev->flags & VIRTIO_DEV_RUNNING) { + dev->flags &= ~VIRTIO_DEV_RUNNING; + notify_ops->destroy_device(dev->vid); + } + + /* Here we are safe to get the last used index */ + state->num = vq->last_used_idx; + + RTE_LOG(INFO, VHOST_CONFIG, + "vring base idx:%d file:%d\n", state->index, state->num); + /* + * Based on current qemu vhost-user implementation, this message is + * sent and only sent in vhost_vring_stop. + * TODO: cleanup the vring, it isn't usable since here. + */ + if (vq->kickfd >= 0) + close(vq->kickfd); + + vq->kickfd = VIRTIO_UNINITIALIZED_EVENTFD; + + if (dev->dequeue_zero_copy) + free_zmbufs(vq); + rte_free(vq->shadow_used_ring); + vq->shadow_used_ring = NULL; + + return 0; +} + +/* + * when virtio queues are ready to work, qemu will send us to + * enable the virtio queue pair. 
+ */ +static int +vhost_user_set_vring_enable(struct virtio_net *dev, + struct vhost_vring_state *state) +{ + int enable = (int)state->num; + + RTE_LOG(INFO, VHOST_CONFIG, + "set queue enable: %d to qp idx: %d\n", + enable, state->index); + + if (notify_ops->vring_state_changed) + notify_ops->vring_state_changed(dev->vid, state->index, enable); + + dev->virtqueue[state->index]->enabled = enable; + + return 0; +} + +static void +vhost_user_set_protocol_features(struct virtio_net *dev, + uint64_t protocol_features) +{ + if (protocol_features & ~VHOST_USER_PROTOCOL_FEATURES) + return; + + dev->protocol_features = protocol_features; +} + +static int +vhost_user_set_log_base(struct virtio_net *dev, struct VhostUserMsg *msg) +{ + int fd = msg->fds[0]; + uint64_t size, off; + void *addr; + + if (fd < 0) { + RTE_LOG(ERR, VHOST_CONFIG, "invalid log fd: %d\n", fd); + return -1; + } + + if (msg->size != sizeof(VhostUserLog)) { + RTE_LOG(ERR, VHOST_CONFIG, + "invalid log base msg size: %"PRId32" != %d\n", + msg->size, (int)sizeof(VhostUserLog)); + return -1; + } + + size = msg->payload.log.mmap_size; + off = msg->payload.log.mmap_offset; + RTE_LOG(INFO, VHOST_CONFIG, + "log mmap size: %"PRId64", offset: %"PRId64"\n", + size, off); + + /* + * mmap from 0 to workaround a hugepage mmap bug: mmap will + * fail when offset is not page size aligned. + */ + addr = mmap(0, size, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0); + close(fd); + if (addr == MAP_FAILED) { + RTE_LOG(ERR, VHOST_CONFIG, "mmap log base failed!\n"); + return -1; + } + + /* + * Free previously mapped log memory on occasionally + * multiple VHOST_USER_SET_LOG_BASE. + */ + if (dev->log_addr) { + munmap((void *)(uintptr_t)dev->log_addr, dev->log_size); + } + dev->log_addr = (uint64_t)(uintptr_t)addr; + dev->log_base = dev->log_addr + off; + dev->log_size = size; + + return 0; +} + +/* + * An rarp packet is constructed and broadcasted to notify switches about + * the new location of the migrated VM, so that packets from outside will + * not be lost after migration. + * + * However, we don't actually "send" a rarp packet here, instead, we set + * a flag 'broadcast_rarp' to let rte_vhost_dequeue_burst() inject it. + */ +static int +vhost_user_send_rarp(struct virtio_net *dev, struct VhostUserMsg *msg) +{ + uint8_t *mac = (uint8_t *)&msg->payload.u64; + + RTE_LOG(DEBUG, VHOST_CONFIG, + ":: mac: %02x:%02x:%02x:%02x:%02x:%02x\n", + mac[0], mac[1], mac[2], mac[3], mac[4], mac[5]); + memcpy(dev->mac.addr_bytes, mac, 6); + + /* + * Set the flag to inject a RARP broadcast packet at + * rte_vhost_dequeue_burst(). + * + * rte_smp_wmb() is for making sure the mac is copied + * before the flag is set. + */ + rte_smp_wmb(); + rte_atomic16_set(&dev->broadcast_rarp, 1); + + return 0; +} + +/* return bytes# of read on success or negative val on failure. 
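The region mapped in vhost_user_set_log_base() is, per the vhost-user convention, a bitmap with one dirty bit per 4 KB page; the vhost_log_write() calls sprinkled through the enqueue path set bits in it so QEMU can track memory dirtied during live migration. A hedged sketch of the marking step (the helper name and the GCC atomic builtin are illustrative choices, not this library's internals):

    #include <stdint.h>

    #define VHOST_LOG_PAGE 4096     /* one dirty bit per 4 KB page */

    /* Mark [addr, addr + len) dirty in the shared log; len must be > 0. */
    static void
    log_dirty(volatile uint8_t *log_base, uint64_t addr, uint64_t len)
    {
        uint64_t page;

        for (page = addr / VHOST_LOG_PAGE;
             page <= (addr + len - 1) / VHOST_LOG_PAGE;
             page++)
            __sync_fetch_and_or(&log_base[page / 8],
                                (uint8_t)(1 << (page % 8)));
    }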
*/ +static int +read_vhost_message(int sockfd, struct VhostUserMsg *msg) +{ + int ret; + + ret = read_fd_message(sockfd, (char *)msg, VHOST_USER_HDR_SIZE, + msg->fds, VHOST_MEMORY_MAX_NREGIONS); + if (ret <= 0) + return ret; + + if (msg && msg->size) { + if (msg->size > sizeof(msg->payload)) { + RTE_LOG(ERR, VHOST_CONFIG, + "invalid msg size: %d\n", msg->size); + return -1; + } + ret = read(sockfd, &msg->payload, msg->size); + if (ret <= 0) + return ret; + if (ret != (int)msg->size) { + RTE_LOG(ERR, VHOST_CONFIG, + "read control message failed\n"); + return -1; + } + } + + return ret; +} + +static int +send_vhost_message(int sockfd, struct VhostUserMsg *msg) +{ + int ret; + + if (!msg) + return 0; + + msg->flags &= ~VHOST_USER_VERSION_MASK; + msg->flags |= VHOST_USER_VERSION; + msg->flags |= VHOST_USER_REPLY_MASK; + + ret = send_fd_message(sockfd, (char *)msg, + VHOST_USER_HDR_SIZE + msg->size, NULL, 0); + + return ret; +} + +int +vhost_user_msg_handler(int vid, int fd) +{ + struct virtio_net *dev; + struct VhostUserMsg msg; + int ret; + + dev = get_device(vid); + if (dev == NULL) + return -1; + + ret = read_vhost_message(fd, &msg); + if (ret <= 0 || msg.request >= VHOST_USER_MAX) { + if (ret < 0) + RTE_LOG(ERR, VHOST_CONFIG, + "vhost read message failed\n"); + else if (ret == 0) + RTE_LOG(INFO, VHOST_CONFIG, + "vhost peer closed\n"); + else + RTE_LOG(ERR, VHOST_CONFIG, + "vhost read incorrect message\n"); + + return -1; + } + + RTE_LOG(INFO, VHOST_CONFIG, "read message %s\n", + vhost_message_str[msg.request]); + switch (msg.request) { + case VHOST_USER_GET_FEATURES: + msg.payload.u64 = vhost_user_get_features(); + msg.size = sizeof(msg.payload.u64); + send_vhost_message(fd, &msg); + break; + case VHOST_USER_SET_FEATURES: + vhost_user_set_features(dev, msg.payload.u64); + break; + + case VHOST_USER_GET_PROTOCOL_FEATURES: + msg.payload.u64 = VHOST_USER_PROTOCOL_FEATURES; + msg.size = sizeof(msg.payload.u64); + send_vhost_message(fd, &msg); + break; + case VHOST_USER_SET_PROTOCOL_FEATURES: + vhost_user_set_protocol_features(dev, msg.payload.u64); + break; + + case VHOST_USER_SET_OWNER: + vhost_user_set_owner(); + break; + case VHOST_USER_RESET_OWNER: + vhost_user_reset_owner(dev); + break; + + case VHOST_USER_SET_MEM_TABLE: + vhost_user_set_mem_table(dev, &msg); + break; + + case VHOST_USER_SET_LOG_BASE: + vhost_user_set_log_base(dev, &msg); + + /* it needs a reply */ + msg.size = sizeof(msg.payload.u64); + send_vhost_message(fd, &msg); + break; + case VHOST_USER_SET_LOG_FD: + close(msg.fds[0]); + RTE_LOG(INFO, VHOST_CONFIG, "not implemented.\n"); + break; + + case VHOST_USER_SET_VRING_NUM: + vhost_user_set_vring_num(dev, &msg.payload.state); + break; + case VHOST_USER_SET_VRING_ADDR: + vhost_user_set_vring_addr(dev, &msg.payload.addr); + break; + case VHOST_USER_SET_VRING_BASE: + vhost_user_set_vring_base(dev, &msg.payload.state); + break; + + case VHOST_USER_GET_VRING_BASE: + ret = vhost_user_get_vring_base(dev, &msg.payload.state); + msg.size = sizeof(msg.payload.state); + send_vhost_message(fd, &msg); + break; + + case VHOST_USER_SET_VRING_KICK: + vhost_user_set_vring_kick(dev, &msg); + break; + case VHOST_USER_SET_VRING_CALL: + vhost_user_set_vring_call(dev, &msg); + break; + + case VHOST_USER_SET_VRING_ERR: + if (!(msg.payload.u64 & VHOST_USER_VRING_NOFD_MASK)) + close(msg.fds[0]); + RTE_LOG(INFO, VHOST_CONFIG, "not implemented\n"); + break; + + case VHOST_USER_GET_QUEUE_NUM: + msg.payload.u64 = VHOST_MAX_QUEUE_PAIRS; + msg.size = sizeof(msg.payload.u64); + send_vhost_message(fd, 
&msg); + break; + + case VHOST_USER_SET_VRING_ENABLE: + vhost_user_set_vring_enable(dev, &msg.payload.state); + break; + case VHOST_USER_SEND_RARP: + vhost_user_send_rarp(dev, &msg); + break; + + default: + break; + + } + + return 0; +} diff --git a/lib/librte_vhost/vhost_user.h b/lib/librte_vhost/vhost_user.h new file mode 100644 index 00000000..ba78d326 --- /dev/null +++ b/lib/librte_vhost/vhost_user.h @@ -0,0 +1,128 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#ifndef _VHOST_NET_USER_H +#define _VHOST_NET_USER_H + +#include +#include + +#include "rte_virtio_net.h" + +/* refer to hw/virtio/vhost-user.c */ + +#define VHOST_MEMORY_MAX_NREGIONS 8 + +#define VHOST_USER_PROTOCOL_F_MQ 0 +#define VHOST_USER_PROTOCOL_F_LOG_SHMFD 1 +#define VHOST_USER_PROTOCOL_F_RARP 2 + +#define VHOST_USER_PROTOCOL_FEATURES ((1ULL << VHOST_USER_PROTOCOL_F_MQ) | \ + (1ULL << VHOST_USER_PROTOCOL_F_LOG_SHMFD) |\ + (1ULL << VHOST_USER_PROTOCOL_F_RARP)) + +typedef enum VhostUserRequest { + VHOST_USER_NONE = 0, + VHOST_USER_GET_FEATURES = 1, + VHOST_USER_SET_FEATURES = 2, + VHOST_USER_SET_OWNER = 3, + VHOST_USER_RESET_OWNER = 4, + VHOST_USER_SET_MEM_TABLE = 5, + VHOST_USER_SET_LOG_BASE = 6, + VHOST_USER_SET_LOG_FD = 7, + VHOST_USER_SET_VRING_NUM = 8, + VHOST_USER_SET_VRING_ADDR = 9, + VHOST_USER_SET_VRING_BASE = 10, + VHOST_USER_GET_VRING_BASE = 11, + VHOST_USER_SET_VRING_KICK = 12, + VHOST_USER_SET_VRING_CALL = 13, + VHOST_USER_SET_VRING_ERR = 14, + VHOST_USER_GET_PROTOCOL_FEATURES = 15, + VHOST_USER_SET_PROTOCOL_FEATURES = 16, + VHOST_USER_GET_QUEUE_NUM = 17, + VHOST_USER_SET_VRING_ENABLE = 18, + VHOST_USER_SEND_RARP = 19, + VHOST_USER_MAX +} VhostUserRequest; + +typedef struct VhostUserMemoryRegion { + uint64_t guest_phys_addr; + uint64_t memory_size; + uint64_t userspace_addr; + uint64_t mmap_offset; +} VhostUserMemoryRegion; + +typedef struct VhostUserMemory { + uint32_t nregions; + uint32_t padding; + VhostUserMemoryRegion regions[VHOST_MEMORY_MAX_NREGIONS]; +} VhostUserMemory; + +typedef struct VhostUserLog { + uint64_t mmap_size; + uint64_t mmap_offset; +} VhostUserLog; + +typedef struct VhostUserMsg { + VhostUserRequest request; + +#define VHOST_USER_VERSION_MASK 0x3 +#define VHOST_USER_REPLY_MASK (0x1 << 2) + uint32_t flags; + uint32_t size; /* the following payload size */ + union { +#define VHOST_USER_VRING_IDX_MASK 0xff +#define VHOST_USER_VRING_NOFD_MASK (0x1<<8) + uint64_t u64; + struct vhost_vring_state state; + struct vhost_vring_addr addr; + VhostUserMemory memory; + VhostUserLog log; + } payload; + int fds[VHOST_MEMORY_MAX_NREGIONS]; +} __attribute((packed)) VhostUserMsg; + +#define VHOST_USER_HDR_SIZE offsetof(VhostUserMsg, payload.u64) + +/* The version of the protocol we support */ +#define VHOST_USER_VERSION 0x1 + + +/* vhost_user.c */ +int vhost_user_msg_handler(int vid, int fd); + +/* socket.c */ +int read_fd_message(int sockfd, char *buf, int buflen, int *fds, int fd_num); +int send_fd_message(int sockfd, char *buf, int buflen, int *fds, int fd_num); + +#endif diff --git a/lib/librte_vhost/vhost_user/fd_man.c b/lib/librte_vhost/vhost_user/fd_man.c deleted file mode 100644 index 2d3eeb7d..00000000 --- a/lib/librte_vhost/vhost_user/fd_man.c +++ /dev/null @@ -1,299 +0,0 @@ -/*- - * BSD LICENSE - * - * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * - * * Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in - * the documentation and/or other materials provided with the - * distribution. 
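Everything read_vhost_message() does hinges on the layout above: the first three uint32_t fields of VhostUserMsg form a fixed 12-byte wire header, and VHOST_USER_HDR_SIZE lands on exactly that via offsetof(..., payload.u64). A stand-alone size check with a packed stand-in struct:

    #include <stdint.h>
    #include <stdio.h>

    struct msg_hdr_s {          /* mirrors the head of VhostUserMsg */
        uint32_t request;
        uint32_t flags;
        uint32_t size;          /* payload bytes that follow the header */
    } __attribute__((packed));

    int main(void)
    {
        printf("wire header is %zu bytes\n", sizeof(struct msg_hdr_s)); /* 12 */
        return 0;
    }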
- * * Neither the name of Intel Corporation nor the names of its - * contributors may be used to endorse or promote products derived - * from this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - */ - -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include - -#include "fd_man.h" - -/** - * Returns the index in the fdset for a given fd. - * If fd is -1, it means to search for a free entry. - * @return - * index for the fd, or -1 if fd isn't in the fdset. - */ -static int -fdset_find_fd(struct fdset *pfdset, int fd) -{ - int i; - - if (pfdset == NULL) - return -1; - - for (i = 0; i < MAX_FDS && pfdset->fd[i].fd != fd; i++) - ; - - return i == MAX_FDS ? -1 : i; -} - -static int -fdset_find_free_slot(struct fdset *pfdset) -{ - return fdset_find_fd(pfdset, -1); -} - -static int -fdset_add_fd(struct fdset *pfdset, int idx, int fd, - fd_cb rcb, fd_cb wcb, void *dat) -{ - struct fdentry *pfdentry; - - if (pfdset == NULL || idx >= MAX_FDS || fd >= FD_SETSIZE) - return -1; - - pfdentry = &pfdset->fd[idx]; - pfdentry->fd = fd; - pfdentry->rcb = rcb; - pfdentry->wcb = wcb; - pfdentry->dat = dat; - - return 0; -} - -/** - * Fill the read/write fd_set with the fds in the fdset. - * @return - * the maximum fds filled in the read/write fd_set. - */ -static int -fdset_fill(fd_set *rfset, fd_set *wfset, struct fdset *pfdset) -{ - struct fdentry *pfdentry; - int i, maxfds = -1; - int num = MAX_FDS; - - if (pfdset == NULL) - return -1; - - for (i = 0; i < num; i++) { - pfdentry = &pfdset->fd[i]; - if (pfdentry->fd != -1) { - int added = 0; - if (pfdentry->rcb && rfset) { - FD_SET(pfdentry->fd, rfset); - added = 1; - } - if (pfdentry->wcb && wfset) { - FD_SET(pfdentry->fd, wfset); - added = 1; - } - if (added) - maxfds = pfdentry->fd < maxfds ? - maxfds : pfdentry->fd; - } - } - return maxfds; -} - -void -fdset_init(struct fdset *pfdset) -{ - int i; - - if (pfdset == NULL) - return; - - for (i = 0; i < MAX_FDS; i++) { - pfdset->fd[i].fd = -1; - pfdset->fd[i].dat = NULL; - } - pfdset->num = 0; -} - -/** - * Register the fd in the fdset with read/write handler and context. - */ -int -fdset_add(struct fdset *pfdset, int fd, fd_cb rcb, fd_cb wcb, void *dat) -{ - int i; - - if (pfdset == NULL || fd == -1) - return -1; - - pthread_mutex_lock(&pfdset->fd_mutex); - - /* Find a free slot in the list. */ - i = fdset_find_free_slot(pfdset); - if (i == -1 || fdset_add_fd(pfdset, i, fd, rcb, wcb, dat) < 0) { - pthread_mutex_unlock(&pfdset->fd_mutex); - return -2; - } - - pfdset->num++; - - pthread_mutex_unlock(&pfdset->fd_mutex); - - return 0; -} - -/** - * Unregister the fd from the fdset. - * Returns context of a given fd or NULL. 
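fdset_del(), shown next, loops instead of freeing immediately because the dispatch thread may still be inside this fd's callback; the busy flag is the handshake that makes freeing the context safe. The protocol in miniature (a sketch; the spin is intentionally unsophisticated, matching the original):

    #include <pthread.h>
    #include <stdbool.h>

    struct entry_s {            /* stand-in for struct fdentry plus its lock */
        int fd;
        bool busy;              /* set by the dispatcher around callbacks */
        pthread_mutex_t lock;
    };

    /* Returns only once no callback is in flight for this entry. */
    static void
    safe_delete(struct entry_s *e)
    {
        for (;;) {
            pthread_mutex_lock(&e->lock);
            if (!e->busy) {
                e->fd = -1;     /* slot is now reusable */
                pthread_mutex_unlock(&e->lock);
                return;
            }
            pthread_mutex_unlock(&e->lock);
        }
    }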
- */ -void * -fdset_del(struct fdset *pfdset, int fd) -{ - int i; - void *dat = NULL; - - if (pfdset == NULL || fd == -1) - return NULL; - - do { - pthread_mutex_lock(&pfdset->fd_mutex); - - i = fdset_find_fd(pfdset, fd); - if (i != -1 && pfdset->fd[i].busy == 0) { - /* busy indicates r/wcb is executing! */ - dat = pfdset->fd[i].dat; - pfdset->fd[i].fd = -1; - pfdset->fd[i].rcb = pfdset->fd[i].wcb = NULL; - pfdset->fd[i].dat = NULL; - pfdset->num--; - i = -1; - } - pthread_mutex_unlock(&pfdset->fd_mutex); - } while (i != -1); - - return dat; -} - -/** - * Unregister the fd at the specified slot from the fdset. - */ -static void -fdset_del_slot(struct fdset *pfdset, int index) -{ - if (pfdset == NULL || index < 0 || index >= MAX_FDS) - return; - - pthread_mutex_lock(&pfdset->fd_mutex); - - pfdset->fd[index].fd = -1; - pfdset->fd[index].rcb = pfdset->fd[index].wcb = NULL; - pfdset->fd[index].dat = NULL; - pfdset->num--; - - pthread_mutex_unlock(&pfdset->fd_mutex); -} - -/** - * This functions runs in infinite blocking loop until there is no fd in - * pfdset. It calls corresponding r/w handler if there is event on the fd. - * - * Before the callback is called, we set the flag to busy status; If other - * thread(now rte_vhost_driver_unregister) calls fdset_del concurrently, it - * will wait until the flag is reset to zero(which indicates the callback is - * finished), then it could free the context after fdset_del. - */ -void -fdset_event_dispatch(struct fdset *pfdset) -{ - fd_set rfds, wfds; - int i, maxfds; - struct fdentry *pfdentry; - int num = MAX_FDS; - fd_cb rcb, wcb; - void *dat; - int fd; - int remove1, remove2; - int ret; - - if (pfdset == NULL) - return; - - while (1) { - struct timeval tv; - tv.tv_sec = 1; - tv.tv_usec = 0; - FD_ZERO(&rfds); - FD_ZERO(&wfds); - pthread_mutex_lock(&pfdset->fd_mutex); - - maxfds = fdset_fill(&rfds, &wfds, pfdset); - - pthread_mutex_unlock(&pfdset->fd_mutex); - - /* - * When select is blocked, other threads might unregister - * listenfds from and register new listenfds into fdset. - * When select returns, the entries for listenfds in the fdset - * might have been updated. It is ok if there is unwanted call - * for new listenfds. - */ - ret = select(maxfds + 1, &rfds, &wfds, NULL, &tv); - if (ret <= 0) - continue; - - for (i = 0; i < num; i++) { - remove1 = remove2 = 0; - pthread_mutex_lock(&pfdset->fd_mutex); - pfdentry = &pfdset->fd[i]; - fd = pfdentry->fd; - rcb = pfdentry->rcb; - wcb = pfdentry->wcb; - dat = pfdentry->dat; - pfdentry->busy = 1; - pthread_mutex_unlock(&pfdset->fd_mutex); - if (fd >= 0 && FD_ISSET(fd, &rfds) && rcb) - rcb(fd, dat, &remove1); - if (fd >= 0 && FD_ISSET(fd, &wfds) && wcb) - wcb(fd, dat, &remove2); - pfdentry->busy = 0; - /* - * fdset_del needs to check busy flag. - * We don't allow fdset_del to be called in callback - * directly. - */ - /* - * When we are to clean up the fd from fdset, - * because the fd is closed in the cb, - * the old fd val could be reused by when creates new - * listen fd in another thread, we couldn't call - * fd_set_del. - */ - if (remove1 || remove2) - fdset_del_slot(pfdset, i); - } - } -} diff --git a/lib/librte_vhost/vhost_user/fd_man.h b/lib/librte_vhost/vhost_user/fd_man.h deleted file mode 100644 index bd66ed1c..00000000 --- a/lib/librte_vhost/vhost_user/fd_man.h +++ /dev/null @@ -1,67 +0,0 @@ -/*- - * BSD LICENSE - * - * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. - * All rights reserved. 
- * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * - * * Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in - * the documentation and/or other materials provided with the - * distribution. - * * Neither the name of Intel Corporation nor the names of its - * contributors may be used to endorse or promote products derived - * from this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - */ - -#ifndef _FD_MAN_H_ -#define _FD_MAN_H_ -#include -#include - -#define MAX_FDS 1024 - -typedef void (*fd_cb)(int fd, void *dat, int *remove); - -struct fdentry { - int fd; /* -1 indicates this entry is empty */ - fd_cb rcb; /* callback when this fd is readable. */ - fd_cb wcb; /* callback when this fd is writeable.*/ - void *dat; /* fd context */ - int busy; /* whether this entry is being used in cb. */ -}; - -struct fdset { - struct fdentry fd[MAX_FDS]; - pthread_mutex_t fd_mutex; - int num; /* current fd number of this fdset */ -}; - - -void fdset_init(struct fdset *pfdset); - -int fdset_add(struct fdset *pfdset, int fd, - fd_cb rcb, fd_cb wcb, void *dat); - -void *fdset_del(struct fdset *pfdset, int fd); - -void fdset_event_dispatch(struct fdset *pfdset); - -#endif diff --git a/lib/librte_vhost/vhost_user/vhost-net-user.c b/lib/librte_vhost/vhost_user/vhost-net-user.c deleted file mode 100644 index b35594d9..00000000 --- a/lib/librte_vhost/vhost_user/vhost-net-user.c +++ /dev/null @@ -1,795 +0,0 @@ -/*- - * BSD LICENSE - * - * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * - * * Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in - * the documentation and/or other materials provided with the - * distribution. - * * Neither the name of Intel Corporation nor the names of its - * contributors may be used to endorse or promote products derived - * from this software without specific prior written permission. 
- * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include - -#include "fd_man.h" -#include "vhost-net-user.h" -#include "vhost-net.h" -#include "virtio-net-user.h" - -/* - * Every time rte_vhost_driver_register() is invoked, an associated - * vhost_user_socket struct will be created. - */ -struct vhost_user_socket { - char *path; - int listenfd; - int connfd; - bool is_server; - bool reconnect; -}; - -struct vhost_user_connection { - struct vhost_user_socket *vsocket; - int vid; -}; - -#define MAX_VHOST_SOCKET 1024 -struct vhost_user { - struct vhost_user_socket *vsockets[MAX_VHOST_SOCKET]; - struct fdset fdset; - int vsocket_cnt; - pthread_mutex_t mutex; -}; - -#define MAX_VIRTIO_BACKLOG 128 - -static void vhost_user_server_new_connection(int fd, void *data, int *remove); -static void vhost_user_msg_handler(int fd, void *dat, int *remove); -static int vhost_user_create_client(struct vhost_user_socket *vsocket); - -static struct vhost_user vhost_user = { - .fdset = { - .fd = { [0 ... MAX_FDS - 1] = {-1, NULL, NULL, NULL, 0} }, - .fd_mutex = PTHREAD_MUTEX_INITIALIZER, - .num = 0 - }, - .vsocket_cnt = 0, - .mutex = PTHREAD_MUTEX_INITIALIZER, -}; - -static const char *vhost_message_str[VHOST_USER_MAX] = { - [VHOST_USER_NONE] = "VHOST_USER_NONE", - [VHOST_USER_GET_FEATURES] = "VHOST_USER_GET_FEATURES", - [VHOST_USER_SET_FEATURES] = "VHOST_USER_SET_FEATURES", - [VHOST_USER_SET_OWNER] = "VHOST_USER_SET_OWNER", - [VHOST_USER_RESET_OWNER] = "VHOST_USER_RESET_OWNER", - [VHOST_USER_SET_MEM_TABLE] = "VHOST_USER_SET_MEM_TABLE", - [VHOST_USER_SET_LOG_BASE] = "VHOST_USER_SET_LOG_BASE", - [VHOST_USER_SET_LOG_FD] = "VHOST_USER_SET_LOG_FD", - [VHOST_USER_SET_VRING_NUM] = "VHOST_USER_SET_VRING_NUM", - [VHOST_USER_SET_VRING_ADDR] = "VHOST_USER_SET_VRING_ADDR", - [VHOST_USER_SET_VRING_BASE] = "VHOST_USER_SET_VRING_BASE", - [VHOST_USER_GET_VRING_BASE] = "VHOST_USER_GET_VRING_BASE", - [VHOST_USER_SET_VRING_KICK] = "VHOST_USER_SET_VRING_KICK", - [VHOST_USER_SET_VRING_CALL] = "VHOST_USER_SET_VRING_CALL", - [VHOST_USER_SET_VRING_ERR] = "VHOST_USER_SET_VRING_ERR", - [VHOST_USER_GET_PROTOCOL_FEATURES] = "VHOST_USER_GET_PROTOCOL_FEATURES", - [VHOST_USER_SET_PROTOCOL_FEATURES] = "VHOST_USER_SET_PROTOCOL_FEATURES", - [VHOST_USER_GET_QUEUE_NUM] = "VHOST_USER_GET_QUEUE_NUM", - [VHOST_USER_SET_VRING_ENABLE] = "VHOST_USER_SET_VRING_ENABLE", - [VHOST_USER_SEND_RARP] = "VHOST_USER_SEND_RARP", -}; - -/* return bytes# of read on success or negative val on failure. 
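Two initializer idioms in the block above are worth noting: the static vhost_user instance uses the GCC/Clang range extension [0 ... MAX_FDS - 1] to mark every fdentry slot empty at load time, and vhost_message_str uses standard C99 designated indices so the table stays correct even if the enum values are reordered. A reduced sketch of both patterns, with purely local names:

#include <stdio.h>

#define NSLOTS 8

struct slot { int fd; };

/* GNU range designator: every slot starts out empty (fd == -1). */
static struct slot slots[NSLOTS] = {
	[0 ... NSLOTS - 1] = { .fd = -1 }
};

enum req { REQ_NONE = 0, REQ_GET = 1, REQ_SET = 2, REQ_MAX };

/* C99 designated indices: entries track the enum, not source order. */
static const char *req_str[REQ_MAX] = {
	[REQ_NONE] = "REQ_NONE",
	[REQ_SET]  = "REQ_SET",
	[REQ_GET]  = "REQ_GET",
};

int
main(void)
{
	printf("%d %s\n", slots[3].fd, req_str[REQ_GET]);	/* -1 REQ_GET */
	return 0;
}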
*/ -static int -read_fd_message(int sockfd, char *buf, int buflen, int *fds, int fd_num) -{ - struct iovec iov; - struct msghdr msgh; - size_t fdsize = fd_num * sizeof(int); - char control[CMSG_SPACE(fdsize)]; - struct cmsghdr *cmsg; - int ret; - - memset(&msgh, 0, sizeof(msgh)); - iov.iov_base = buf; - iov.iov_len = buflen; - - msgh.msg_iov = &iov; - msgh.msg_iovlen = 1; - msgh.msg_control = control; - msgh.msg_controllen = sizeof(control); - - ret = recvmsg(sockfd, &msgh, 0); - if (ret <= 0) { - RTE_LOG(ERR, VHOST_CONFIG, "recvmsg failed\n"); - return ret; - } - - if (msgh.msg_flags & (MSG_TRUNC | MSG_CTRUNC)) { - RTE_LOG(ERR, VHOST_CONFIG, "truncted msg\n"); - return -1; - } - - for (cmsg = CMSG_FIRSTHDR(&msgh); cmsg != NULL; - cmsg = CMSG_NXTHDR(&msgh, cmsg)) { - if ((cmsg->cmsg_level == SOL_SOCKET) && - (cmsg->cmsg_type == SCM_RIGHTS)) { - memcpy(fds, CMSG_DATA(cmsg), fdsize); - break; - } - } - - return ret; -} - -/* return bytes# of read on success or negative val on failure. */ -static int -read_vhost_message(int sockfd, struct VhostUserMsg *msg) -{ - int ret; - - ret = read_fd_message(sockfd, (char *)msg, VHOST_USER_HDR_SIZE, - msg->fds, VHOST_MEMORY_MAX_NREGIONS); - if (ret <= 0) - return ret; - - if (msg && msg->size) { - if (msg->size > sizeof(msg->payload)) { - RTE_LOG(ERR, VHOST_CONFIG, - "invalid msg size: %d\n", msg->size); - return -1; - } - ret = read(sockfd, &msg->payload, msg->size); - if (ret <= 0) - return ret; - if (ret != (int)msg->size) { - RTE_LOG(ERR, VHOST_CONFIG, - "read control message failed\n"); - return -1; - } - } - - return ret; -} - -static int -send_fd_message(int sockfd, char *buf, int buflen, int *fds, int fd_num) -{ - - struct iovec iov; - struct msghdr msgh; - size_t fdsize = fd_num * sizeof(int); - char control[CMSG_SPACE(fdsize)]; - struct cmsghdr *cmsg; - int ret; - - memset(&msgh, 0, sizeof(msgh)); - iov.iov_base = buf; - iov.iov_len = buflen; - - msgh.msg_iov = &iov; - msgh.msg_iovlen = 1; - - if (fds && fd_num > 0) { - msgh.msg_control = control; - msgh.msg_controllen = sizeof(control); - cmsg = CMSG_FIRSTHDR(&msgh); - cmsg->cmsg_len = CMSG_LEN(fdsize); - cmsg->cmsg_level = SOL_SOCKET; - cmsg->cmsg_type = SCM_RIGHTS; - memcpy(CMSG_DATA(cmsg), fds, fdsize); - } else { - msgh.msg_control = NULL; - msgh.msg_controllen = 0; - } - - do { - ret = sendmsg(sockfd, &msgh, 0); - } while (ret < 0 && errno == EINTR); - - if (ret < 0) { - RTE_LOG(ERR, VHOST_CONFIG, "sendmsg error\n"); - return ret; - } - - return ret; -} - -static int -send_vhost_message(int sockfd, struct VhostUserMsg *msg) -{ - int ret; - - if (!msg) - return 0; - - msg->flags &= ~VHOST_USER_VERSION_MASK; - msg->flags |= VHOST_USER_VERSION; - msg->flags |= VHOST_USER_REPLY_MASK; - - ret = send_fd_message(sockfd, (char *)msg, - VHOST_USER_HDR_SIZE + msg->size, NULL, 0); - - return ret; -} - - -static void -vhost_user_add_connection(int fd, struct vhost_user_socket *vsocket) -{ - int vid; - size_t size; - struct vhost_user_connection *conn; - int ret; - - conn = malloc(sizeof(*conn)); - if (conn == NULL) { - close(fd); - return; - } - - vid = vhost_new_device(); - if (vid == -1) { - close(fd); - free(conn); - return; - } - - size = strnlen(vsocket->path, PATH_MAX); - vhost_set_ifname(vid, vsocket->path, size); - - RTE_LOG(INFO, VHOST_CONFIG, "new device, handle is %d\n", vid); - - vsocket->connfd = fd; - conn->vsocket = vsocket; - conn->vid = vid; - ret = fdset_add(&vhost_user.fdset, fd, vhost_user_msg_handler, - NULL, conn); - if (ret < 0) { - vsocket->connfd = -1; - free(conn); - 
close(fd); - RTE_LOG(ERR, VHOST_CONFIG, - "failed to add fd %d into vhost server fdset\n", - fd); - } -} - -/* call back when there is new vhost-user connection from client */ -static void -vhost_user_server_new_connection(int fd, void *dat, int *remove __rte_unused) -{ - struct vhost_user_socket *vsocket = dat; - - fd = accept(fd, NULL, NULL); - if (fd < 0) - return; - - RTE_LOG(INFO, VHOST_CONFIG, "new vhost user connection is %d\n", fd); - vhost_user_add_connection(fd, vsocket); -} - -/* callback when there is message on the connfd */ -static void -vhost_user_msg_handler(int connfd, void *dat, int *remove) -{ - int vid; - struct vhost_user_connection *conn = dat; - struct VhostUserMsg msg; - uint64_t features; - int ret; - - vid = conn->vid; - ret = read_vhost_message(connfd, &msg); - if (ret <= 0 || msg.request >= VHOST_USER_MAX) { - struct vhost_user_socket *vsocket = conn->vsocket; - - if (ret < 0) - RTE_LOG(ERR, VHOST_CONFIG, - "vhost read message failed\n"); - else if (ret == 0) - RTE_LOG(INFO, VHOST_CONFIG, - "vhost peer closed\n"); - else - RTE_LOG(ERR, VHOST_CONFIG, - "vhost read incorrect message\n"); - - vsocket->connfd = -1; - close(connfd); - *remove = 1; - free(conn); - vhost_destroy_device(vid); - - if (vsocket->reconnect) - vhost_user_create_client(vsocket); - - return; - } - - RTE_LOG(INFO, VHOST_CONFIG, "read message %s\n", - vhost_message_str[msg.request]); - switch (msg.request) { - case VHOST_USER_GET_FEATURES: - ret = vhost_get_features(vid, &features); - msg.payload.u64 = features; - msg.size = sizeof(msg.payload.u64); - send_vhost_message(connfd, &msg); - break; - case VHOST_USER_SET_FEATURES: - features = msg.payload.u64; - vhost_set_features(vid, &features); - break; - - case VHOST_USER_GET_PROTOCOL_FEATURES: - msg.payload.u64 = VHOST_USER_PROTOCOL_FEATURES; - msg.size = sizeof(msg.payload.u64); - send_vhost_message(connfd, &msg); - break; - case VHOST_USER_SET_PROTOCOL_FEATURES: - user_set_protocol_features(vid, msg.payload.u64); - break; - - case VHOST_USER_SET_OWNER: - vhost_set_owner(vid); - break; - case VHOST_USER_RESET_OWNER: - vhost_reset_owner(vid); - break; - - case VHOST_USER_SET_MEM_TABLE: - user_set_mem_table(vid, &msg); - break; - - case VHOST_USER_SET_LOG_BASE: - user_set_log_base(vid, &msg); - - /* it needs a reply */ - msg.size = sizeof(msg.payload.u64); - send_vhost_message(connfd, &msg); - break; - case VHOST_USER_SET_LOG_FD: - close(msg.fds[0]); - RTE_LOG(INFO, VHOST_CONFIG, "not implemented.\n"); - break; - - case VHOST_USER_SET_VRING_NUM: - vhost_set_vring_num(vid, &msg.payload.state); - break; - case VHOST_USER_SET_VRING_ADDR: - vhost_set_vring_addr(vid, &msg.payload.addr); - break; - case VHOST_USER_SET_VRING_BASE: - vhost_set_vring_base(vid, &msg.payload.state); - break; - - case VHOST_USER_GET_VRING_BASE: - ret = user_get_vring_base(vid, &msg.payload.state); - msg.size = sizeof(msg.payload.state); - send_vhost_message(connfd, &msg); - break; - - case VHOST_USER_SET_VRING_KICK: - user_set_vring_kick(vid, &msg); - break; - case VHOST_USER_SET_VRING_CALL: - user_set_vring_call(vid, &msg); - break; - - case VHOST_USER_SET_VRING_ERR: - if (!(msg.payload.u64 & VHOST_USER_VRING_NOFD_MASK)) - close(msg.fds[0]); - RTE_LOG(INFO, VHOST_CONFIG, "not implemented\n"); - break; - - case VHOST_USER_GET_QUEUE_NUM: - msg.payload.u64 = VHOST_MAX_QUEUE_PAIRS; - msg.size = sizeof(msg.payload.u64); - send_vhost_message(connfd, &msg); - break; - - case VHOST_USER_SET_VRING_ENABLE: - user_set_vring_enable(vid, &msg.payload.state); - break; - case 
VHOST_USER_SEND_RARP: - user_send_rarp(vid, &msg); - break; - - default: - break; - - } -} - -static int -create_unix_socket(const char *path, struct sockaddr_un *un, bool is_server) -{ - int fd; - - fd = socket(AF_UNIX, SOCK_STREAM, 0); - if (fd < 0) - return -1; - RTE_LOG(INFO, VHOST_CONFIG, "vhost-user %s: socket created, fd: %d\n", - is_server ? "server" : "client", fd); - - if (!is_server && fcntl(fd, F_SETFL, O_NONBLOCK)) { - RTE_LOG(ERR, VHOST_CONFIG, - "vhost-user: can't set nonblocking mode for socket, fd: " - "%d (%s)\n", fd, strerror(errno)); - close(fd); - return -1; - } - - memset(un, 0, sizeof(*un)); - un->sun_family = AF_UNIX; - strncpy(un->sun_path, path, sizeof(un->sun_path)); - un->sun_path[sizeof(un->sun_path) - 1] = '\0'; - - return fd; -} - -static int -vhost_user_create_server(struct vhost_user_socket *vsocket) -{ - int fd; - int ret; - struct sockaddr_un un; - const char *path = vsocket->path; - - fd = create_unix_socket(path, &un, vsocket->is_server); - if (fd < 0) - return -1; - - ret = bind(fd, (struct sockaddr *)&un, sizeof(un)); - if (ret < 0) { - RTE_LOG(ERR, VHOST_CONFIG, - "failed to bind to %s: %s; remove it and try again\n", - path, strerror(errno)); - goto err; - } - RTE_LOG(INFO, VHOST_CONFIG, "bind to %s\n", path); - - ret = listen(fd, MAX_VIRTIO_BACKLOG); - if (ret < 0) - goto err; - - vsocket->listenfd = fd; - ret = fdset_add(&vhost_user.fdset, fd, vhost_user_server_new_connection, - NULL, vsocket); - if (ret < 0) { - RTE_LOG(ERR, VHOST_CONFIG, - "failed to add listen fd %d to vhost server fdset\n", - fd); - goto err; - } - - return 0; - -err: - close(fd); - return -1; -} - -struct vhost_user_reconnect { - struct sockaddr_un un; - int fd; - struct vhost_user_socket *vsocket; - - TAILQ_ENTRY(vhost_user_reconnect) next; -}; - -TAILQ_HEAD(vhost_user_reconnect_tailq_list, vhost_user_reconnect); -struct vhost_user_reconnect_list { - struct vhost_user_reconnect_tailq_list head; - pthread_mutex_t mutex; -}; - -static struct vhost_user_reconnect_list reconn_list; -static pthread_t reconn_tid; - -static int -vhost_user_connect_nonblock(int fd, struct sockaddr *un, size_t sz) -{ - int ret, flags; - - ret = connect(fd, un, sz); - if (ret < 0 && errno != EISCONN) - return -1; - - flags = fcntl(fd, F_GETFL, 0); - if (flags < 0) { - RTE_LOG(ERR, VHOST_CONFIG, - "can't get flags for connfd %d\n", fd); - return -2; - } - if ((flags & O_NONBLOCK) && fcntl(fd, F_SETFL, flags & ~O_NONBLOCK)) { - RTE_LOG(ERR, VHOST_CONFIG, - "can't disable nonblocking on fd %d\n", fd); - return -2; - } - return 0; -} - -static void * -vhost_user_client_reconnect(void *arg __rte_unused) -{ - int ret; - struct vhost_user_reconnect *reconn, *next; - - while (1) { - pthread_mutex_lock(&reconn_list.mutex); - - /* - * An equal implementation of TAILQ_FOREACH_SAFE, - * which does not exist on all platforms. 
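The idiom this comment refers to matters because calling TAILQ_NEXT() on an element that the loop body has already freed is undefined behavior; the next pointer must be saved before the body may remove or free the current node, which is exactly what the reconnect loop below does by hand. A self-contained sketch of the same safe-traversal pattern (types and names are illustrative):

#include <stdlib.h>
#include <sys/queue.h>

struct node {
	int val;
	TAILQ_ENTRY(node) next;
};

TAILQ_HEAD(node_list, node);

/* Fetch 'nxt' before the body frees 'cur', so the iteration survives
 * removal of the current element. */
static void
drop_all(struct node_list *head)
{
	struct node *cur, *nxt;

	for (cur = TAILQ_FIRST(head); cur != NULL; cur = nxt) {
		nxt = TAILQ_NEXT(cur, next);
		TAILQ_REMOVE(head, cur, next);
		free(cur);
	}
}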
- */ - for (reconn = TAILQ_FIRST(&reconn_list.head); - reconn != NULL; reconn = next) { - next = TAILQ_NEXT(reconn, next); - - ret = vhost_user_connect_nonblock(reconn->fd, - (struct sockaddr *)&reconn->un, - sizeof(reconn->un)); - if (ret == -2) { - close(reconn->fd); - RTE_LOG(ERR, VHOST_CONFIG, - "reconnection for fd %d failed\n", - reconn->fd); - goto remove_fd; - } - if (ret == -1) - continue; - - RTE_LOG(INFO, VHOST_CONFIG, - "%s: connected\n", reconn->vsocket->path); - vhost_user_add_connection(reconn->fd, reconn->vsocket); -remove_fd: - TAILQ_REMOVE(&reconn_list.head, reconn, next); - free(reconn); - } - - pthread_mutex_unlock(&reconn_list.mutex); - sleep(1); - } - - return NULL; -} - -static int -vhost_user_reconnect_init(void) -{ - int ret; - - pthread_mutex_init(&reconn_list.mutex, NULL); - TAILQ_INIT(&reconn_list.head); - - ret = pthread_create(&reconn_tid, NULL, - vhost_user_client_reconnect, NULL); - if (ret < 0) - RTE_LOG(ERR, VHOST_CONFIG, "failed to create reconnect thread"); - - return ret; -} - -static int -vhost_user_create_client(struct vhost_user_socket *vsocket) -{ - int fd; - int ret; - struct sockaddr_un un; - const char *path = vsocket->path; - struct vhost_user_reconnect *reconn; - - fd = create_unix_socket(path, &un, vsocket->is_server); - if (fd < 0) - return -1; - - ret = vhost_user_connect_nonblock(fd, (struct sockaddr *)&un, - sizeof(un)); - if (ret == 0) { - vhost_user_add_connection(fd, vsocket); - return 0; - } - - RTE_LOG(ERR, VHOST_CONFIG, - "failed to connect to %s: %s\n", - path, strerror(errno)); - - if (ret == -2 || !vsocket->reconnect) { - close(fd); - return -1; - } - - RTE_LOG(ERR, VHOST_CONFIG, "%s: reconnecting...\n", path); - reconn = malloc(sizeof(*reconn)); - if (reconn == NULL) { - RTE_LOG(ERR, VHOST_CONFIG, - "failed to allocate memory for reconnect\n"); - close(fd); - return -1; - } - reconn->un = un; - reconn->fd = fd; - reconn->vsocket = vsocket; - pthread_mutex_lock(&reconn_list.mutex); - TAILQ_INSERT_TAIL(&reconn_list.head, reconn, next); - pthread_mutex_unlock(&reconn_list.mutex); - - return 0; -} - -/* - * Register a new vhost-user socket; here we could act as server - * (the default case), or client (when RTE_VHOST_USER_CLIENT) flag - * is set. 
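From the caller's side, the server/client split described in this comment is selected purely through the flags argument. A minimal usage sketch (the socket paths are hypothetical; RTE_VHOST_USER_CLIENT and RTE_VHOST_USER_NO_RECONNECT are the flags handled by the function below):

#include <rte_virtio_net.h>

static int
setup_sockets(void)
{
	/* Server mode (the default): bind and listen on the path. */
	if (rte_vhost_driver_register("/tmp/vhost-server.sock", 0) < 0)
		return -1;

	/* Client mode: connect to a socket created by QEMU; a background
	 * thread keeps retrying unless reconnect is explicitly disabled
	 * with RTE_VHOST_USER_NO_RECONNECT. */
	return rte_vhost_driver_register("/tmp/vhost-client.sock",
					 RTE_VHOST_USER_CLIENT);
}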
- */ -int -rte_vhost_driver_register(const char *path, uint64_t flags) -{ - int ret = -1; - struct vhost_user_socket *vsocket; - - if (!path) - return -1; - - pthread_mutex_lock(&vhost_user.mutex); - - if (vhost_user.vsocket_cnt == MAX_VHOST_SOCKET) { - RTE_LOG(ERR, VHOST_CONFIG, - "error: the number of vhost sockets reaches maximum\n"); - goto out; - } - - vsocket = malloc(sizeof(struct vhost_user_socket)); - if (!vsocket) - goto out; - memset(vsocket, 0, sizeof(struct vhost_user_socket)); - vsocket->path = strdup(path); - vsocket->connfd = -1; - - if ((flags & RTE_VHOST_USER_CLIENT) != 0) { - vsocket->reconnect = !(flags & RTE_VHOST_USER_NO_RECONNECT); - if (vsocket->reconnect && reconn_tid == 0) { - if (vhost_user_reconnect_init() < 0) { - free(vsocket->path); - free(vsocket); - goto out; - } - } - ret = vhost_user_create_client(vsocket); - } else { - vsocket->is_server = true; - ret = vhost_user_create_server(vsocket); - } - if (ret < 0) { - free(vsocket->path); - free(vsocket); - goto out; - } - - vhost_user.vsockets[vhost_user.vsocket_cnt++] = vsocket; - -out: - pthread_mutex_unlock(&vhost_user.mutex); - - return ret; -} - -static bool -vhost_user_remove_reconnect(struct vhost_user_socket *vsocket) -{ - int found = false; - struct vhost_user_reconnect *reconn, *next; - - pthread_mutex_lock(&reconn_list.mutex); - - for (reconn = TAILQ_FIRST(&reconn_list.head); - reconn != NULL; reconn = next) { - next = TAILQ_NEXT(reconn, next); - - if (reconn->vsocket == vsocket) { - TAILQ_REMOVE(&reconn_list.head, reconn, next); - close(reconn->fd); - free(reconn); - found = true; - break; - } - } - pthread_mutex_unlock(&reconn_list.mutex); - return found; -} - -/** - * Unregister the specified vhost socket - */ -int -rte_vhost_driver_unregister(const char *path) -{ - int i; - int count; - struct vhost_user_connection *conn; - - pthread_mutex_lock(&vhost_user.mutex); - - for (i = 0; i < vhost_user.vsocket_cnt; i++) { - struct vhost_user_socket *vsocket = vhost_user.vsockets[i]; - - if (!strcmp(vsocket->path, path)) { - if (vsocket->is_server) { - fdset_del(&vhost_user.fdset, vsocket->listenfd); - close(vsocket->listenfd); - unlink(path); - } else if (vsocket->reconnect) { - vhost_user_remove_reconnect(vsocket); - } - - conn = fdset_del(&vhost_user.fdset, vsocket->connfd); - if (conn) { - RTE_LOG(INFO, VHOST_CONFIG, - "free connfd = %d for device '%s'\n", - vsocket->connfd, path); - close(vsocket->connfd); - vhost_destroy_device(conn->vid); - free(conn); - } - - free(vsocket->path); - free(vsocket); - - count = --vhost_user.vsocket_cnt; - vhost_user.vsockets[i] = vhost_user.vsockets[count]; - vhost_user.vsockets[count] = NULL; - pthread_mutex_unlock(&vhost_user.mutex); - - return 0; - } - } - pthread_mutex_unlock(&vhost_user.mutex); - - return -1; -} - -int -rte_vhost_driver_session_start(void) -{ - fdset_event_dispatch(&vhost_user.fdset); - return 0; -} diff --git a/lib/librte_vhost/vhost_user/vhost-net-user.h b/lib/librte_vhost/vhost_user/vhost-net-user.h deleted file mode 100644 index f5332396..00000000 --- a/lib/librte_vhost/vhost_user/vhost-net-user.h +++ /dev/null @@ -1,113 +0,0 @@ -/*- - * BSD LICENSE - * - * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * - * * Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. 
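rte_vhost_driver_unregister() above removes a socket from the flat vsockets[] array in O(1) by moving the last element into the vacated slot rather than shifting the tail down; ordering carries no meaning in that table, so the swap is safe. The pattern in isolation, as a generic sketch:

/* Remove entry i from an unordered array in O(1): overwrite it with the
 * last element and shrink the count, as the unregister path above does. */
static void
array_remove(void **arr, int *cnt, int i)
{
	int last = --(*cnt);

	arr[i] = arr[last];
	arr[last] = NULL;
}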
- * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in - * the documentation and/or other materials provided with the - * distribution. - * * Neither the name of Intel Corporation nor the names of its - * contributors may be used to endorse or promote products derived - * from this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - */ - -#ifndef _VHOST_NET_USER_H -#define _VHOST_NET_USER_H - -#include -#include - -#include "rte_virtio_net.h" - -/* refer to hw/virtio/vhost-user.c */ - -#define VHOST_MEMORY_MAX_NREGIONS 8 - -typedef enum VhostUserRequest { - VHOST_USER_NONE = 0, - VHOST_USER_GET_FEATURES = 1, - VHOST_USER_SET_FEATURES = 2, - VHOST_USER_SET_OWNER = 3, - VHOST_USER_RESET_OWNER = 4, - VHOST_USER_SET_MEM_TABLE = 5, - VHOST_USER_SET_LOG_BASE = 6, - VHOST_USER_SET_LOG_FD = 7, - VHOST_USER_SET_VRING_NUM = 8, - VHOST_USER_SET_VRING_ADDR = 9, - VHOST_USER_SET_VRING_BASE = 10, - VHOST_USER_GET_VRING_BASE = 11, - VHOST_USER_SET_VRING_KICK = 12, - VHOST_USER_SET_VRING_CALL = 13, - VHOST_USER_SET_VRING_ERR = 14, - VHOST_USER_GET_PROTOCOL_FEATURES = 15, - VHOST_USER_SET_PROTOCOL_FEATURES = 16, - VHOST_USER_GET_QUEUE_NUM = 17, - VHOST_USER_SET_VRING_ENABLE = 18, - VHOST_USER_SEND_RARP = 19, - VHOST_USER_MAX -} VhostUserRequest; - -typedef struct VhostUserMemoryRegion { - uint64_t guest_phys_addr; - uint64_t memory_size; - uint64_t userspace_addr; - uint64_t mmap_offset; -} VhostUserMemoryRegion; - -typedef struct VhostUserMemory { - uint32_t nregions; - uint32_t padding; - VhostUserMemoryRegion regions[VHOST_MEMORY_MAX_NREGIONS]; -} VhostUserMemory; - -typedef struct VhostUserLog { - uint64_t mmap_size; - uint64_t mmap_offset; -} VhostUserLog; - -typedef struct VhostUserMsg { - VhostUserRequest request; - -#define VHOST_USER_VERSION_MASK 0x3 -#define VHOST_USER_REPLY_MASK (0x1 << 2) - uint32_t flags; - uint32_t size; /* the following payload size */ - union { -#define VHOST_USER_VRING_IDX_MASK 0xff -#define VHOST_USER_VRING_NOFD_MASK (0x1<<8) - uint64_t u64; - struct vhost_vring_state state; - struct vhost_vring_addr addr; - VhostUserMemory memory; - VhostUserLog log; - } payload; - int fds[VHOST_MEMORY_MAX_NREGIONS]; -} __attribute((packed)) VhostUserMsg; - -#define VHOST_USER_HDR_SIZE offsetof(VhostUserMsg, payload.u64) - -/* The version of the protocol we support */ -#define VHOST_USER_VERSION 0x1 - -/*****************************************************************************/ -#endif diff --git a/lib/librte_vhost/vhost_user/virtio-net-user.c b/lib/librte_vhost/vhost_user/virtio-net-user.c deleted file mode 100644 index e7c43479..00000000 --- a/lib/librte_vhost/vhost_user/virtio-net-user.c +++ /dev/null @@ 
-1,470 +0,0 @@ -/*- - * BSD LICENSE - * - * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * - * * Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in - * the documentation and/or other materials provided with the - * distribution. - * * Neither the name of Intel Corporation nor the names of its - * contributors may be used to endorse or promote products derived - * from this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - */ - -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include - -#include "virtio-net-user.h" -#include "vhost-net-user.h" -#include "vhost-net.h" - -struct orig_region_map { - int fd; - uint64_t mapped_address; - uint64_t mapped_size; - uint64_t blksz; -}; - -#define orig_region(ptr, nregions) \ - ((struct orig_region_map *)RTE_PTR_ADD((ptr), \ - sizeof(struct virtio_memory) + \ - sizeof(struct virtio_memory_regions) * (nregions))) - -static uint64_t -get_blk_size(int fd) -{ - struct stat stat; - int ret; - - ret = fstat(fd, &stat); - return ret == -1 ? (uint64_t)-1 : (uint64_t)stat.st_blksize; -} - -static void -free_mem_region(struct virtio_net *dev) -{ - struct orig_region_map *region; - unsigned int idx; - - if (!dev || !dev->mem) - return; - - region = orig_region(dev->mem, dev->mem->nregions); - for (idx = 0; idx < dev->mem->nregions; idx++) { - if (region[idx].mapped_address) { - munmap((void *)(uintptr_t)region[idx].mapped_address, - region[idx].mapped_size); - close(region[idx].fd); - } - } -} - -void -vhost_backend_cleanup(struct virtio_net *dev) -{ - if (dev->mem) { - free_mem_region(dev); - free(dev->mem); - dev->mem = NULL; - } - if (dev->log_addr) { - munmap((void *)(uintptr_t)dev->log_addr, dev->log_size); - dev->log_addr = 0; - } -} - -int -user_set_mem_table(int vid, struct VhostUserMsg *pmsg) -{ - struct VhostUserMemory memory = pmsg->payload.memory; - struct virtio_memory_regions *pregion; - uint64_t mapped_address, mapped_size; - struct virtio_net *dev; - unsigned int idx = 0; - struct orig_region_map *pregion_orig; - uint64_t alignment; - - /* unmap old memory regions one by one*/ - dev = get_device(vid); - if (dev == NULL) - return -1; - - /* Remove from the data plane. 
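get_blk_size() above exists because, as the comment in user_set_mem_table() below explains, mapping a hugetlbfs-backed fd on older kernels requires a length that is a multiple of the hugepage size, and fstat() reports that size in st_blksize. A sketch of the probe-and-align step, assuming a simple power-of-two round-up in place of DPDK's RTE_ALIGN_CEIL():

#include <stdint.h>
#include <sys/stat.h>

/* Round len up to the fd's block size (the hugepage size for hugetlbfs),
 * so a later mmap() of the hugepage-backed fd cannot fail with EINVAL. */
static uint64_t
aligned_map_len(int fd, uint64_t len)
{
	struct stat st;

	if (fstat(fd, &st) == -1)
		return (uint64_t)-1;

	/* st_blksize is a power of two for hugetlbfs mounts. */
	return (len + st.st_blksize - 1) & ~((uint64_t)st.st_blksize - 1);
}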
*/ - if (dev->flags & VIRTIO_DEV_RUNNING) { - dev->flags &= ~VIRTIO_DEV_RUNNING; - notify_ops->destroy_device(vid); - } - - if (dev->mem) { - free_mem_region(dev); - free(dev->mem); - dev->mem = NULL; - } - - dev->mem = calloc(1, - sizeof(struct virtio_memory) + - sizeof(struct virtio_memory_regions) * memory.nregions + - sizeof(struct orig_region_map) * memory.nregions); - if (dev->mem == NULL) { - RTE_LOG(ERR, VHOST_CONFIG, - "(%d) failed to allocate memory for dev->mem\n", - dev->vid); - return -1; - } - dev->mem->nregions = memory.nregions; - - pregion_orig = orig_region(dev->mem, memory.nregions); - for (idx = 0; idx < memory.nregions; idx++) { - pregion = &dev->mem->regions[idx]; - pregion->guest_phys_address = - memory.regions[idx].guest_phys_addr; - pregion->guest_phys_address_end = - memory.regions[idx].guest_phys_addr + - memory.regions[idx].memory_size; - pregion->memory_size = - memory.regions[idx].memory_size; - pregion->userspace_address = - memory.regions[idx].userspace_addr; - - /* This is ugly */ - mapped_size = memory.regions[idx].memory_size + - memory.regions[idx].mmap_offset; - - /* mmap() without flag of MAP_ANONYMOUS, should be called - * with length argument aligned with hugepagesz at older - * longterm version Linux, like 2.6.32 and 3.2.72, or - * mmap() will fail with EINVAL. - * - * to avoid failure, make sure in caller to keep length - * aligned. - */ - alignment = get_blk_size(pmsg->fds[idx]); - if (alignment == (uint64_t)-1) { - RTE_LOG(ERR, VHOST_CONFIG, - "couldn't get hugepage size through fstat\n"); - goto err_mmap; - } - mapped_size = RTE_ALIGN_CEIL(mapped_size, alignment); - - mapped_address = (uint64_t)(uintptr_t)mmap(NULL, - mapped_size, - PROT_READ | PROT_WRITE, MAP_SHARED, - pmsg->fds[idx], - 0); - - RTE_LOG(INFO, VHOST_CONFIG, - "mapped region %d fd:%d to:%p sz:0x%"PRIx64" " - "off:0x%"PRIx64" align:0x%"PRIx64"\n", - idx, pmsg->fds[idx], (void *)(uintptr_t)mapped_address, - mapped_size, memory.regions[idx].mmap_offset, - alignment); - - if (mapped_address == (uint64_t)(uintptr_t)MAP_FAILED) { - RTE_LOG(ERR, VHOST_CONFIG, - "mmap qemu guest failed.\n"); - goto err_mmap; - } - - pregion_orig[idx].mapped_address = mapped_address; - pregion_orig[idx].mapped_size = mapped_size; - pregion_orig[idx].blksz = alignment; - pregion_orig[idx].fd = pmsg->fds[idx]; - - mapped_address += memory.regions[idx].mmap_offset; - - pregion->address_offset = mapped_address - - pregion->guest_phys_address; - - if (memory.regions[idx].guest_phys_addr == 0) { - dev->mem->base_address = - memory.regions[idx].userspace_addr; - dev->mem->mapped_address = - pregion->address_offset; - } - - LOG_DEBUG(VHOST_CONFIG, - "REGION: %u GPA: %p QEMU VA: %p SIZE (%"PRIu64")\n", - idx, - (void *)(uintptr_t)pregion->guest_phys_address, - (void *)(uintptr_t)pregion->userspace_address, - pregion->memory_size); - } - - return 0; - -err_mmap: - while (idx--) { - munmap((void *)(uintptr_t)pregion_orig[idx].mapped_address, - pregion_orig[idx].mapped_size); - close(pregion_orig[idx].fd); - } - free(dev->mem); - dev->mem = NULL; - return -1; -} - -static int -vq_is_ready(struct vhost_virtqueue *vq) -{ - return vq && vq->desc && - vq->kickfd != VIRTIO_UNINITIALIZED_EVENTFD && - vq->callfd != VIRTIO_UNINITIALIZED_EVENTFD; -} - -static int -virtio_is_ready(struct virtio_net *dev) -{ - struct vhost_virtqueue *rvq, *tvq; - uint32_t i; - - for (i = 0; i < dev->virt_qp_nb; i++) { - rvq = dev->virtqueue[i * VIRTIO_QNUM + VIRTIO_RXQ]; - tvq = dev->virtqueue[i * VIRTIO_QNUM + VIRTIO_TXQ]; - - if 
(!vq_is_ready(rvq) || !vq_is_ready(tvq)) { - RTE_LOG(INFO, VHOST_CONFIG, - "virtio is not ready for processing.\n"); - return 0; - } - } - - RTE_LOG(INFO, VHOST_CONFIG, - "virtio is now ready for processing.\n"); - return 1; -} - -void -user_set_vring_call(int vid, struct VhostUserMsg *pmsg) -{ - struct vhost_vring_file file; - - file.index = pmsg->payload.u64 & VHOST_USER_VRING_IDX_MASK; - if (pmsg->payload.u64 & VHOST_USER_VRING_NOFD_MASK) - file.fd = VIRTIO_INVALID_EVENTFD; - else - file.fd = pmsg->fds[0]; - RTE_LOG(INFO, VHOST_CONFIG, - "vring call idx:%d file:%d\n", file.index, file.fd); - vhost_set_vring_call(vid, &file); -} - - -/* - * In vhost-user, when we receive kick message, will test whether virtio - * device is ready for packet processing. - */ -void -user_set_vring_kick(int vid, struct VhostUserMsg *pmsg) -{ - struct vhost_vring_file file; - struct virtio_net *dev = get_device(vid); - - if (!dev) - return; - - file.index = pmsg->payload.u64 & VHOST_USER_VRING_IDX_MASK; - if (pmsg->payload.u64 & VHOST_USER_VRING_NOFD_MASK) - file.fd = VIRTIO_INVALID_EVENTFD; - else - file.fd = pmsg->fds[0]; - RTE_LOG(INFO, VHOST_CONFIG, - "vring kick idx:%d file:%d\n", file.index, file.fd); - vhost_set_vring_kick(vid, &file); - - if (virtio_is_ready(dev) && !(dev->flags & VIRTIO_DEV_RUNNING)) { - if (notify_ops->new_device(vid) == 0) - dev->flags |= VIRTIO_DEV_RUNNING; - } -} - -/* - * when virtio is stopped, qemu will send us the GET_VRING_BASE message. - */ -int -user_get_vring_base(int vid, struct vhost_vring_state *state) -{ - struct virtio_net *dev = get_device(vid); - - if (dev == NULL) - return -1; - /* We have to stop the queue (virtio) if it is running. */ - if (dev->flags & VIRTIO_DEV_RUNNING) { - dev->flags &= ~VIRTIO_DEV_RUNNING; - notify_ops->destroy_device(vid); - } - - /* Here we are safe to get the last used index */ - vhost_get_vring_base(vid, state->index, state); - - RTE_LOG(INFO, VHOST_CONFIG, - "vring base idx:%d file:%d\n", state->index, state->num); - /* - * Based on current qemu vhost-user implementation, this message is - * sent and only sent in vhost_vring_stop. - * TODO: cleanup the vring, it isn't usable since here. - */ - if (dev->virtqueue[state->index]->kickfd >= 0) - close(dev->virtqueue[state->index]->kickfd); - - dev->virtqueue[state->index]->kickfd = VIRTIO_UNINITIALIZED_EVENTFD; - - return 0; -} - -/* - * when virtio queues are ready to work, qemu will send us to - * enable the virtio queue pair. 
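Stepping back to the kick/call plumbing above: both descriptors are Linux eventfd objects. The guest writes to the kick fd to announce available buffers, and the backend writes to the call fd to interrupt the guest; reading an eventfd fetches and resets its counter. A minimal sketch of the signalling primitive itself, independent of vhost:

#include <inttypes.h>
#include <stdint.h>
#include <stdio.h>
#include <sys/eventfd.h>
#include <unistd.h>

int
main(void)
{
	int efd = eventfd(0, 0);
	uint64_t v = 1, got;

	write(efd, &v, sizeof(v));	/* "kick": add 1 to the counter */
	read(efd, &got, sizeof(got));	/* consumer: fetch and reset counter */
	printf("events pending: %" PRIu64 "\n", got);	/* prints 1 */
	close(efd);
	return 0;
}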
- */ -int -user_set_vring_enable(int vid, struct vhost_vring_state *state) -{ - struct virtio_net *dev; - int enable = (int)state->num; - - dev = get_device(vid); - if (dev == NULL) - return -1; - - RTE_LOG(INFO, VHOST_CONFIG, - "set queue enable: %d to qp idx: %d\n", - enable, state->index); - - if (notify_ops->vring_state_changed) - notify_ops->vring_state_changed(vid, state->index, enable); - - dev->virtqueue[state->index]->enabled = enable; - - return 0; -} - -void -user_set_protocol_features(int vid, uint64_t protocol_features) -{ - struct virtio_net *dev; - - dev = get_device(vid); - if (dev == NULL || protocol_features & ~VHOST_USER_PROTOCOL_FEATURES) - return; - - dev->protocol_features = protocol_features; -} - -int -user_set_log_base(int vid, struct VhostUserMsg *msg) -{ - struct virtio_net *dev; - int fd = msg->fds[0]; - uint64_t size, off; - void *addr; - - dev = get_device(vid); - if (!dev) - return -1; - - if (fd < 0) { - RTE_LOG(ERR, VHOST_CONFIG, "invalid log fd: %d\n", fd); - return -1; - } - - if (msg->size != sizeof(VhostUserLog)) { - RTE_LOG(ERR, VHOST_CONFIG, - "invalid log base msg size: %"PRId32" != %d\n", - msg->size, (int)sizeof(VhostUserLog)); - return -1; - } - - size = msg->payload.log.mmap_size; - off = msg->payload.log.mmap_offset; - RTE_LOG(INFO, VHOST_CONFIG, - "log mmap size: %"PRId64", offset: %"PRId64"\n", - size, off); - - /* - * mmap from 0 to workaround a hugepage mmap bug: mmap will - * fail when offset is not page size aligned. - */ - addr = mmap(0, size, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0); - close(fd); - if (addr == MAP_FAILED) { - RTE_LOG(ERR, VHOST_CONFIG, "mmap log base failed!\n"); - return -1; - } - - /* - * Free previously mapped log memory on occasionally - * multiple VHOST_USER_SET_LOG_BASE. - */ - if (dev->log_addr) { - munmap((void *)(uintptr_t)dev->log_addr, dev->log_size); - } - dev->log_addr = (uint64_t)(uintptr_t)addr; - dev->log_base = dev->log_addr + off; - dev->log_size = size; - - return 0; -} - -/* - * An rarp packet is constructed and broadcasted to notify switches about - * the new location of the migrated VM, so that packets from outside will - * not be lost after migration. - * - * However, we don't actually "send" a rarp packet here, instead, we set - * a flag 'broadcast_rarp' to let rte_vhost_dequeue_burst() inject it. - */ -int -user_send_rarp(int vid, struct VhostUserMsg *msg) -{ - struct virtio_net *dev; - uint8_t *mac = (uint8_t *)&msg->payload.u64; - - dev = get_device(vid); - if (!dev) - return -1; - - RTE_LOG(DEBUG, VHOST_CONFIG, - ":: mac: %02x:%02x:%02x:%02x:%02x:%02x\n", - mac[0], mac[1], mac[2], mac[3], mac[4], mac[5]); - memcpy(dev->mac.addr_bytes, mac, 6); - - /* - * Set the flag to inject a RARP broadcast packet at - * rte_vhost_dequeue_burst(). - * - * rte_smp_wmb() is for making sure the mac is copied - * before the flag is set. - */ - rte_smp_wmb(); - rte_atomic16_set(&dev->broadcast_rarp, 1); - - return 0; -} diff --git a/lib/librte_vhost/vhost_user/virtio-net-user.h b/lib/librte_vhost/vhost_user/virtio-net-user.h deleted file mode 100644 index e1b967b8..00000000 --- a/lib/librte_vhost/vhost_user/virtio-net-user.h +++ /dev/null @@ -1,62 +0,0 @@ -/*- - * BSD LICENSE - * - * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. - * All rights reserved. 
- * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * - * * Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in - * the documentation and/or other materials provided with the - * distribution. - * * Neither the name of Intel Corporation nor the names of its - * contributors may be used to endorse or promote products derived - * from this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - */ - -#ifndef _VIRTIO_NET_USER_H -#define _VIRTIO_NET_USER_H - -#include "vhost-net.h" -#include "vhost-net-user.h" - -#define VHOST_USER_PROTOCOL_F_MQ 0 -#define VHOST_USER_PROTOCOL_F_LOG_SHMFD 1 -#define VHOST_USER_PROTOCOL_F_RARP 2 - -#define VHOST_USER_PROTOCOL_FEATURES ((1ULL << VHOST_USER_PROTOCOL_F_MQ) | \ - (1ULL << VHOST_USER_PROTOCOL_F_LOG_SHMFD) |\ - (1ULL << VHOST_USER_PROTOCOL_F_RARP)) - -int user_set_mem_table(int, struct VhostUserMsg *); - -void user_set_vring_call(int, struct VhostUserMsg *); - -void user_set_vring_kick(int, struct VhostUserMsg *); - -void user_set_protocol_features(int vid, uint64_t protocol_features); -int user_set_log_base(int vid, struct VhostUserMsg *); -int user_send_rarp(int vid, struct VhostUserMsg *); - -int user_get_vring_base(int, struct vhost_vring_state *); - -int user_set_vring_enable(int vid, struct vhost_vring_state *state); - -#endif diff --git a/lib/librte_vhost/virtio-net.c b/lib/librte_vhost/virtio-net.c deleted file mode 100644 index 1785695b..00000000 --- a/lib/librte_vhost/virtio-net.c +++ /dev/null @@ -1,847 +0,0 @@ -/*- - * BSD LICENSE - * - * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * - * * Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in - * the documentation and/or other materials provided with the - * distribution. - * * Neither the name of Intel Corporation nor the names of its - * contributors may be used to endorse or promote products derived - * from this software without specific prior written permission. 
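The protocol-feature constants in the header above follow the usual bit-index convention: each F_* macro names a bit position, and the advertised set is the OR of 1ULL shifted by each position, so negotiation reduces to masking. An illustrative sketch with local stand-in names:

#include <stdint.h>
#include <stdio.h>

#define F_MQ		0
#define F_LOG_SHMFD	1
#define F_RARP		2

#define SUPPORTED	((1ULL << F_MQ) | (1ULL << F_LOG_SHMFD) | \
			 (1ULL << F_RARP))

int
main(void)
{
	/* Pretend the peer did not offer RARP. */
	uint64_t negotiated = SUPPORTED & ~(1ULL << F_RARP);

	printf("MQ:   %d\n", !!(negotiated & (1ULL << F_MQ)));	/* 1 */
	printf("RARP: %d\n", !!(negotiated & (1ULL << F_RARP)));	/* 0 */
	return 0;
}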
- * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - */ - -#include -#include -#include -#include -#include -#include -#include -#include -#ifdef RTE_LIBRTE_VHOST_NUMA -#include -#endif - -#include - -#include -#include -#include -#include -#include -#include - -#include "vhost-net.h" - -#define MAX_VHOST_DEVICE 1024 -static struct virtio_net *vhost_devices[MAX_VHOST_DEVICE]; - -/* device ops to add/remove device to/from data core. */ -struct virtio_net_device_ops const *notify_ops; - -#define VHOST_USER_F_PROTOCOL_FEATURES 30 - -/* Features supported by this lib. */ -#define VHOST_SUPPORTED_FEATURES ((1ULL << VIRTIO_NET_F_MRG_RXBUF) | \ - (1ULL << VIRTIO_NET_F_CTRL_VQ) | \ - (1ULL << VIRTIO_NET_F_CTRL_RX) | \ - (1ULL << VIRTIO_NET_F_GUEST_ANNOUNCE) | \ - (VHOST_SUPPORTS_MQ) | \ - (1ULL << VIRTIO_F_VERSION_1) | \ - (1ULL << VHOST_F_LOG_ALL) | \ - (1ULL << VHOST_USER_F_PROTOCOL_FEATURES) | \ - (1ULL << VIRTIO_NET_F_HOST_TSO4) | \ - (1ULL << VIRTIO_NET_F_HOST_TSO6) | \ - (1ULL << VIRTIO_NET_F_CSUM) | \ - (1ULL << VIRTIO_NET_F_GUEST_CSUM) | \ - (1ULL << VIRTIO_NET_F_GUEST_TSO4) | \ - (1ULL << VIRTIO_NET_F_GUEST_TSO6)) - -static uint64_t VHOST_FEATURES = VHOST_SUPPORTED_FEATURES; - - -/* - * Converts QEMU virtual address to Vhost virtual address. This function is - * used to convert the ring addresses to our address space. - */ -static uint64_t -qva_to_vva(struct virtio_net *dev, uint64_t qemu_va) -{ - struct virtio_memory_regions *region; - uint64_t vhost_va = 0; - uint32_t regionidx = 0; - - /* Find the region where the address lives. */ - for (regionidx = 0; regionidx < dev->mem->nregions; regionidx++) { - region = &dev->mem->regions[regionidx]; - if ((qemu_va >= region->userspace_address) && - (qemu_va <= region->userspace_address + - region->memory_size)) { - vhost_va = qemu_va + region->guest_phys_address + - region->address_offset - - region->userspace_address; - break; - } - } - return vhost_va; -} - -struct virtio_net * -get_device(int vid) -{ - struct virtio_net *dev = vhost_devices[vid]; - - if (unlikely(!dev)) { - RTE_LOG(ERR, VHOST_CONFIG, - "(%d) device not found.\n", vid); - } - - return dev; -} - -static void -cleanup_vq(struct vhost_virtqueue *vq, int destroy) -{ - if ((vq->callfd >= 0) && (destroy != 0)) - close(vq->callfd); - if (vq->kickfd >= 0) - close(vq->kickfd); -} - -/* - * Unmap any memory, close any file descriptors and - * free any memory owned by a device. - */ -static void -cleanup_device(struct virtio_net *dev, int destroy) -{ - uint32_t i; - - vhost_backend_cleanup(dev); - - for (i = 0; i < dev->virt_qp_nb; i++) { - cleanup_vq(dev->virtqueue[i * VIRTIO_QNUM + VIRTIO_RXQ], destroy); - cleanup_vq(dev->virtqueue[i * VIRTIO_QNUM + VIRTIO_TXQ], destroy); - } -} - -/* - * Release virtqueues and device memory. 
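qva_to_vva() earlier in this file translates a QEMU-process virtual address into this process's address space using per-region bookkeeping: each region records its QEMU userspace base, its guest-physical base, and address_offset (the local mmap address minus the guest-physical base), so the translation is qva - userspace_base + local_map_base. A toy translation with one region, using a locally defined struct that mirrors only the fields consulted:

#include <inttypes.h>
#include <stdint.h>
#include <stdio.h>

/* Local mirror of the fields qva_to_vva() reads (illustrative only). */
struct region {
	uint64_t userspace_address;	/* base QVA in the QEMU process */
	uint64_t guest_phys_address;	/* base GPA of the region       */
	uint64_t address_offset;	/* local mmap VA minus base GPA */
	uint64_t memory_size;
};

static uint64_t
toy_qva_to_vva(const struct region *r, uint64_t qva)
{
	if (qva < r->userspace_address ||
	    qva > r->userspace_address + r->memory_size)
		return 0;
	return qva + r->guest_phys_address + r->address_offset
		   - r->userspace_address;
}

int
main(void)
{
	/* Region mapped locally at 0x7f0000000000, GPA 0, QVA base 0x400000000. */
	struct region r = { 0x400000000ULL, 0x0, 0x7f0000000000ULL, 0x40000000ULL };

	printf("%#" PRIx64 "\n", toy_qva_to_vva(&r, 0x400001000ULL));
	/* prints 0x7f0000001000 */
	return 0;
}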
- */ -static void -free_device(struct virtio_net *dev) -{ - uint32_t i; - - for (i = 0; i < dev->virt_qp_nb; i++) - rte_free(dev->virtqueue[i * VIRTIO_QNUM]); - - rte_free(dev); -} - -static void -init_vring_queue(struct vhost_virtqueue *vq, int qp_idx) -{ - memset(vq, 0, sizeof(struct vhost_virtqueue)); - - vq->kickfd = VIRTIO_UNINITIALIZED_EVENTFD; - vq->callfd = VIRTIO_UNINITIALIZED_EVENTFD; - - /* Backends are set to -1 indicating an inactive device. */ - vq->backend = -1; - - /* always set the default vq pair to enabled */ - if (qp_idx == 0) - vq->enabled = 1; -} - -static void -init_vring_queue_pair(struct virtio_net *dev, uint32_t qp_idx) -{ - uint32_t base_idx = qp_idx * VIRTIO_QNUM; - - init_vring_queue(dev->virtqueue[base_idx + VIRTIO_RXQ], qp_idx); - init_vring_queue(dev->virtqueue[base_idx + VIRTIO_TXQ], qp_idx); -} - -static void -reset_vring_queue(struct vhost_virtqueue *vq, int qp_idx) -{ - int callfd; - - callfd = vq->callfd; - init_vring_queue(vq, qp_idx); - vq->callfd = callfd; -} - -static void -reset_vring_queue_pair(struct virtio_net *dev, uint32_t qp_idx) -{ - uint32_t base_idx = qp_idx * VIRTIO_QNUM; - - reset_vring_queue(dev->virtqueue[base_idx + VIRTIO_RXQ], qp_idx); - reset_vring_queue(dev->virtqueue[base_idx + VIRTIO_TXQ], qp_idx); -} - -static int -alloc_vring_queue_pair(struct virtio_net *dev, uint32_t qp_idx) -{ - struct vhost_virtqueue *virtqueue = NULL; - uint32_t virt_rx_q_idx = qp_idx * VIRTIO_QNUM + VIRTIO_RXQ; - uint32_t virt_tx_q_idx = qp_idx * VIRTIO_QNUM + VIRTIO_TXQ; - - virtqueue = rte_malloc(NULL, - sizeof(struct vhost_virtqueue) * VIRTIO_QNUM, 0); - if (virtqueue == NULL) { - RTE_LOG(ERR, VHOST_CONFIG, - "Failed to allocate memory for virt qp:%d.\n", qp_idx); - return -1; - } - - dev->virtqueue[virt_rx_q_idx] = virtqueue; - dev->virtqueue[virt_tx_q_idx] = virtqueue + VIRTIO_TXQ; - - init_vring_queue_pair(dev, qp_idx); - - dev->virt_qp_nb += 1; - - return 0; -} - -/* - * Reset some variables in device structure, while keeping few - * others untouched, such as vid, ifname, virt_qp_nb: they - * should be same unless the device is removed. - */ -static void -reset_device(struct virtio_net *dev) -{ - uint32_t i; - - dev->features = 0; - dev->protocol_features = 0; - dev->flags = 0; - - for (i = 0; i < dev->virt_qp_nb; i++) - reset_vring_queue_pair(dev, i); -} - -/* - * Function is called from the CUSE open function. The device structure is - * initialised and a new entry is added to the device configuration linked - * list. - */ -int -vhost_new_device(void) -{ - struct virtio_net *dev; - int i; - - dev = rte_zmalloc(NULL, sizeof(struct virtio_net), 0); - if (dev == NULL) { - RTE_LOG(ERR, VHOST_CONFIG, - "Failed to allocate memory for new dev.\n"); - return -1; - } - - for (i = 0; i < MAX_VHOST_DEVICE; i++) { - if (vhost_devices[i] == NULL) - break; - } - if (i == MAX_VHOST_DEVICE) { - RTE_LOG(ERR, VHOST_CONFIG, - "Failed to find a free slot for new device.\n"); - return -1; - } - - vhost_devices[i] = dev; - dev->vid = i; - - return i; -} - -/* - * Function is called from the CUSE release function. This function will - * cleanup the device and remove it from device configuration linked list. 
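vhost_new_device() above hands out device ids by linearly scanning the fixed vhost_devices[] table for a NULL slot; the array index itself becomes the vid, which is why get_device(vid) is a plain O(1) table lookup. A reduced model of that allocator (names are illustrative):

#include <stddef.h>

#define MAX_DEV 1024

static void *devices[MAX_DEV];

/* Return a free table index to use as the device id, or -1 if full. */
static int
dev_alloc(void *dev)
{
	int i;

	for (i = 0; i < MAX_DEV; i++) {
		if (devices[i] == NULL) {
			devices[i] = dev;
			return i;	/* the index doubles as the vid */
		}
	}
	return -1;
}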
- */ -void -vhost_destroy_device(int vid) -{ - struct virtio_net *dev = get_device(vid); - - if (dev == NULL) - return; - - if (dev->flags & VIRTIO_DEV_RUNNING) { - dev->flags &= ~VIRTIO_DEV_RUNNING; - notify_ops->destroy_device(vid); - } - - cleanup_device(dev, 1); - free_device(dev); - - vhost_devices[vid] = NULL; -} - -void -vhost_set_ifname(int vid, const char *if_name, unsigned int if_len) -{ - struct virtio_net *dev; - unsigned int len; - - dev = get_device(vid); - if (dev == NULL) - return; - - len = if_len > sizeof(dev->ifname) ? - sizeof(dev->ifname) : if_len; - - strncpy(dev->ifname, if_name, len); - dev->ifname[sizeof(dev->ifname) - 1] = '\0'; -} - - -/* - * Called from CUSE IOCTL: VHOST_SET_OWNER - * This function just returns success at the moment unless - * the device hasn't been initialised. - */ -int -vhost_set_owner(int vid) -{ - struct virtio_net *dev; - - dev = get_device(vid); - if (dev == NULL) - return -1; - - return 0; -} - -/* - * Called from CUSE IOCTL: VHOST_RESET_OWNER - */ -int -vhost_reset_owner(int vid) -{ - struct virtio_net *dev; - - dev = get_device(vid); - if (dev == NULL) - return -1; - - if (dev->flags & VIRTIO_DEV_RUNNING) { - dev->flags &= ~VIRTIO_DEV_RUNNING; - notify_ops->destroy_device(vid); - } - - cleanup_device(dev, 0); - reset_device(dev); - return 0; -} - -/* - * Called from CUSE IOCTL: VHOST_GET_FEATURES - * The features that we support are requested. - */ -int -vhost_get_features(int vid, uint64_t *pu) -{ - struct virtio_net *dev; - - dev = get_device(vid); - if (dev == NULL) - return -1; - - /* Send our supported features. */ - *pu = VHOST_FEATURES; - return 0; -} - -/* - * Called from CUSE IOCTL: VHOST_SET_FEATURES - * We receive the negotiated features supported by us and the virtio device. - */ -int -vhost_set_features(int vid, uint64_t *pu) -{ - struct virtio_net *dev; - - dev = get_device(vid); - if (dev == NULL) - return -1; - if (*pu & ~VHOST_FEATURES) - return -1; - - dev->features = *pu; - if (dev->features & - ((1 << VIRTIO_NET_F_MRG_RXBUF) | (1ULL << VIRTIO_F_VERSION_1))) { - dev->vhost_hlen = sizeof(struct virtio_net_hdr_mrg_rxbuf); - } else { - dev->vhost_hlen = sizeof(struct virtio_net_hdr); - } - LOG_DEBUG(VHOST_CONFIG, - "(%d) mergeable RX buffers %s, virtio 1 %s\n", - dev->vid, - (dev->features & (1 << VIRTIO_NET_F_MRG_RXBUF)) ? "on" : "off", - (dev->features & (1ULL << VIRTIO_F_VERSION_1)) ? "on" : "off"); - - return 0; -} - -/* - * Called from CUSE IOCTL: VHOST_SET_VRING_NUM - * The virtio device sends us the size of the descriptor ring. - */ -int -vhost_set_vring_num(int vid, struct vhost_vring_state *state) -{ - struct virtio_net *dev; - - dev = get_device(vid); - if (dev == NULL) - return -1; - - /* State->index refers to the queue index. The txq is 1, rxq is 0. */ - dev->virtqueue[state->index]->size = state->num; - - return 0; -} - -/* - * Reallocate virtio_dev and vhost_virtqueue data structure to make them on the - * same numa node as the memory of vring descriptor. - */ -#ifdef RTE_LIBRTE_VHOST_NUMA -static struct virtio_net* -numa_realloc(struct virtio_net *dev, int index) -{ - int oldnode, newnode; - struct virtio_net *old_dev; - struct vhost_virtqueue *old_vq, *vq; - int ret; - - /* - * vq is allocated on pairs, we should try to do realloc - * on first queue of one queue pair only. 
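numa_realloc(), whose body follows, decides whether to move the virtqueue by asking the kernel which NUMA node backs two addresses: the guest-supplied descriptor ring and the virtqueue structure itself. The probe is get_mempolicy() with MPOL_F_NODE | MPOL_F_ADDR, which returns the node of the page containing the given address. A standalone sketch (requires libnuma headers; link with -lnuma):

#include <numaif.h>	/* get_mempolicy(), MPOL_F_* */
#include <stdio.h>
#include <stdlib.h>

/* Return the NUMA node holding 'addr', or -1 on failure. */
static int
node_of(void *addr)
{
	int node;

	if (get_mempolicy(&node, NULL, 0, addr, MPOL_F_NODE | MPOL_F_ADDR))
		return -1;
	return node;
}

int
main(void)
{
	int *p = malloc(sizeof(*p));

	*p = 0;	/* touch the page so it is actually backed somewhere */
	printf("allocated on node %d\n", node_of(p));
	free(p);
	return 0;
}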
- */ - if (index % VIRTIO_QNUM != 0) - return dev; - - old_dev = dev; - vq = old_vq = dev->virtqueue[index]; - - ret = get_mempolicy(&newnode, NULL, 0, old_vq->desc, - MPOL_F_NODE | MPOL_F_ADDR); - - /* check if we need to reallocate vq */ - ret |= get_mempolicy(&oldnode, NULL, 0, old_vq, - MPOL_F_NODE | MPOL_F_ADDR); - if (ret) { - RTE_LOG(ERR, VHOST_CONFIG, - "Unable to get vq numa information.\n"); - return dev; - } - if (oldnode != newnode) { - RTE_LOG(INFO, VHOST_CONFIG, - "reallocate vq from %d to %d node\n", oldnode, newnode); - vq = rte_malloc_socket(NULL, sizeof(*vq) * VIRTIO_QNUM, 0, - newnode); - if (!vq) - return dev; - - memcpy(vq, old_vq, sizeof(*vq) * VIRTIO_QNUM); - rte_free(old_vq); - } - - /* check if we need to reallocate dev */ - ret = get_mempolicy(&oldnode, NULL, 0, old_dev, - MPOL_F_NODE | MPOL_F_ADDR); - if (ret) { - RTE_LOG(ERR, VHOST_CONFIG, - "Unable to get dev numa information.\n"); - goto out; - } - if (oldnode != newnode) { - RTE_LOG(INFO, VHOST_CONFIG, - "reallocate dev from %d to %d node\n", - oldnode, newnode); - dev = rte_malloc_socket(NULL, sizeof(*dev), 0, newnode); - if (!dev) { - dev = old_dev; - goto out; - } - - memcpy(dev, old_dev, sizeof(*dev)); - rte_free(old_dev); - } - -out: - dev->virtqueue[index] = vq; - dev->virtqueue[index + 1] = vq + 1; - vhost_devices[dev->vid] = dev; - - return dev; -} -#else -static struct virtio_net* -numa_realloc(struct virtio_net *dev, int index __rte_unused) -{ - return dev; -} -#endif - -/* - * Called from CUSE IOCTL: VHOST_SET_VRING_ADDR - * The virtio device sends us the desc, used and avail ring addresses. - * This function then converts these to our address space. - */ -int -vhost_set_vring_addr(int vid, struct vhost_vring_addr *addr) -{ - struct virtio_net *dev; - struct vhost_virtqueue *vq; - - dev = get_device(vid); - if ((dev == NULL) || (dev->mem == NULL)) - return -1; - - /* addr->index refers to the queue index. The txq 1, rxq is 0. */ - vq = dev->virtqueue[addr->index]; - - /* The addresses are converted from QEMU virtual to Vhost virtual. 
*/ - vq->desc = (struct vring_desc *)(uintptr_t)qva_to_vva(dev, - addr->desc_user_addr); - if (vq->desc == 0) { - RTE_LOG(ERR, VHOST_CONFIG, - "(%d) failed to find desc ring address.\n", - dev->vid); - return -1; - } - - dev = numa_realloc(dev, addr->index); - vq = dev->virtqueue[addr->index]; - - vq->avail = (struct vring_avail *)(uintptr_t)qva_to_vva(dev, - addr->avail_user_addr); - if (vq->avail == 0) { - RTE_LOG(ERR, VHOST_CONFIG, - "(%d) failed to find avail ring address.\n", - dev->vid); - return -1; - } - - vq->used = (struct vring_used *)(uintptr_t)qva_to_vva(dev, - addr->used_user_addr); - if (vq->used == 0) { - RTE_LOG(ERR, VHOST_CONFIG, - "(%d) failed to find used ring address.\n", - dev->vid); - return -1; - } - - if (vq->last_used_idx != vq->used->idx) { - RTE_LOG(WARNING, VHOST_CONFIG, - "last_used_idx (%u) and vq->used->idx (%u) mismatches; " - "some packets maybe resent for Tx and dropped for Rx\n", - vq->last_used_idx, vq->used->idx); - vq->last_used_idx = vq->used->idx; - } - - vq->log_guest_addr = addr->log_guest_addr; - - LOG_DEBUG(VHOST_CONFIG, "(%d) mapped address desc: %p\n", - dev->vid, vq->desc); - LOG_DEBUG(VHOST_CONFIG, "(%d) mapped address avail: %p\n", - dev->vid, vq->avail); - LOG_DEBUG(VHOST_CONFIG, "(%d) mapped address used: %p\n", - dev->vid, vq->used); - LOG_DEBUG(VHOST_CONFIG, "(%d) log_guest_addr: %" PRIx64 "\n", - dev->vid, vq->log_guest_addr); - - return 0; -} - -/* - * Called from CUSE IOCTL: VHOST_SET_VRING_BASE - * The virtio device sends us the available ring last used index. - */ -int -vhost_set_vring_base(int vid, struct vhost_vring_state *state) -{ - struct virtio_net *dev; - - dev = get_device(vid); - if (dev == NULL) - return -1; - - /* State->index refers to the queue index. The txq is 1, rxq is 0. */ - dev->virtqueue[state->index]->last_used_idx = state->num; - - return 0; -} - -/* - * Called from CUSE IOCTL: VHOST_GET_VRING_BASE - * We send the virtio device our available ring last used index. - */ -int -vhost_get_vring_base(int vid, uint32_t index, - struct vhost_vring_state *state) -{ - struct virtio_net *dev; - - dev = get_device(vid); - if (dev == NULL) - return -1; - - state->index = index; - /* State->index refers to the queue index. The txq is 1, rxq is 0. */ - state->num = dev->virtqueue[state->index]->last_used_idx; - - return 0; -} - - -/* - * Called from CUSE IOCTL: VHOST_SET_VRING_CALL - * The virtio device sends an eventfd to interrupt the guest. This fd gets - * copied into our process space. - */ -int -vhost_set_vring_call(int vid, struct vhost_vring_file *file) -{ - struct virtio_net *dev; - struct vhost_virtqueue *vq; - uint32_t cur_qp_idx = file->index / VIRTIO_QNUM; - - dev = get_device(vid); - if (dev == NULL) - return -1; - - /* - * FIXME: VHOST_SET_VRING_CALL is the first per-vring message - * we get, so we do vring queue pair allocation here. - */ - if (cur_qp_idx + 1 > dev->virt_qp_nb) { - if (alloc_vring_queue_pair(dev, cur_qp_idx) < 0) - return -1; - } - - /* file->index refers to the queue index. The txq is 1, rxq is 0. */ - vq = dev->virtqueue[file->index]; - assert(vq != NULL); - - if (vq->callfd >= 0) - close(vq->callfd); - - vq->callfd = file->fd; - - return 0; -} - -/* - * Called from CUSE IOCTL: VHOST_SET_VRING_KICK - * The virtio device sends an eventfd that it can use to notify us. - * This fd gets copied into our process space. 
- */ -int -vhost_set_vring_kick(int vid, struct vhost_vring_file *file) -{ - struct virtio_net *dev; - struct vhost_virtqueue *vq; - - dev = get_device(vid); - if (dev == NULL) - return -1; - - /* file->index refers to the queue index. The txq is 1, rxq is 0. */ - vq = dev->virtqueue[file->index]; - - if (vq->kickfd >= 0) - close(vq->kickfd); - - vq->kickfd = file->fd; - - return 0; -} - -/* - * Called from CUSE IOCTL: VHOST_NET_SET_BACKEND - * To complete device initialisation when the virtio driver is loaded, - * we are provided with a valid fd for a tap device (not used by us). - * If this happens then we can add the device to a data core. - * When the virtio driver is removed we get fd=-1. - * At that point we remove the device from the data core. - * The device will still exist in the device configuration linked list. - */ -int -vhost_set_backend(int vid, struct vhost_vring_file *file) -{ - struct virtio_net *dev; - - dev = get_device(vid); - if (dev == NULL) - return -1; - - /* file->index refers to the queue index. The txq is 1, rxq is 0. */ - dev->virtqueue[file->index]->backend = file->fd; - - /* - * If the device isn't already running and both backend fds are set, - * we add the device. - */ - if (!(dev->flags & VIRTIO_DEV_RUNNING)) { - if (dev->virtqueue[VIRTIO_TXQ]->backend != VIRTIO_DEV_STOPPED && - dev->virtqueue[VIRTIO_RXQ]->backend != VIRTIO_DEV_STOPPED) { - if (notify_ops->new_device(vid) < 0) - return -1; - dev->flags |= VIRTIO_DEV_RUNNING; - } - } else if (file->fd == VIRTIO_DEV_STOPPED) { - dev->flags &= ~VIRTIO_DEV_RUNNING; - notify_ops->destroy_device(vid); - } - - return 0; -} - -int -rte_vhost_get_numa_node(int vid) -{ -#ifdef RTE_LIBRTE_VHOST_NUMA - struct virtio_net *dev = get_device(vid); - int numa_node; - int ret; - - if (dev == NULL) - return -1; - - ret = get_mempolicy(&numa_node, NULL, 0, dev, - MPOL_F_NODE | MPOL_F_ADDR); - if (ret < 0) { - RTE_LOG(ERR, VHOST_CONFIG, - "(%d) failed to query numa node: %d\n", vid, ret); - return -1; - } - - return numa_node; -#else - RTE_SET_USED(vid); - return -1; -#endif -} - -uint32_t -rte_vhost_get_queue_num(int vid) -{ - struct virtio_net *dev = get_device(vid); - - if (dev == NULL) - return 0; - - return dev->virt_qp_nb; -} - -int -rte_vhost_get_ifname(int vid, char *buf, size_t len) -{ - struct virtio_net *dev = get_device(vid); - - if (dev == NULL) - return -1; - - len = RTE_MIN(len, sizeof(dev->ifname)); - - strncpy(buf, dev->ifname, len); - buf[len - 1] = '\0'; - - return 0; -} - -uint16_t -rte_vhost_avail_entries(int vid, uint16_t queue_id) -{ - struct virtio_net *dev; - struct vhost_virtqueue *vq; - - dev = get_device(vid); - if (!dev) - return 0; - - vq = dev->virtqueue[queue_id]; - if (!vq->enabled) - return 0; - - return *(volatile uint16_t *)&vq->avail->idx - vq->last_used_idx; -} - -int -rte_vhost_enable_guest_notification(int vid, uint16_t queue_id, int enable) -{ - struct virtio_net *dev = get_device(vid); - - if (dev == NULL) - return -1; - - if (enable) { - RTE_LOG(ERR, VHOST_CONFIG, - "guest notification isn't supported.\n"); - return -1; - } - - dev->virtqueue[queue_id]->used->flags = VRING_USED_F_NO_NOTIFY; - return 0; -} - -uint64_t rte_vhost_feature_get(void) -{ - return VHOST_FEATURES; -} - -int rte_vhost_feature_disable(uint64_t feature_mask) -{ - VHOST_FEATURES = VHOST_FEATURES & ~feature_mask; - return 0; -} - -int rte_vhost_feature_enable(uint64_t feature_mask) -{ - if ((feature_mask & VHOST_SUPPORTED_FEATURES) == feature_mask) { - VHOST_FEATURES = VHOST_FEATURES | feature_mask; - return 0; - } 
- return -1; -} - -/* - * Register ops so that we can add/remove device to data core. - */ -int -rte_vhost_driver_callback_register(struct virtio_net_device_ops const * const ops) -{ - notify_ops = ops; - - return 0; -} diff --git a/lib/librte_vhost/virtio_net.c b/lib/librte_vhost/virtio_net.c new file mode 100644 index 00000000..595f67c4 --- /dev/null +++ b/lib/librte_vhost/virtio_net.c @@ -0,0 +1,1182 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2016 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */
+
+#include <stdint.h>
+#include <stdbool.h>
+#include <linux/virtio_net.h>
+
+#include <rte_mbuf.h>
+#include <rte_memcpy.h>
+#include <rte_ether.h>
+#include <rte_ip.h>
+#include <rte_virtio_net.h>
+#include <rte_tcp.h>
+#include <rte_udp.h>
+#include <rte_sctp.h>
+#include <rte_arp.h>
+
+#include "vhost.h"
+
+#define MAX_PKT_BURST 32
+#define VHOST_LOG_PAGE 4096
+
+static inline void __attribute__((always_inline))
+vhost_log_page(uint8_t *log_base, uint64_t page)
+{
+	log_base[page / 8] |= 1 << (page % 8);
+}
+
+static inline void __attribute__((always_inline))
+vhost_log_write(struct virtio_net *dev, uint64_t addr, uint64_t len)
+{
+	uint64_t page;
+
+	if (likely(((dev->features & (1ULL << VHOST_F_LOG_ALL)) == 0) ||
+		   !dev->log_base || !len))
+		return;
+
+	if (unlikely(dev->log_size <= ((addr + len - 1) / VHOST_LOG_PAGE / 8)))
+		return;
+
+	/* To make sure guest memory updates are committed before logging */
+	rte_smp_wmb();
+
+	page = addr / VHOST_LOG_PAGE;
+	while (page * VHOST_LOG_PAGE < addr + len) {
+		vhost_log_page((uint8_t *)(uintptr_t)dev->log_base, page);
+		page += 1;
+	}
+}
+
+static inline void __attribute__((always_inline))
+vhost_log_used_vring(struct virtio_net *dev, struct vhost_virtqueue *vq,
+		     uint64_t offset, uint64_t len)
+{
+	vhost_log_write(dev, vq->log_guest_addr + offset, len);
+}
+
+static bool
+is_valid_virt_queue_idx(uint32_t idx, int is_tx, uint32_t qp_nb)
+{
+	return (is_tx ^ (idx & 1)) == 0 && idx < qp_nb * VIRTIO_QNUM;
+}
+
+static inline void __attribute__((always_inline))
+do_flush_shadow_used_ring(struct virtio_net *dev, struct vhost_virtqueue *vq,
+			  uint16_t to, uint16_t from, uint16_t size)
+{
+	rte_memcpy(&vq->used->ring[to],
+		   &vq->shadow_used_ring[from],
+		   size * sizeof(struct vring_used_elem));
+	vhost_log_used_vring(dev, vq,
+			     offsetof(struct vring_used, ring[to]),
+			     size * sizeof(struct vring_used_elem));
+}
+
+static inline void __attribute__((always_inline))
+flush_shadow_used_ring(struct virtio_net *dev, struct vhost_virtqueue *vq)
+{
+	uint16_t used_idx = vq->last_used_idx & (vq->size - 1);
+
+	if (used_idx + vq->shadow_used_idx <= vq->size) {
+		do_flush_shadow_used_ring(dev, vq, used_idx, 0,
+					  vq->shadow_used_idx);
+	} else {
+		uint16_t size;
+
+		/* update used ring interval [used_idx, vq->size] */
+		size = vq->size - used_idx;
+		do_flush_shadow_used_ring(dev, vq, used_idx, 0, size);
+
+		/* update the left half used ring interval [0, left_size] */
+		do_flush_shadow_used_ring(dev, vq, 0, size,
+					  vq->shadow_used_idx - size);
+	}
+	vq->last_used_idx += vq->shadow_used_idx;
+
+	rte_smp_wmb();
+
+	*(volatile uint16_t *)&vq->used->idx += vq->shadow_used_idx;
+	vhost_log_used_vring(dev, vq, offsetof(struct vring_used, idx),
+			     sizeof(vq->used->idx));
+}
+
+static inline void __attribute__((always_inline))
+update_shadow_used_ring(struct vhost_virtqueue *vq,
+			uint16_t desc_idx, uint16_t len)
+{
+	uint16_t i = vq->shadow_used_idx++;
+
+	vq->shadow_used_ring[i].id = desc_idx;
+	vq->shadow_used_ring[i].len = len;
+}
+
+static void
+virtio_enqueue_offload(struct rte_mbuf *m_buf, struct virtio_net_hdr *net_hdr)
+{
+	if (m_buf->ol_flags & PKT_TX_L4_MASK) {
+		net_hdr->flags = VIRTIO_NET_HDR_F_NEEDS_CSUM;
+		net_hdr->csum_start = m_buf->l2_len + m_buf->l3_len;
+
+		switch (m_buf->ol_flags & PKT_TX_L4_MASK) {
+		case PKT_TX_TCP_CKSUM:
+			net_hdr->csum_offset = (offsetof(struct tcp_hdr,
+						cksum));
+			break;
+		case PKT_TX_UDP_CKSUM:
+			net_hdr->csum_offset = (offsetof(struct udp_hdr,
+						dgram_cksum));
+			break;
+		case PKT_TX_SCTP_CKSUM:
+			net_hdr->csum_offset = (offsetof(struct sctp_hdr,
+						cksum));
+			break;
+		}
+	}
+
+	if (m_buf->ol_flags & PKT_TX_TCP_SEG) {
+		if (m_buf->ol_flags & PKT_TX_IPV4)
+			net_hdr->gso_type = VIRTIO_NET_HDR_GSO_TCPV4;
+		else
+			net_hdr->gso_type = VIRTIO_NET_HDR_GSO_TCPV6;
+		net_hdr->gso_size = m_buf->tso_segsz;
+		net_hdr->hdr_len = m_buf->l2_len + m_buf->l3_len +
+				   m_buf->l4_len;
+	}
+}
+
+static inline void
+copy_virtio_net_hdr(struct virtio_net *dev, uint64_t desc_addr,
+		    struct virtio_net_hdr_mrg_rxbuf hdr)
+{
+	if (dev->vhost_hlen == sizeof(struct virtio_net_hdr_mrg_rxbuf))
+		*(struct virtio_net_hdr_mrg_rxbuf *)(uintptr_t)desc_addr = hdr;
+	else
+		*(struct virtio_net_hdr *)(uintptr_t)desc_addr = hdr.hdr;
+}
+
+static inline int __attribute__((always_inline))
+copy_mbuf_to_desc(struct virtio_net *dev, struct vring_desc *descs,
+		  struct rte_mbuf *m, uint16_t desc_idx, uint32_t size)
+{
+	uint32_t desc_avail, desc_offset;
+	uint32_t mbuf_avail, mbuf_offset;
+	uint32_t cpy_len;
+	struct vring_desc *desc;
+	uint64_t desc_addr;
+	struct virtio_net_hdr_mrg_rxbuf virtio_hdr = {{0, 0, 0, 0, 0, 0}, 0};
+
+	desc = &descs[desc_idx];
+	desc_addr = gpa_to_vva(dev, desc->addr);
+	/*
+	 * The check on 'desc_addr' is placed outside the 'unlikely' macro
+	 * to avoid a performance issue with some versions of gcc (4.8.4 and
+	 * 5.3.0), which otherwise store the offset on the stack instead of
+	 * in a register.
+	 */
+	if (unlikely(desc->len < dev->vhost_hlen) || !desc_addr)
+		return -1;
+
+	rte_prefetch0((void *)(uintptr_t)desc_addr);
+
+	virtio_enqueue_offload(m, &virtio_hdr.hdr);
+	copy_virtio_net_hdr(dev, desc_addr, virtio_hdr);
+	vhost_log_write(dev, desc->addr, dev->vhost_hlen);
+	PRINT_PACKET(dev, (uintptr_t)desc_addr, dev->vhost_hlen, 0);
+
+	desc_offset = dev->vhost_hlen;
+	desc_avail = desc->len - dev->vhost_hlen;
+
+	mbuf_avail = rte_pktmbuf_data_len(m);
+	mbuf_offset = 0;
+	while (mbuf_avail != 0 || m->next != NULL) {
+		/* done with current mbuf, fetch next */
+		if (mbuf_avail == 0) {
+			m = m->next;
+
+			mbuf_offset = 0;
+			mbuf_avail = rte_pktmbuf_data_len(m);
+		}
+
+		/* done with current desc buf, fetch next */
+		if (desc_avail == 0) {
+			if ((desc->flags & VRING_DESC_F_NEXT) == 0) {
+				/* Not enough room in the vring buffer */
+				return -1;
+			}
+			if (unlikely(desc->next >= size))
+				return -1;
+
+			desc = &descs[desc->next];
+			desc_addr = gpa_to_vva(dev, desc->addr);
+			if (unlikely(!desc_addr))
+				return -1;
+
+			desc_offset = 0;
+			desc_avail = desc->len;
+		}
+
+		cpy_len = RTE_MIN(desc_avail, mbuf_avail);
+		rte_memcpy((void *)((uintptr_t)(desc_addr + desc_offset)),
+			   rte_pktmbuf_mtod_offset(m, void *, mbuf_offset),
+			   cpy_len);
+		vhost_log_write(dev, desc->addr + desc_offset, cpy_len);
+		PRINT_PACKET(dev, (uintptr_t)(desc_addr + desc_offset),
+			     cpy_len, 0);
+
+		mbuf_avail -= cpy_len;
+		mbuf_offset += cpy_len;
+		desc_avail -= cpy_len;
+		desc_offset += cpy_len;
+	}
+
+	return 0;
+}
+
+/**
+ * This function adds buffers to the virtio device's RX virtqueue. Buffers can
+ * be received from the physical port or from another virtio device. A packet
+ * count is returned to indicate the number of packets that were successfully
+ * added to the RX queue. This function works when the mbuf is scattered, but
+ * it doesn't support the mergeable feature.
+ */
+static inline uint32_t __attribute__((always_inline))
+virtio_dev_rx(struct virtio_net *dev, uint16_t queue_id,
+	      struct rte_mbuf **pkts, uint32_t count)
+{
+	struct vhost_virtqueue *vq;
+	uint16_t avail_idx, free_entries, start_idx;
+	uint16_t desc_indexes[MAX_PKT_BURST];
+	struct vring_desc *descs;
+	uint16_t used_idx;
+	uint32_t i, sz;
+
+	LOG_DEBUG(VHOST_DATA, "(%d) %s\n", dev->vid, __func__);
+	if (unlikely(!is_valid_virt_queue_idx(queue_id, 0, dev->virt_qp_nb))) {
+		RTE_LOG(ERR, VHOST_DATA, "(%d) %s: invalid virtqueue idx %d.\n",
+			dev->vid, __func__, queue_id);
+		return 0;
+	}
+
+	vq = dev->virtqueue[queue_id];
+	if (unlikely(vq->enabled == 0))
+		return 0;
+
+	avail_idx = *((volatile uint16_t *)&vq->avail->idx);
+	start_idx = vq->last_used_idx;
+	free_entries = avail_idx - start_idx;
+	count = RTE_MIN(count, free_entries);
+	count = RTE_MIN(count, (uint32_t)MAX_PKT_BURST);
+	if (count == 0)
+		return 0;
+
+	LOG_DEBUG(VHOST_DATA, "(%d) start_idx %d | end_idx %d\n",
+		dev->vid, start_idx, start_idx + count);
+
+	/* Retrieve all of the desc indexes first to avoid caching issues. */
+	rte_prefetch0(&vq->avail->ring[start_idx & (vq->size - 1)]);
+	for (i = 0; i < count; i++) {
+		used_idx = (start_idx + i) & (vq->size - 1);
+		desc_indexes[i] = vq->avail->ring[used_idx];
+		vq->used->ring[used_idx].id = desc_indexes[i];
+		vq->used->ring[used_idx].len = pkts[i]->pkt_len +
+					       dev->vhost_hlen;
+		vhost_log_used_vring(dev, vq,
+			offsetof(struct vring_used, ring[used_idx]),
+			sizeof(vq->used->ring[used_idx]));
+	}
+
+	rte_prefetch0(&vq->desc[desc_indexes[0]]);
+	for (i = 0; i < count; i++) {
+		uint16_t desc_idx = desc_indexes[i];
+		int err;
+
+		if (vq->desc[desc_idx].flags & VRING_DESC_F_INDIRECT) {
+			descs = (struct vring_desc *)(uintptr_t)gpa_to_vva(dev,
+					vq->desc[desc_idx].addr);
+			if (unlikely(!descs)) {
+				count = i;
+				break;
+			}
+
+			/*
+			 * Size the indirect table from the original
+			 * descriptor's length before resetting desc_idx
+			 * to index into that table.
+			 */
+			sz = vq->desc[desc_idx].len / sizeof(*descs);
+			desc_idx = 0;
+		} else {
+			descs = vq->desc;
+			sz = vq->size;
+		}
+
+		err = copy_mbuf_to_desc(dev, descs, pkts[i], desc_idx, sz);
+		if (unlikely(err)) {
+			used_idx = (start_idx + i) & (vq->size - 1);
+			vq->used->ring[used_idx].len = dev->vhost_hlen;
+			vhost_log_used_vring(dev, vq,
+				offsetof(struct vring_used, ring[used_idx]),
+				sizeof(vq->used->ring[used_idx]));
+		}
+
+		if (i + 1 < count)
+			rte_prefetch0(&vq->desc[desc_indexes[i+1]]);
+	}
+
+	rte_smp_wmb();
+
+	*(volatile uint16_t *)&vq->used->idx += count;
+	vq->last_used_idx += count;
+	vhost_log_used_vring(dev, vq,
+		offsetof(struct vring_used, idx),
+		sizeof(vq->used->idx));
+
+	/* flush used->idx update before we read avail->flags. */
+	rte_mb();
+
+	/* Kick the guest if necessary.
*/ + if (!(vq->avail->flags & VRING_AVAIL_F_NO_INTERRUPT) + && (vq->callfd >= 0)) + eventfd_write(vq->callfd, (eventfd_t)1); + return count; +} + +static inline int __attribute__((always_inline)) +fill_vec_buf(struct virtio_net *dev, struct vhost_virtqueue *vq, + uint32_t avail_idx, uint32_t *vec_idx, + struct buf_vector *buf_vec, uint16_t *desc_chain_head, + uint16_t *desc_chain_len) +{ + uint16_t idx = vq->avail->ring[avail_idx & (vq->size - 1)]; + uint32_t vec_id = *vec_idx; + uint32_t len = 0; + struct vring_desc *descs = vq->desc; + + *desc_chain_head = idx; + + if (vq->desc[idx].flags & VRING_DESC_F_INDIRECT) { + descs = (struct vring_desc *)(uintptr_t) + gpa_to_vva(dev, vq->desc[idx].addr); + if (unlikely(!descs)) + return -1; + + idx = 0; + } + + while (1) { + if (unlikely(vec_id >= BUF_VECTOR_MAX || idx >= vq->size)) + return -1; + + len += descs[idx].len; + buf_vec[vec_id].buf_addr = descs[idx].addr; + buf_vec[vec_id].buf_len = descs[idx].len; + buf_vec[vec_id].desc_idx = idx; + vec_id++; + + if ((descs[idx].flags & VRING_DESC_F_NEXT) == 0) + break; + + idx = descs[idx].next; + } + + *desc_chain_len = len; + *vec_idx = vec_id; + + return 0; +} + +/* + * Returns -1 on fail, 0 on success + */ +static inline int +reserve_avail_buf_mergeable(struct virtio_net *dev, struct vhost_virtqueue *vq, + uint32_t size, struct buf_vector *buf_vec, + uint16_t *num_buffers, uint16_t avail_head) +{ + uint16_t cur_idx; + uint32_t vec_idx = 0; + uint16_t tries = 0; + + uint16_t head_idx = 0; + uint16_t len = 0; + + *num_buffers = 0; + cur_idx = vq->last_avail_idx; + + while (size > 0) { + if (unlikely(cur_idx == avail_head)) + return -1; + + if (unlikely(fill_vec_buf(dev, vq, cur_idx, &vec_idx, buf_vec, + &head_idx, &len) < 0)) + return -1; + len = RTE_MIN(len, size); + update_shadow_used_ring(vq, head_idx, len); + size -= len; + + cur_idx++; + tries++; + *num_buffers += 1; + + /* + * if we tried all available ring items, and still + * can't get enough buf, it means something abnormal + * happened. + */ + if (unlikely(tries >= vq->size)) + return -1; + } + + return 0; +} + +static inline int __attribute__((always_inline)) +copy_mbuf_to_desc_mergeable(struct virtio_net *dev, struct rte_mbuf *m, + struct buf_vector *buf_vec, uint16_t num_buffers) +{ + struct virtio_net_hdr_mrg_rxbuf virtio_hdr = {{0, 0, 0, 0, 0, 0}, 0}; + uint32_t vec_idx = 0; + uint64_t desc_addr; + uint32_t mbuf_offset, mbuf_avail; + uint32_t desc_offset, desc_avail; + uint32_t cpy_len; + uint64_t hdr_addr, hdr_phys_addr; + struct rte_mbuf *hdr_mbuf; + + if (unlikely(m == NULL)) + return -1; + + desc_addr = gpa_to_vva(dev, buf_vec[vec_idx].buf_addr); + if (buf_vec[vec_idx].buf_len < dev->vhost_hlen || !desc_addr) + return -1; + + hdr_mbuf = m; + hdr_addr = desc_addr; + hdr_phys_addr = buf_vec[vec_idx].buf_addr; + rte_prefetch0((void *)(uintptr_t)hdr_addr); + + virtio_hdr.num_buffers = num_buffers; + LOG_DEBUG(VHOST_DATA, "(%d) RX: num merge buffers %d\n", + dev->vid, num_buffers); + + desc_avail = buf_vec[vec_idx].buf_len - dev->vhost_hlen; + desc_offset = dev->vhost_hlen; + + mbuf_avail = rte_pktmbuf_data_len(m); + mbuf_offset = 0; + while (mbuf_avail != 0 || m->next != NULL) { + /* done with current desc buf, get the next one */ + if (desc_avail == 0) { + vec_idx++; + desc_addr = gpa_to_vva(dev, buf_vec[vec_idx].buf_addr); + if (unlikely(!desc_addr)) + return -1; + + /* Prefetch buffer address. 
*/ + rte_prefetch0((void *)(uintptr_t)desc_addr); + desc_offset = 0; + desc_avail = buf_vec[vec_idx].buf_len; + } + + /* done with current mbuf, get the next one */ + if (mbuf_avail == 0) { + m = m->next; + + mbuf_offset = 0; + mbuf_avail = rte_pktmbuf_data_len(m); + } + + if (hdr_addr) { + virtio_enqueue_offload(hdr_mbuf, &virtio_hdr.hdr); + copy_virtio_net_hdr(dev, hdr_addr, virtio_hdr); + vhost_log_write(dev, hdr_phys_addr, dev->vhost_hlen); + PRINT_PACKET(dev, (uintptr_t)hdr_addr, + dev->vhost_hlen, 0); + + hdr_addr = 0; + } + + cpy_len = RTE_MIN(desc_avail, mbuf_avail); + rte_memcpy((void *)((uintptr_t)(desc_addr + desc_offset)), + rte_pktmbuf_mtod_offset(m, void *, mbuf_offset), + cpy_len); + vhost_log_write(dev, buf_vec[vec_idx].buf_addr + desc_offset, + cpy_len); + PRINT_PACKET(dev, (uintptr_t)(desc_addr + desc_offset), + cpy_len, 0); + + mbuf_avail -= cpy_len; + mbuf_offset += cpy_len; + desc_avail -= cpy_len; + desc_offset += cpy_len; + } + + return 0; +} + +static inline uint32_t __attribute__((always_inline)) +virtio_dev_merge_rx(struct virtio_net *dev, uint16_t queue_id, + struct rte_mbuf **pkts, uint32_t count) +{ + struct vhost_virtqueue *vq; + uint32_t pkt_idx = 0; + uint16_t num_buffers; + struct buf_vector buf_vec[BUF_VECTOR_MAX]; + uint16_t avail_head; + + LOG_DEBUG(VHOST_DATA, "(%d) %s\n", dev->vid, __func__); + if (unlikely(!is_valid_virt_queue_idx(queue_id, 0, dev->virt_qp_nb))) { + RTE_LOG(ERR, VHOST_DATA, "(%d) %s: invalid virtqueue idx %d.\n", + dev->vid, __func__, queue_id); + return 0; + } + + vq = dev->virtqueue[queue_id]; + if (unlikely(vq->enabled == 0)) + return 0; + + count = RTE_MIN((uint32_t)MAX_PKT_BURST, count); + if (count == 0) + return 0; + + rte_prefetch0(&vq->avail->ring[vq->last_avail_idx & (vq->size - 1)]); + + vq->shadow_used_idx = 0; + avail_head = *((volatile uint16_t *)&vq->avail->idx); + for (pkt_idx = 0; pkt_idx < count; pkt_idx++) { + uint32_t pkt_len = pkts[pkt_idx]->pkt_len + dev->vhost_hlen; + + if (unlikely(reserve_avail_buf_mergeable(dev, vq, + pkt_len, buf_vec, &num_buffers, + avail_head) < 0)) { + LOG_DEBUG(VHOST_DATA, + "(%d) failed to get enough desc from vring\n", + dev->vid); + vq->shadow_used_idx -= num_buffers; + break; + } + + LOG_DEBUG(VHOST_DATA, "(%d) current index %d | end index %d\n", + dev->vid, vq->last_avail_idx, + vq->last_avail_idx + num_buffers); + + if (copy_mbuf_to_desc_mergeable(dev, pkts[pkt_idx], + buf_vec, num_buffers) < 0) { + vq->shadow_used_idx -= num_buffers; + break; + } + + vq->last_avail_idx += num_buffers; + } + + if (likely(vq->shadow_used_idx)) { + flush_shadow_used_ring(dev, vq); + + /* flush used->idx update before we read avail->flags. */ + rte_mb(); + + /* Kick the guest if necessary. 
*/
+		if (!(vq->avail->flags & VRING_AVAIL_F_NO_INTERRUPT)
+				&& (vq->callfd >= 0))
+			eventfd_write(vq->callfd, (eventfd_t)1);
+	}
+
+	return pkt_idx;
+}
+
+uint16_t
+rte_vhost_enqueue_burst(int vid, uint16_t queue_id,
+	struct rte_mbuf **pkts, uint16_t count)
+{
+	struct virtio_net *dev = get_device(vid);
+
+	if (!dev)
+		return 0;
+
+	if (dev->features & (1 << VIRTIO_NET_F_MRG_RXBUF))
+		return virtio_dev_merge_rx(dev, queue_id, pkts, count);
+	else
+		return virtio_dev_rx(dev, queue_id, pkts, count);
+}
+
+static inline bool
+virtio_net_with_host_offload(struct virtio_net *dev)
+{
+	/*
+	 * VIRTIO_NET_F_* are bit positions; turn them into masks before
+	 * testing dev->features, which is a feature bitmask.
+	 */
+	if (dev->features &
+			((1ULL << VIRTIO_NET_F_CSUM) |
+			 (1ULL << VIRTIO_NET_F_HOST_ECN) |
+			 (1ULL << VIRTIO_NET_F_HOST_TSO4) |
+			 (1ULL << VIRTIO_NET_F_HOST_TSO6) |
+			 (1ULL << VIRTIO_NET_F_HOST_UFO)))
+		return true;
+
+	return false;
+}
+
+static void
+parse_ethernet(struct rte_mbuf *m, uint16_t *l4_proto, void **l4_hdr)
+{
+	struct ipv4_hdr *ipv4_hdr;
+	struct ipv6_hdr *ipv6_hdr;
+	void *l3_hdr = NULL;
+	struct ether_hdr *eth_hdr;
+	uint16_t ethertype;
+
+	eth_hdr = rte_pktmbuf_mtod(m, struct ether_hdr *);
+
+	m->l2_len = sizeof(struct ether_hdr);
+	ethertype = rte_be_to_cpu_16(eth_hdr->ether_type);
+
+	if (ethertype == ETHER_TYPE_VLAN) {
+		struct vlan_hdr *vlan_hdr = (struct vlan_hdr *)(eth_hdr + 1);
+
+		m->l2_len += sizeof(struct vlan_hdr);
+		ethertype = rte_be_to_cpu_16(vlan_hdr->eth_proto);
+	}
+
+	l3_hdr = (char *)eth_hdr + m->l2_len;
+
+	switch (ethertype) {
+	case ETHER_TYPE_IPv4:
+		ipv4_hdr = (struct ipv4_hdr *)l3_hdr;
+		*l4_proto = ipv4_hdr->next_proto_id;
+		m->l3_len = (ipv4_hdr->version_ihl & 0x0f) * 4;
+		*l4_hdr = (char *)l3_hdr + m->l3_len;
+		m->ol_flags |= PKT_TX_IPV4;
+		break;
+	case ETHER_TYPE_IPv6:
+		ipv6_hdr = (struct ipv6_hdr *)l3_hdr;
+		*l4_proto = ipv6_hdr->proto;
+		m->l3_len = sizeof(struct ipv6_hdr);
+		*l4_hdr = (char *)l3_hdr + m->l3_len;
+		m->ol_flags |= PKT_TX_IPV6;
+		break;
+	default:
+		m->l3_len = 0;
+		*l4_proto = 0;
+		break;
+	}
+}
+
+static inline void __attribute__((always_inline))
+vhost_dequeue_offload(struct virtio_net_hdr *hdr, struct rte_mbuf *m)
+{
+	uint16_t l4_proto = 0;
+	void *l4_hdr = NULL;
+	struct tcp_hdr *tcp_hdr = NULL;
+
+	if (hdr->flags == 0 && hdr->gso_type == VIRTIO_NET_HDR_GSO_NONE)
+		return;
+
+	parse_ethernet(m, &l4_proto, &l4_hdr);
+	if (hdr->flags == VIRTIO_NET_HDR_F_NEEDS_CSUM) {
+		if (hdr->csum_start == (m->l2_len + m->l3_len)) {
+			switch (hdr->csum_offset) {
+			case (offsetof(struct tcp_hdr, cksum)):
+				if (l4_proto == IPPROTO_TCP)
+					m->ol_flags |= PKT_TX_TCP_CKSUM;
+				break;
+			case (offsetof(struct udp_hdr, dgram_cksum)):
+				if (l4_proto == IPPROTO_UDP)
+					m->ol_flags |= PKT_TX_UDP_CKSUM;
+				break;
+			case (offsetof(struct sctp_hdr, cksum)):
+				if (l4_proto == IPPROTO_SCTP)
+					m->ol_flags |= PKT_TX_SCTP_CKSUM;
+				break;
+			default:
+				break;
+			}
+		}
+	}
+
+	if (hdr->gso_type != VIRTIO_NET_HDR_GSO_NONE) {
+		switch (hdr->gso_type & ~VIRTIO_NET_HDR_GSO_ECN) {
+		case VIRTIO_NET_HDR_GSO_TCPV4:
+		case VIRTIO_NET_HDR_GSO_TCPV6:
+			tcp_hdr = (struct tcp_hdr *)l4_hdr;
+			m->ol_flags |= PKT_TX_TCP_SEG;
+			m->tso_segsz = hdr->gso_size;
+			m->l4_len = (tcp_hdr->data_off & 0xf0) >> 2;
+			break;
+		default:
+			RTE_LOG(WARNING, VHOST_DATA,
+				"unsupported gso type %u.\n", hdr->gso_type);
+			break;
+		}
+	}
+}
+
+#define RARP_PKT_SIZE 64
+
+static int
+make_rarp_packet(struct rte_mbuf *rarp_mbuf, const struct ether_addr *mac)
+{
+	struct ether_hdr *eth_hdr;
+	struct arp_hdr *rarp;
+
+	if (rarp_mbuf->buf_len < RARP_PKT_SIZE) {
+		RTE_LOG(WARNING, VHOST_DATA,
+			"failed to make RARP; mbuf size too small %u (< %d)\n",
+			rarp_mbuf->buf_len, RARP_PKT_SIZE);
+		return -1;
+	}
+
+	/* Ethernet header. */
+	eth_hdr = rte_pktmbuf_mtod_offset(rarp_mbuf, struct ether_hdr *, 0);
+	memset(eth_hdr->d_addr.addr_bytes, 0xff, ETHER_ADDR_LEN);
+	ether_addr_copy(mac, &eth_hdr->s_addr);
+	eth_hdr->ether_type = htons(ETHER_TYPE_RARP);
+
+	/* RARP header. */
+	rarp = (struct arp_hdr *)(eth_hdr + 1);
+	rarp->arp_hrd = htons(ARP_HRD_ETHER);
+	rarp->arp_pro = htons(ETHER_TYPE_IPv4);
+	rarp->arp_hln = ETHER_ADDR_LEN;
+	rarp->arp_pln = 4;
+	rarp->arp_op = htons(ARP_OP_REVREQUEST);
+
+	ether_addr_copy(mac, &rarp->arp_data.arp_sha);
+	ether_addr_copy(mac, &rarp->arp_data.arp_tha);
+	memset(&rarp->arp_data.arp_sip, 0x00, 4);
+	memset(&rarp->arp_data.arp_tip, 0x00, 4);
+
+	rarp_mbuf->pkt_len = rarp_mbuf->data_len = RARP_PKT_SIZE;
+
+	return 0;
+}
+
+static inline void __attribute__((always_inline))
+put_zmbuf(struct zcopy_mbuf *zmbuf)
+{
+	zmbuf->in_use = 0;
+}
+
+static inline int __attribute__((always_inline))
+copy_desc_to_mbuf(struct virtio_net *dev, struct vring_desc *descs,
+		  uint16_t max_desc, struct rte_mbuf *m, uint16_t desc_idx,
+		  struct rte_mempool *mbuf_pool)
+{
+	struct vring_desc *desc;
+	uint64_t desc_addr;
+	uint32_t desc_avail, desc_offset;
+	uint32_t mbuf_avail, mbuf_offset;
+	uint32_t cpy_len;
+	struct rte_mbuf *cur = m, *prev = m;
+	struct virtio_net_hdr *hdr = NULL;
+	/* A counter to avoid a dead loop in the descriptor chain */
+	uint32_t nr_desc = 1;
+
+	desc = &descs[desc_idx];
+	if (unlikely((desc->len < dev->vhost_hlen)) ||
+			(desc->flags & VRING_DESC_F_INDIRECT))
+		return -1;
+
+	desc_addr = gpa_to_vva(dev, desc->addr);
+	if (unlikely(!desc_addr))
+		return -1;
+
+	if (virtio_net_with_host_offload(dev)) {
+		hdr = (struct virtio_net_hdr *)((uintptr_t)desc_addr);
+		rte_prefetch0(hdr);
+	}
+
+	/*
+	 * A virtio driver normally uses at least 2 desc buffers
+	 * for Tx: the first for storing the header, and others
+	 * for storing the data.
+	 */
+	if (likely((desc->len == dev->vhost_hlen) &&
+		   (desc->flags & VRING_DESC_F_NEXT) != 0)) {
+		desc = &descs[desc->next];
+		if (unlikely(desc->flags & VRING_DESC_F_INDIRECT))
+			return -1;
+
+		desc_addr = gpa_to_vva(dev, desc->addr);
+		if (unlikely(!desc_addr))
+			return -1;
+
+		desc_offset = 0;
+		desc_avail = desc->len;
+		nr_desc += 1;
+	} else {
+		desc_avail = desc->len - dev->vhost_hlen;
+		desc_offset = dev->vhost_hlen;
+	}
+
+	rte_prefetch0((void *)(uintptr_t)(desc_addr + desc_offset));
+
+	PRINT_PACKET(dev, (uintptr_t)(desc_addr + desc_offset), desc_avail, 0);
+
+	mbuf_offset = 0;
+	mbuf_avail = m->buf_len - RTE_PKTMBUF_HEADROOM;
+	while (1) {
+		uint64_t hpa;
+
+		cpy_len = RTE_MIN(desc_avail, mbuf_avail);
+
+		/*
+		 * A desc buf might span two host physical pages that are
+		 * not contiguous. In such a case (gpa_to_hpa returns 0),
+		 * data will be copied even though zero copy is enabled.
+		 */
+		if (unlikely(dev->dequeue_zero_copy && (hpa = gpa_to_hpa(dev,
+					desc->addr + desc_offset, cpy_len)))) {
+			cur->data_len = cpy_len;
+			cur->data_off = 0;
+			cur->buf_addr = (void *)(uintptr_t)desc_addr;
+			cur->buf_physaddr = hpa;
+
+			/*
+			 * In zero copy mode, one mbuf can only reference
+			 * data for one desc buf, or part of one.
+			 */
+			mbuf_avail = cpy_len;
+		} else {
+			rte_memcpy(rte_pktmbuf_mtod_offset(cur, void *,
+							   mbuf_offset),
+				(void *)((uintptr_t)(desc_addr + desc_offset)),
+				cpy_len);
+		}
+
+		mbuf_avail -= cpy_len;
+		mbuf_offset += cpy_len;
+		desc_avail -= cpy_len;
+		desc_offset += cpy_len;
+
+		/* This desc buf reached its end; fetch the next one */
+		if (desc_avail == 0) {
+			if ((desc->flags & VRING_DESC_F_NEXT) == 0)
+				break;
+
+			if (unlikely(desc->next >= max_desc ||
+				     ++nr_desc > max_desc))
+				return -1;
+			desc = &descs[desc->next];
+			if (unlikely(desc->flags & VRING_DESC_F_INDIRECT))
+				return -1;
+
+			desc_addr = gpa_to_vva(dev, desc->addr);
+			if (unlikely(!desc_addr))
+				return -1;
+
+			rte_prefetch0((void *)(uintptr_t)desc_addr);
+
+			desc_offset = 0;
+			desc_avail = desc->len;
+
+			PRINT_PACKET(dev, (uintptr_t)desc_addr, desc->len, 0);
+		}
+
+		/*
+		 * This mbuf reached its end; allocate a new one
+		 * to hold more data.
+		 */
+		if (mbuf_avail == 0) {
+			cur = rte_pktmbuf_alloc(mbuf_pool);
+			if (unlikely(cur == NULL)) {
+				RTE_LOG(ERR, VHOST_DATA, "Failed to "
+					"allocate memory for mbuf.\n");
+				return -1;
+			}
+
+			prev->next = cur;
+			prev->data_len = mbuf_offset;
+			m->nb_segs += 1;
+			m->pkt_len += mbuf_offset;
+			prev = cur;
+
+			mbuf_offset = 0;
+			mbuf_avail = cur->buf_len - RTE_PKTMBUF_HEADROOM;
+		}
+	}
+
+	prev->data_len = mbuf_offset;
+	m->pkt_len += mbuf_offset;
+
+	if (hdr)
+		vhost_dequeue_offload(hdr, m);
+
+	return 0;
+}
+
+static inline void __attribute__((always_inline))
+update_used_ring(struct virtio_net *dev, struct vhost_virtqueue *vq,
+		 uint32_t used_idx, uint32_t desc_idx)
+{
+	vq->used->ring[used_idx].id = desc_idx;
+	vq->used->ring[used_idx].len = 0;
+	vhost_log_used_vring(dev, vq,
+			offsetof(struct vring_used, ring[used_idx]),
+			sizeof(vq->used->ring[used_idx]));
+}
+
+static inline void __attribute__((always_inline))
+update_used_idx(struct virtio_net *dev, struct vhost_virtqueue *vq,
+		uint32_t count)
+{
+	if (unlikely(count == 0))
+		return;
+
+	rte_smp_wmb();
+	rte_smp_rmb();
+
+	vq->used->idx += count;
+	vhost_log_used_vring(dev, vq, offsetof(struct vring_used, idx),
+			sizeof(vq->used->idx));
+
+	/* Kick guest if required. */
+	if (!(vq->avail->flags & VRING_AVAIL_F_NO_INTERRUPT)
+			&& (vq->callfd >= 0))
+		eventfd_write(vq->callfd, (eventfd_t)1);
+}
+
+static inline struct zcopy_mbuf *__attribute__((always_inline))
+get_zmbuf(struct vhost_virtqueue *vq)
+{
+	uint16_t i;
+	uint16_t last;
+	int tries = 0;
+
+	/* search [last_zmbuf_idx, zmbuf_size) */
+	i = vq->last_zmbuf_idx;
+	last = vq->zmbuf_size;
+
+again:
+	for (; i < last; i++) {
+		if (vq->zmbufs[i].in_use == 0) {
+			vq->last_zmbuf_idx = i + 1;
+			vq->zmbufs[i].in_use = 1;
+			return &vq->zmbufs[i];
+		}
+	}
+
+	tries++;
+	if (tries == 1) {
+		/* search [0, last_zmbuf_idx) */
+		i = 0;
+		last = vq->last_zmbuf_idx;
+		goto again;
+	}
+
+	return NULL;
+}
+
+static inline bool __attribute__((always_inline))
+mbuf_is_consumed(struct rte_mbuf *m)
+{
+	while (m) {
+		if (rte_mbuf_refcnt_read(m) > 1)
+			return false;
+		m = m->next;
+	}
+
+	return true;
+}
+
+uint16_t
+rte_vhost_dequeue_burst(int vid, uint16_t queue_id,
+	struct rte_mempool *mbuf_pool, struct rte_mbuf **pkts, uint16_t count)
+{
+	struct virtio_net *dev;
+	struct rte_mbuf *rarp_mbuf = NULL;
+	struct vhost_virtqueue *vq;
+	uint32_t desc_indexes[MAX_PKT_BURST];
+	uint32_t used_idx;
+	uint32_t i = 0;
+	uint16_t free_entries;
+	uint16_t avail_idx;
+
+	dev = get_device(vid);
+	if (!dev)
+		return 0;
+
+	if (unlikely(!is_valid_virt_queue_idx(queue_id, 1, dev->virt_qp_nb))) {
+		RTE_LOG(ERR, VHOST_DATA, "(%d) %s: invalid virtqueue idx %d.\n",
+			dev->vid, __func__, queue_id);
+		return 0;
+	}
+
+	vq = dev->virtqueue[queue_id];
+	if (unlikely(vq->enabled == 0))
+		return 0;
+
+	if (unlikely(dev->dequeue_zero_copy)) {
+		struct zcopy_mbuf *zmbuf, *next;
+		int nr_updated = 0;
+
+		for (zmbuf = TAILQ_FIRST(&vq->zmbuf_list);
+		     zmbuf != NULL; zmbuf = next) {
+			next = TAILQ_NEXT(zmbuf, next);
+
+			if (mbuf_is_consumed(zmbuf->mbuf)) {
+				used_idx = vq->last_used_idx++ & (vq->size - 1);
+				update_used_ring(dev, vq, used_idx,
+						 zmbuf->desc_idx);
+				nr_updated += 1;
+
+				TAILQ_REMOVE(&vq->zmbuf_list, zmbuf, next);
+				rte_pktmbuf_free(zmbuf->mbuf);
+				put_zmbuf(zmbuf);
+				vq->nr_zmbuf -= 1;
+			}
+		}
+
+		update_used_idx(dev, vq, nr_updated);
+	}
+
+	/*
+	 * Construct a RARP broadcast packet and inject it into the "pkts"
+	 * array, so that it looks like the guest actually sent it.
+	 *
+	 * Check user_send_rarp() for more information.
+	 */
+	if (unlikely(rte_atomic16_cmpset((volatile uint16_t *)
+					 &dev->broadcast_rarp.cnt, 1, 0))) {
+		rarp_mbuf = rte_pktmbuf_alloc(mbuf_pool);
+		if (rarp_mbuf == NULL) {
+			RTE_LOG(ERR, VHOST_DATA,
+				"Failed to allocate memory for mbuf.\n");
+			return 0;
+		}
+
+		if (make_rarp_packet(rarp_mbuf, &dev->mac)) {
+			rte_pktmbuf_free(rarp_mbuf);
+			rarp_mbuf = NULL;
+		} else {
+			count -= 1;
+		}
+	}
+
+	free_entries = *((volatile uint16_t *)&vq->avail->idx) -
+			vq->last_avail_idx;
+	if (free_entries == 0)
+		goto out;
+
+	LOG_DEBUG(VHOST_DATA, "(%d) %s\n", dev->vid, __func__);
+
+	/* Prefetch available and used ring */
+	avail_idx = vq->last_avail_idx & (vq->size - 1);
+	used_idx = vq->last_used_idx & (vq->size - 1);
+	rte_prefetch0(&vq->avail->ring[avail_idx]);
+	rte_prefetch0(&vq->used->ring[used_idx]);
+
+	count = RTE_MIN(count, MAX_PKT_BURST);
+	count = RTE_MIN(count, free_entries);
+	LOG_DEBUG(VHOST_DATA, "(%d) about to dequeue %u buffers\n",
+			dev->vid, count);
+
+	/* Retrieve all of the head indexes first to avoid caching issues. */
+	for (i = 0; i < count; i++) {
+		avail_idx = (vq->last_avail_idx + i) & (vq->size - 1);
+		used_idx = (vq->last_used_idx + i) & (vq->size - 1);
+		desc_indexes[i] = vq->avail->ring[avail_idx];
+
+		if (likely(dev->dequeue_zero_copy == 0))
+			update_used_ring(dev, vq, used_idx, desc_indexes[i]);
+	}
+
+	/* Prefetch descriptor index. */
+	rte_prefetch0(&vq->desc[desc_indexes[0]]);
+	for (i = 0; i < count; i++) {
+		struct vring_desc *desc;
+		uint16_t sz, idx;
+		int err;
+
+		if (likely(i + 1 < count))
+			rte_prefetch0(&vq->desc[desc_indexes[i + 1]]);
+
+		if (vq->desc[desc_indexes[i]].flags & VRING_DESC_F_INDIRECT) {
+			desc = (struct vring_desc *)(uintptr_t)gpa_to_vva(dev,
+					vq->desc[desc_indexes[i]].addr);
+			if (unlikely(!desc))
+				break;
+
+			rte_prefetch0(desc);
+			sz = vq->desc[desc_indexes[i]].len / sizeof(*desc);
+			idx = 0;
+		} else {
+			desc = vq->desc;
+			sz = vq->size;
+			idx = desc_indexes[i];
+		}
+
+		pkts[i] = rte_pktmbuf_alloc(mbuf_pool);
+		if (unlikely(pkts[i] == NULL)) {
+			RTE_LOG(ERR, VHOST_DATA,
+				"Failed to allocate memory for mbuf.\n");
+			break;
+		}
+
+		err = copy_desc_to_mbuf(dev, desc, sz, pkts[i], idx, mbuf_pool);
+		if (unlikely(err)) {
+			rte_pktmbuf_free(pkts[i]);
+			break;
+		}
+
+		if (unlikely(dev->dequeue_zero_copy)) {
+			struct zcopy_mbuf *zmbuf;
+
+			zmbuf = get_zmbuf(vq);
+			if (!zmbuf) {
+				rte_pktmbuf_free(pkts[i]);
+				break;
+			}
+			zmbuf->mbuf = pkts[i];
+			zmbuf->desc_idx = desc_indexes[i];
+
+			/*
+			 * Pin the mbuf with an extra reference; we check
+			 * later whether we are its last user (i.e. the
+			 * guest is done with it), in which case the used
+			 * ring can safely be updated.
+			 */
+			rte_mbuf_refcnt_update(pkts[i], 1);
+
+			vq->nr_zmbuf += 1;
+			TAILQ_INSERT_TAIL(&vq->zmbuf_list, zmbuf, next);
+		}
+	}
+	vq->last_avail_idx += i;
+
+	if (likely(dev->dequeue_zero_copy == 0)) {
+		vq->last_used_idx += i;
+		update_used_idx(dev, vq, i);
+	}
+
+out:
+	if (unlikely(rarp_mbuf != NULL)) {
+		/*
+		 * Inject it at the head of the "pkts" array, so that the
+		 * switch's MAC learning table gets updated first.
+		 */
+		memmove(&pkts[1], pkts, i * sizeof(struct rte_mbuf *));
+		pkts[0] = rarp_mbuf;
+		i += 1;
+	}
+
+	return i;
+}
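For reference, rte_vhost_enqueue_burst() and rte_vhost_dequeue_burst() above are the application-facing entry points of this file. A minimal sketch of how an application might drive them follows (illustrative only, not part of the patch: vhost_echo_burst and BURST_SZ are made-up names, the vid is assumed to have been obtained from the new_device() callback registered through rte_vhost_driver_callback_register(), and mempool setup and error handling are omitted):

	#include <rte_mbuf.h>
	#include <rte_virtio_net.h>

	#define BURST_SZ 32

	static void
	vhost_echo_burst(int vid, struct rte_mempool *mbuf_pool)
	{
		struct rte_mbuf *pkts[BURST_SZ];
		uint16_t nb_rx, i;

		/* Drain what the guest placed on its TX ring (VIRTIO_TXQ). */
		nb_rx = rte_vhost_dequeue_burst(vid, VIRTIO_TXQ, mbuf_pool,
						pkts, BURST_SZ);

		/* Copy the packets back into the guest RX ring (VIRTIO_RXQ). */
		rte_vhost_enqueue_burst(vid, VIRTIO_RXQ, pkts, nb_rx);

		/* Enqueue copies data into guest buffers; the caller keeps
		 * ownership of the mbufs and must free them itself. */
		for (i = 0; i < nb_rx; i++)
			rte_pktmbuf_free(pkts[i]);
	}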